In [11]:
# Improved visualization with deduplicated group headers for "process_num"
import pandas as pd
import numpy as np
from pathlib import Path



# Read the summary metrics and normalise the headers (trimmed, lower-case)
# so the column lookups below do not depend on the CSV's exact spelling.
csv_path = "table/summary_metrics.csv"
df = pd.read_csv(csv_path).rename(columns=lambda header: header.strip().lower())

def pick_col(cands, cols, required=True):
    """Return the first entry of ``cands`` that is contained in ``cols``.

    Args:
        cands: Candidate column names, in order of preference.
        cols: Container of available column names (anything supporting ``in``).
        required: When true, a miss raises instead of returning ``None``.

    Returns:
        The first matching candidate, or ``None`` if nothing matches and
        ``required`` is false.

    Raises:
        KeyError: If nothing matches and ``required`` is true.
    """
    not_found = object()  # sentinel, so any candidate value can be returned
    hit = next((name for name in cands if name in cols), not_found)
    if hit is not not_found:
        return hit
    if not required:
        return None
    raise KeyError(f"Missing one of columns {cands}. Found: {list(cols)}")

# Map each logical field onto whichever header alias the CSV actually uses.
# Only the testing-time column is optional; any other miss raises KeyError.
col_process = pick_col(
    ["process_num", "process_number", "p_num", "process"], df.columns
)
col_act = pick_col(
    ["act_func", "activation", "activation_function", "act_name"], df.columns
)
col_batch = pick_col(
    ["batch_size", "batch_portion", "batch", "b"], df.columns
)
col_sgd_time = pick_col(
    ["sgd_iter_time", "sgd_time", "iter_time", "max_time", "training_time"], df.columns
)
col_test_time = pick_col(
    ["testing_time", "test_time"], df.columns, required=False
)

# Coerce the numeric fields; unparseable entries become NaN.
for name in (col_process, col_batch, col_sgd_time, col_test_time):
    if name is None or name not in df.columns:
        continue
    df[name] = pd.to_numeric(df[name], errors="coerce")

# ---- Table 1: mean times by process × activation ----
# The SGD time is always averaged; the testing time only when the CSV has it.
value_cols = [col_sgd_time]
table1_names = {
    col_process: "process_num",
    col_act: "activation_function",
    col_sgd_time: "sgd_iter_time_mean",
}
if col_test_time:
    value_cols = value_cols + [col_test_time]
    table1_names[col_test_time] = "testing_time_mean"

# Rows are ordered by these keys for a clean grouped view.
sort_cols = ["process_num", "activation_function"]

per_group_mean = (
    df.groupby([col_process, col_act], dropna=False)[value_cols]
      .mean(numeric_only=True)
)
df_group_proc_act = (
    per_group_mean.reset_index()
    .rename(columns=table1_names)
    .sort_values(sort_cols)
)

# Display copy: show process_num only on the first row of each run of equal
# values and blank it on the following rows.
vis1 = df_group_proc_act.copy()
starts_new_group = vis1["process_num"].ne(vis1["process_num"].shift())
vis1["process_num"] = vis1["process_num"].where(starts_new_group, "")

# Render the time columns as 3-decimal strings, with blanks for missing values.
for time_col in ("sgd_iter_time_mean", "testing_time_mean"):
    if time_col not in vis1.columns:
        continue
    vis1[time_col] = vis1[time_col].map(lambda x: "" if pd.isna(x) else f"{x:.3f}")

# ---- Table 2: mean of numeric columns by batch size (clean) ----
# Every numeric column except the grouping key itself is averaged.
numeric_cols = list(df.select_dtypes(include=[np.number]).columns)
numeric_cols_wo_key = [name for name in numeric_cols if name != col_batch]

batch_means = (
    df.groupby(col_batch, as_index=False)[numeric_cols_wo_key]
      .mean(numeric_only=True)
)
df_group_batch = (
    batch_means
    .rename(columns={col_batch: "batch_size"})
    .sort_values("batch_size")
)

# Round every numeric column of the result to 3 decimals for readability.
round_cols = df_group_batch.select_dtypes(include=[np.number]).columns
df_group_batch[round_cols] = df_group_batch[round_cols].round(3)

# Optional export of both aggregated tables (currently disabled).
# NOTE(review): the /mnt/data paths look environment-specific — confirm the
# target directory before re-enabling.
# out1 = "/mnt/data/df_group_proc_act_pretty.csv"
# out2 = "/mnt/data/df_group_batch_pretty.csv"
# df_group_proc_act.to_csv(out1, index=False)  # save the true aggregated data
# df_group_batch.to_csv(out2, index=False)


# Cell output: the numeric Table 1. The deduplicated `vis1` copy and
# `df_group_batch` are built above but are not displayed by this cell.
df_group_proc_act

Unnamed: 0,process_num,activation_function,sgd_iter_time_mean,testing_time_mean
0,1,relu,0.559103,4.340288
1,1,sigmoid,1.631908,11.457897
2,1,tanh,1.031369,5.895992
3,2,relu,0.344205,3.543477
4,2,sigmoid,0.950509,9.366346
5,2,tanh,0.571791,3.996511
6,3,relu,0.264911,2.481092
7,3,sigmoid,0.672279,8.212471
8,3,tanh,0.424638,2.861514


In [57]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

def plot_training_history(
    df: pd.DataFrame,
    x_col: str,
    y_cols: list,
    title: str = "Training History",
    xlabel: str = "Epoch / Iteration",
    ylabel: str = "Metric Value",
    legend_fontsize: int = 10,
    axis_fontsize: int = 12,
    figsize=(7, 6),
    alpha: float = 0.8,
    grid: bool = True,
    save_path: str = "training_history.pdf"
) -> None:
    """
    Plot the square root of one or more metric columns and save the figure as a PDF.

    Each requested column is matched case-insensitively against ``df.columns``
    and drawn as ``np.sqrt(column)`` against ``df[x_col]``.
    NOTE(review): the sqrt presumably converts an MSE-style loss into RMSE
    (callers label the y-axis "RMSE") — confirm the stored metric really is a
    squared error.

    Requested columns are resolved before any figure is created, so a call with
    no usable column returns without touching matplotlib. Once a figure exists
    it is always closed, even if plotting or saving raises.

    Args:
        df (pd.DataFrame): Data containing metrics.
        x_col (str): Column name for x-axis (e.g., 'epoch'). Matched exactly.
        y_cols (list): Column names to plot (e.g., ['train_loss', 'val_loss']).
            Names that are not found are reported and skipped.
        title, xlabel, ylabel: Text for title and axes.
        legend_fontsize, axis_fontsize: Font sizes; the title uses
            ``axis_fontsize + 2``.
        figsize (tuple): Figure size (width, height).
        alpha (float): Line transparency.
        grid (bool): Whether to show dashed grid lines.
        save_path (str): Output path. If it does not end in ".pdf", its
            extension (if any) is replaced by ".pdf".

    Returns:
        None. The figure is written to ``save_path`` and then closed; if no
        requested column exists, an error message is printed and nothing is
        written.

    Raises:
        KeyError: If at least one y column is valid but ``x_col`` is not a
            column of ``df``.
    """
    # Normalize extension
    if not save_path.lower().endswith(".pdf"):
        save_path = os.path.splitext(save_path)[0] + ".pdf"

    # Lower-cased name -> actual column label; the first occurrence wins.
    by_lower = {}
    for actual in df.columns:
        by_lower.setdefault(str(actual).lower(), actual)

    # Resolve every requested column up front, before a figure exists.
    resolved = []
    for col in y_cols:
        key = col.lower()
        if key not in by_lower:
            print(f"[Warning] Column '{col}' not found in DataFrame. Skipping.")
            continue
        resolved.append(by_lower[key])

    if not resolved:
        print("[Error] No valid columns plotted. Please check column names.")
        return

    # A missing x column raises KeyError here, before a figure is opened.
    x_values = df[x_col]

    fig, ax = plt.subplots(figsize=figsize)
    try:
        for actual in resolved:
            ax.plot(x_values, np.sqrt(df[actual]), label=actual, alpha=alpha, linewidth=2)

        ax.set_title(title, fontsize=axis_fontsize + 2)
        ax.set_xlabel(xlabel, fontsize=axis_fontsize)
        ax.set_ylabel(ylabel, fontsize=axis_fontsize)
        ax.legend(fontsize=legend_fontsize)
        if grid:
            ax.grid(True, linestyle='--', alpha=0.6)
        fig.tight_layout()

        fig.savefig(save_path, format="pdf", bbox_inches="tight")
        print(f"✅ Plot saved as PDF: {os.path.abspath(save_path)}")
    finally:
        # Always release the figure so repeated calls do not accumulate figures.
        plt.close(fig)


In [61]:

# ReLU run: read the recorded loss history and export its curves as a PDF.
csv_path = "train_his_csv/loss_record_relu.csv"
df_relu = pd.read_csv(csv_path)

relu_plot_options = {
    "title": "ReLu Activation Training History",
    "xlabel": "Epoch",
    "ylabel": "RMSE",
    "legend_fontsize": 17,
    "axis_fontsize": 16,
    "alpha": 0.9,
    "save_path": "relu_training_history.pdf",
}
plot_training_history(
    df_relu, "epoch", ["train_loss", "val_loss"], **relu_plot_options
)


✅ Plot saved as PDF: /Users/yifanyu/Desktop/mpiNN/relu_training_history.pdf


In [62]:

# Sigmoid run: read the recorded loss history and export its curves as a PDF.
# The frame gets its own name so it no longer overwrites the ReLU frame
# (`df_relu`) with data from a different activation.
csv_path = "train_his_csv/loss_record_sigmoid.csv"
df_sigmoid = pd.read_csv(csv_path)

plot_training_history(
    df_sigmoid,
    x_col="epoch",
    y_cols=["train_loss", "val_loss"],
    title="Sigmoid Activation Training History",
    xlabel="Epoch",
    ylabel="RMSE",
    legend_fontsize=17,
    axis_fontsize=16,
    alpha=0.9,
    save_path="sigmoid_training_history.pdf"
)


✅ Plot saved as PDF: /Users/yifanyu/Desktop/mpiNN/sigmoid_training_history.pdf


In [63]:

# Tanh run: read the recorded loss history and export its curves as a PDF.
# The frame gets its own name so it no longer overwrites the ReLU frame
# (`df_relu`) with data from a different activation.
csv_path = "train_his_csv/loss_record_tanh.csv"
df_tanh = pd.read_csv(csv_path)

plot_training_history(
    df_tanh,
    x_col="epoch",
    y_cols=["train_loss", "val_loss"],
    title="Tanh Activation Training History",
    xlabel="Epoch",
    ylabel="RMSE",
    legend_fontsize=17,
    axis_fontsize=16,
    alpha=0.9,
    save_path="tanh_training_history.pdf"
)


✅ Plot saved as PDF: /Users/yifanyu/Desktop/mpiNN/tanh_training_history.pdf
