In [8]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import numpy as np

In [36]:
# --- Paths ---
csv_path = "logs/final_step_running_2025-08-31_22-45-45/Swiss_Roll/metrics_all_regs_with_diffs.csv"
# Plots are saved next to the CSV. os.path.dirname() returns "" for a bare
# file name and os.makedirs("") raises, so fall back to the current directory.
save_folder = os.path.dirname(csv_path) or "."
os.makedirs(save_folder, exist_ok=True)

# --- Load CSV ---
df = pd.read_csv(csv_path)

# Strip percentage signs from the "*_pct_diff" columns and convert to float.
# The column is cast to str first because pandas parses a column without any
# '%' as numeric, and the .str accessor raises AttributeError on numeric data.
pct_cols = [c for c in df.columns if "_pct_diff" in c]
for col in pct_cols:
    df[col] = (
        df[col]
        .astype(str)
        .str.replace('%', '', regex=False)  # literal replacement, not a regex
        .astype(float)
    )

In [37]:
# Split mean ± std columns
def split_mean_std(col):
    """Split the "mean ± std" strings of ``df[col]`` into two float Series.

    Reads the module-level DataFrame ``df``.

    Parameters
    ----------
    col : str
        Name of the column in ``df`` to parse.

    Returns
    -------
    (pd.Series, pd.Series)
        The means and the standard deviations, in row order. Entries without
        a '±' are parsed as a plain float and get a std of 0.0.

    Raises
    ------
    ValueError
        If a part cannot be converted to float, or an entry contains more
        than one '±'.
    """
    means, stds = [], []
    for raw in df[col]:
        text = str(raw)
        if '±' in text:
            mean_part, std_part = text.split('±')
            means.append(float(mean_part.strip()))
            stds.append(float(std_part.strip()))
        else:
            means.append(float(raw))
            stds.append(0.0)
    # Reuse df's index so that assigning the result back to df (which aligns
    # on index labels) also works when df does not have a default RangeIndex.
    # dtype=float keeps the result numeric even when df is empty.
    return (
        pd.Series(means, index=df.index, dtype=float),
        pd.Series(stds, index=df.index, dtype=float),
    )

# Metrics whose "mean ± std" column is split into <metric>_mean / <metric>_std.
metric_cols = ['precision','recall','density','coverage']

# Lookup tables: metric name -> name of the derived column in df.
mean_cols = {metric_name: metric_name+"_mean" for metric_name in metric_cols}
std_cols = {metric_name: metric_name+"_std" for metric_name in metric_cols}

# Parse each metric column once and store both parts back on df.
for metric_name in metric_cols:
    parsed_means, parsed_stds = split_mean_std(metric_name)
    df[mean_cols[metric_name]] = parsed_means
    df[std_cols[metric_name]] = parsed_stds

In [38]:
# --- Helper to find best value ---
def best_color(metric, value):
    """Return 'red' when ``value`` equals the largest ``<metric>_mean`` in ``df``, else 'blue'.

    Reads the module-level DataFrame ``df``; the comparison is an exact equality.
    """
    column_max = df[metric+"_mean"].max()
    if value==column_max:
        return 'red'
    return 'blue'

In [39]:
# --- Plot reg vs metrics with shaded std ---
# One figure per metric: the mean as a line over Reg, a shaded mean ± std band,
# and a marker on the row with the highest mean. Each figure is written to
# save_folder as reg_vs_<metric>.png and then closed.
for metric in metric_cols:
    mean = df[metric+"_mean"]
    std = df[metric+"_std"]
    reg = df['Reg']

    fig, ax = plt.subplots(figsize=(7,5))
    ax.plot(reg, mean, 'o-', label=f"{metric} mean", color='blue')
    ax.fill_between(reg, mean-std, mean+std, color='blue', alpha=0.2, label="±1 std")

    # Highlight best value (row with the largest mean); the marker is green
    # for precision/density and red for the other metrics.
    best_idx = mean.idxmax()
    if metric in ['precision','density']:
        marker_color = 'green'
    else:
        marker_color = 'red'
    ax.scatter(df.loc[best_idx,'Reg'], mean[best_idx],
               color=marker_color, s=100, label="Best")

    ax.set_title(f"Reg vs {metric} (mean ± std shaded)")
    ax.set_xlabel("Reg")
    ax.set_ylabel(metric)
    ax.grid(True)
    ax.legend()
    fig.tight_layout()
    fig.savefig(os.path.join(save_folder, f"reg_vs_{metric}.png"))
    plt.close(fig)  # release the figure; many are created in this loop



In [40]:
# --- Plot percentage differences ---
# One bar chart per metric that has a "<metric>_pct_diff" column in df,
# written to save_folder as reg_vs_<metric>_pct_diff.png.
for metric in metric_cols:
    pct_col = metric+"_pct_diff"
    if pct_col not in df.columns:
        continue  # no percentage-difference column for this metric

    fig, ax = plt.subplots(figsize=(7,5))
    # NOTE(review): no `hue` is passed, so `dodge` and `legend` presumably
    # have no visible effect here — confirm before removing them.
    sns.barplot(
        x='Reg',
        y=pct_col,
        data=df,
        dodge=False,
        legend=False,
        ax=ax,
    )
    ax.set_title(f"Reg vs {metric} % difference")
    ax.set_xlabel("Reg")
    ax.set_ylabel(f"{metric} % difference")
    ax.grid(True)
    fig.tight_layout()
    fig.savefig(os.path.join(save_folder, f"reg_vs_{metric}_pct_diff.png"))
    plt.close(fig)

In [41]:
# --- Best reg "confusion matrix" style heatmap ---
# For every metric, record the Reg value of the row with the highest mean.
# "Best" is the maximum for all four metrics, which agrees with the per-metric
# plots (they mark mean.idxmax() as "Best") and with best_color().
# NOTE(review): this assumes higher is better for recall and coverage as well
# as for precision and density — confirm against the metric definitions.
best_regs = []
for metric in metric_cols:
    best_row = df[metric+"_mean"].idxmax()
    best_regs.append(df.loc[best_row,'Reg'])

# One-row table (metrics as columns) so the heatmap is a single annotated strip.
# NOTE(review): sns.heatmap colours cells by value, so this presumably needs a
# numeric 'Reg' column — confirm.
heatmap_data = pd.DataFrame({'Metric': metric_cols, 'Best_Reg': best_regs})
fig, ax = plt.subplots(figsize=(8,4))
sns.heatmap(heatmap_data.set_index('Metric').T, annot=True, fmt='', cmap='viridis', cbar=False, ax=ax)
ax.set_title("Best Reg per Metric")
fig.tight_layout()
fig.savefig(os.path.join(save_folder, "best_reg_heatmap.png"))
plt.close(fig)

print("All plots saved in:", save_folder)

All plots saved in: logs/final_step_running_2025-08-31_22-45-45/Swiss_Roll
