In [1]:
import pandas as pd
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import numpy as np
import os
import glob

In [15]:
# Define the base directory (dataset directory)
base_dir = '../logs/final_step_running_2025-08-31_10-49-51/Moon_with_two_circles_unbounded/'
os.makedirs(base_dir, exist_ok=True)  # Ensure directory exists

# Find all relevant CSV paths
csv_paths = glob.glob(os.path.join(base_dir, '*/metrics_all_regs_with_diffs.csv'))

# Metrics of interest
metrics = ['precision', 'recall', 'density', 'coverage']

# Function to parse mean from "value ± std"
def parse_mean(val):
    if isinstance(val, str) and '±' in val:
        return float(val.split('±')[0].strip())
    return float(val)

# Function to parse percentage (e.g., "0.22%" -> 0.22)
def parse_pct(val):
    if isinstance(val, str) and '%' in val:
        return float(val.strip('%'))
    return float(val)

# Function to compute radar area
def compute_radar_area(values):
    N = len(values)
    if N == 0:
        return 0
    sin_term = np.sin(2 * np.pi / N)
    area = 0.5 * sin_term * sum(values[i] * values[(i + 1) % N] for i in range(N))
    return area

# Get baseline from first CSV
if csv_paths:
    first_df = pd.read_csv(csv_paths[0])
    for metric in metrics:
        first_df[metric] = first_df[metric].apply(parse_mean)
    baseline_row = first_df[first_df['Reg'] == 0].iloc[0]
    baseline_values = [baseline_row[metric] for metric in metrics]
    baseline_area = compute_radar_area(baseline_values)
else:
    baseline_values = [0] * len(metrics)  # Fallback, though unlikely
    baseline_area = 0

In [16]:
print(csv_paths)

['../logs/final_step_running_2025-08-31_10-49-51/Moon_with_two_circles_unbounded/skew/metrics_all_regs_with_diffs.csv', '../logs/final_step_running_2025-08-31_10-49-51/Moon_with_two_circles_unbounded/mmd_rbf/metrics_all_regs_with_diffs.csv', '../logs/final_step_running_2025-08-31_10-49-51/Moon_with_two_circles_unbounded/kurt/metrics_all_regs_with_diffs.csv', '../logs/final_step_running_2025-08-31_10-49-51/Moon_with_two_circles_unbounded/mmd_linear/metrics_all_regs_with_diffs.csv', '../logs/final_step_running_2025-08-31_10-49-51/Moon_with_two_circles_unbounded/iso/metrics_all_regs_with_diffs.csv', '../logs/final_step_running_2025-08-31_10-49-51/Moon_with_two_circles_unbounded/var_mi/metrics_all_regs_with_diffs.csv', '../logs/final_step_running_2025-08-31_10-49-51/Moon_with_two_circles_unbounded/kl/metrics_all_regs_with_diffs.csv', '../logs/final_step_running_2025-08-31_10-49-51/Moon_with_two_circles_unbounded/mean_l2/metrics_all_regs_with_diffs.csv', '../logs/final_step_running_2025-08-

In [17]:
# Dictionaries to collect data for bar and radar plots
pct_increases = {}
abs_increases = {}
radar_values = {}
area_diff_pct = {}

for csv_path in csv_paths:
    # Extract regularizer name from path (e.g., 'iso', 'kl')
    regularizer = os.path.basename(os.path.dirname(csv_path))
    
    # Load CSV
    df = pd.read_csv(csv_path)
    
    # Parse the metric columns (absolute values)
    for metric in metrics:
        df[metric] = df[metric].apply(parse_mean)
    
    # Parse the pct_diff and abs_diff columns
    for metric in metrics:
        pct_col = f'{metric}_pct_diff'
        abs_col = f'{metric}_abs_diff'
        if pct_col in df.columns:
            df[pct_col] = df[pct_col].apply(parse_pct)
        if abs_col in df.columns:
            df[abs_col] = df[abs_col].apply(float)  # abs_diff are floats without %
    
    # Find the row with the best (maximum) density percentage increase
    best_idx = df['density_pct_diff'].idxmax()
    best_row = df.loc[best_idx]
    best_reg = best_row['Reg']
    
    # Collect pct and abs increases for bar plots
    pct_increases[regularizer] = {metric: best_row[f'{metric}_pct_diff'] for metric in metrics}
    abs_increases[regularizer] = {metric: best_row[f'{metric}_abs_diff'] for metric in metrics}
    
    # Collect absolute metric values for radar plot
    radar_values[regularizer] = [best_row[metric] for metric in metrics]
    
    # Calculate area percentage difference for bar plot
    variant_area = compute_radar_area(radar_values[regularizer])
    area_diff_pct[regularizer] = (variant_area - baseline_area) * 100 / baseline_area if baseline_area != 0 else 0
    
    # Plot 2: Line plots for this regularizer (reg vs metrics, with best marked)
    fig, ax = plt.subplots(figsize=(10, 6))
    for metric in metrics:
        pct_increase = best_row[f'{metric}_pct_diff']
        label = f'{metric.capitalize()} (+{pct_increase:.2f}%)'
        ax.plot(df['Reg'], df[metric], label=label)
        
        # Mark the best point (based on density increase)
        best_y = best_row[metric]
        ax.scatter(best_reg, best_y, color='red', zorder=5)
    
    ax.set_xlabel('Reg')
    ax.set_ylabel('Metric Value')
    ax.set_title(f'{regularizer.capitalize()} - Reg vs Metrics (Best by Density Increase Marked)')
    ax.legend()
    plt.tight_layout()
    plt.savefig(os.path.join(base_dir, f'{regularizer}_line_metrics.png'))
    plt.close()

In [18]:
# Convert collections to DataFrames for bar plots
pct_df = pd.DataFrame(pct_increases).T
abs_df = pd.DataFrame(abs_increases).T

# Plot 1a: Bar plot for percentage increases
fig, ax = plt.subplots(figsize=(12, 6))
pct_df.plot(kind='bar', ax=ax, color=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728'])
plt.xlabel('Regularizer')
plt.ylabel('Percentage Increase (%)')
plt.title('Percentage Increases at Best Density Reg for Each Metric')
plt.legend(title='Metric')
for container in ax.containers:
    ax.bar_label(container, fmt='%.3f')
plt.tight_layout()
plt.savefig(os.path.join(base_dir, 'percentage_increases_bar.png'))
plt.close()

# Plot 1b: Bar plot for absolute increases
fig, ax = plt.subplots(figsize=(12, 6))
abs_df.plot(kind='bar', ax=ax, color=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728'])
plt.xlabel('Regularizer')
plt.ylabel('Absolute Increase')
plt.title('Absolute Increases at Best Density Reg for Each Metric')
plt.legend(title='Metric')
for container in ax.containers:
    ax.bar_label(container, fmt='%.3f')
plt.tight_layout()
plt.savefig(os.path.join(base_dir, 'absolute_increases_bar.png'))
plt.close()

# Plot 3a: Radar Plot with Plotly (only methods, with transparency)
categories = [m.capitalize() for m in metrics]
categories += categories[:1]  # Close the plot

fig_radar = go.Figure()

# Define a color palette for better aesthetics
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']
# Convert hex colors to RGBA with alpha=0.3 for fill
rgba_colors = [f'rgba({int(c[1:3], 16)}, {int(c[3:5], 16)}, {int(c[5:7], 16)}, 0.3)' for c in colors]

i = 0
for regularizer, vals in radar_values.items():
    values = vals + vals[:1]  # Close the plot
    
    fig_radar.add_trace(go.Scatterpolar(
        r=values,
        theta=categories,
        fill='toself',
        name=regularizer.capitalize(),
        line=dict(color=colors[i % len(colors)]),
        opacity=0.8,  # Line opacity
        fillcolor=rgba_colors[i % len(colors)],  # Fill with transparency
        hovertemplate='%{theta}: %{r:.3f}'
    ))
    i += 1

# Customize the layout for a beautiful appearance
fig_radar.update_layout(
    polar=dict(
        radialaxis=dict(
            visible=True,
            range=[0.9, 1.0],  # Adjust based on data range
            showline=True,
            tickfont=dict(size=12),
            gridcolor='#d3d3d3'
        ),
        angularaxis=dict(
            tickfont=dict(size=14, family='Arial, bold'),
            gridcolor='#d3d3d3'
        ),
        bgcolor='rgba(245, 245, 245, 0.8)'
    ),
    showlegend=True,
    legend=dict(
        x=1.1,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0.9)',
        bordercolor='gray',
        borderwidth=1,
        font=dict(size=12)
    ),
    title=dict(
        text='Radar Plot of Metrics at Best Density Reg',
        x=0.5,
        xanchor='center',
        y=0.95,
        font=dict(size=16, family='Arial, bold')
    ),
    margin=dict(l=50, r=50, t=80, b=50),
    paper_bgcolor='white',
    plot_bgcolor='white'
)

# Save the radar plot
# fig_radar.write_html(os.path.join(base_dir, 'radar_metrics.html'))
fig_radar.write_image(os.path.join(base_dir, 'radar_metrics.png'), format='png', width=800, height=800)

# Plot 3b: Bar Plot for Radar Area Percentage Increase
fig_area = go.Figure()

regularizers = list(area_diff_pct.keys())
area_values = list(area_diff_pct.values())

fig_area.add_trace(go.Bar(
    x=regularizers,
    y=area_values,
    marker_color=colors[:len(regularizers)],
    text=[f'{val:.3f}%' for val in area_values],
    textposition='auto',
    hovertemplate='%{x}: %{y:.3f}%'
))

# Customize the layout for the bar plot
fig_area.update_layout(
    title=dict(
        text='Percentage Increase in Radar Area vs Baseline (Reg 0)',
        x=0.5,
        xanchor='center',
        y=0.95,
        font=dict(size=16, family='Arial, bold')
    ),
    xaxis=dict(
        title='Regularizer',
        tickfont=dict(size=12)
    ),
    yaxis=dict(
        title='Area Increase (%)',
        tickfont=dict(size=12),
        gridcolor='#d3d3d3'
    ),
    showlegend=False,
    margin=dict(l=50, r=50, t=80, b=50),
    paper_bgcolor='white',
    plot_bgcolor='white'
)

# Save the area increase bar plot
# fig_area.write_html(os.path.join(base_dir, 'radar_area_increase.html'))
fig_area.write_image(os.path.join(base_dir, 'radar_area_increase.png'), format='png', width=800, height=600)

print(f'All plots saved in {base_dir}')

All plots saved in ../logs/final_step_running_2025-08-31_10-49-51/Moon_with_two_circles_unbounded/
