In [8]:
import pandas as pd
import numpy as np
import wandb
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Hardcoded parameters for the specific sweep
entity_name = "dive-ci"
project_name = "CLadder"
sweep_id = "1m6rhsc5"

# Initialize wandb API
api = wandb.Api()

# Get the sweep runs
sweep = api.sweep(f"{entity_name}/{project_name}/{sweep_id}")

# Collect run data
runs_data = []
for run in sweep.runs:
    # Get run summary and config
    run_data = {
        "id": run.id,
        "name": run.name,
        "state": run.state,
        "created_at": run.created_at,
        "summary": run.summary._json_dict if hasattr(run.summary, "_json_dict") else {},
        "config": run.config
    }
    
    # Extract relevant metrics and parameters
    summary = run_data["summary"]
    config = run_data["config"]
    print(run)
    
    run_info = {
        "run_id": run.id,
        "run_name": run.name,
        "reasoning": config.get("experiment.reasoning", None),
        "anonymize": config.get("dataset.anonymize", None),
        "percent_train": config.get("dataset.percent_of_train_dataset", None),
        "num_epochs": config.get("training.num_train_epochs", None),
    }
    
    # Extract scores for different test datasets
    if "scores" in summary:
        scores = summary["scores"]
        for dataset, score in scores.items():
            dataset_name = dataset.split('/')[-1]
            run_info[f"score_{dataset_name}"] = score
    
    runs_data.append(run_info)

# Convert to dataframe
df = pd.DataFrame(runs_data)

# Identify test dataset columns
test_dataset_columns = [col for col in df.columns if col.startswith('score_')]
training_percentages = sorted(df['percent_train'].unique())

print(f"Found {len(df)} runs with {len(test_dataset_columns)} test datasets")
print(f"Test datasets: {[col.replace('score_', '') for col in test_dataset_columns]}")
print(f"Training percentages: {training_percentages}")
df

<Run dive-ci/CLadder/ljwxyquo (finished)>
<Run dive-ci/CLadder/ff1qeenz (finished)>
<Run dive-ci/CLadder/k379asqp (finished)>
<Run dive-ci/CLadder/sflre95n (finished)>
<Run dive-ci/CLadder/x7mq1iux (finished)>
<Run dive-ci/CLadder/a33cwkow (finished)>
<Run dive-ci/CLadder/a4tdtq9k (finished)>
<Run dive-ci/CLadder/8dlkbkop (finished)>
<Run dive-ci/CLadder/rku4rrap (finished)>
<Run dive-ci/CLadder/xsgjxj93 (finished)>
<Run dive-ci/CLadder/w9slqfma (finished)>
<Run dive-ci/CLadder/xmfr3oak (finished)>
<Run dive-ci/CLadder/nndmt1w9 (finished)>
<Run dive-ci/CLadder/tv6kml36 (finished)>
<Run dive-ci/CLadder/wcamp9ei (finished)>
<Run dive-ci/CLadder/7wfqo68v (finished)>
<Run dive-ci/CLadder/e04jmux5 (finished)>
<Run dive-ci/CLadder/c2b54jw1 (finished)>
<Run dive-ci/CLadder/8loyepoy (finished)>
<Run dive-ci/CLadder/nbmvptcx (finished)>
<Run dive-ci/CLadder/deqtc0b2 (finished)>
<Run dive-ci/CLadder/nh3tzoia (finished)>
<Run dive-ci/CLadder/m5hnt31j (finished)>
<Run dive-ci/CLadder/9w73194v (fin

Unnamed: 0,run_id,run_name,reasoning,anonymize,percent_train,num_epochs
0,ljwxyquo,QWen-RTrue-AbsFalse-DP0.1-EP16,True,False,0.1,16
1,ff1qeenz,QWen-RTrue-AbsFalse-DP0.1-EP8,True,False,0.1,8
2,k379asqp,QWen-RTrue-AbsFalse-DP0.1-EP4,True,False,0.1,4
3,sflre95n,QWen-RTrue-AbsFalse-DP0.1-EP2,True,False,0.1,2
4,x7mq1iux,QWen-RTrue-AbsFalse-DP0.1-EP1,True,False,0.1,1
5,a33cwkow,QWen-RTrue-AbsFalse-DP0.05-EP16,True,False,0.05,16
6,a4tdtq9k,QWen-RTrue-AbsFalse-DP0.05-EP8,True,False,0.05,8
7,8dlkbkop,QWen-RTrue-AbsFalse-DP0.05-EP4,True,False,0.05,4
8,rku4rrap,QWen-RTrue-AbsFalse-DP0.05-EP2,True,False,0.05,2
9,xsgjxj93,QWen-RTrue-AbsFalse-DP0.05-EP1,True,False,0.05,1


In [32]:
sweep = api.sweep(f"{entity_name}/{project_name}/{sweep_id}")
sweep.runs[0].config["dataset.percent_of_train_dataset"]
# for key in sweep.runs[1].__dir__():
#     print(sweep.runs[0].getattr(key))



0.1

In [1]:

# Create comparison plots
def create_anonymize_comparison_plots():
    # Create a subplot for each test dataset and training percentage combination
    num_test_sets = len(test_dataset_columns)
    num_percentages = len(training_percentages)
    
    fig = make_subplots(
        rows=num_percentages, 
        cols=num_test_sets,
        subplot_titles=[f"{col.replace('score_', '')} - {pct*100}% Training" 
                        for pct in training_percentages for col in test_dataset_columns],
        vertical_spacing=0.12,
        horizontal_spacing=0.05
    )
    
    # Color scheme for True/False anonymization
    colors = {"True": "rgb(31, 119, 180)", "False": "rgb(255, 127, 14)"}
    
    for i, pct in enumerate(training_percentages):
        for j, test_col in enumerate(test_dataset_columns):
            dataset_name = test_col.replace('score_', '')
            
            # Filter data for this training percentage
            df_pct = df[df['percent_train'] == pct]
            
            # Group by anonymization and epochs, calculate mean scores
            for anon in [True, False]:
                df_group = df_pct[df_pct['anonymize'] == anon]
                
                # Skip if no data for this combination
                if len(df_group) == 0:
                    continue
                
                # Get average scores for each epoch
                epoch_scores = df_group.groupby('num_epochs')[test_col].mean().reset_index()
                
                # Add line to plot
                fig.add_trace(
                    go.Scatter(
                        x=epoch_scores['num_epochs'],
                        y=epoch_scores[test_col],
                        mode='lines+markers',
                        name=f"Anonymize={anon}",
                        line=dict(color=colors[str(anon)]),
                        legendgroup=f"Anonymize={anon}",
                        showlegend=(i==0 and j==0) # Only show legend once
                    ),
                    row=i+1, col=j+1
                )
            
            # Update axis labels
            if i == num_percentages-1:
                fig.update_xaxes(title_text="Number of Epochs", row=i+1, col=j+1)
            if j == 0:
                fig.update_yaxes(title_text="Score", row=i+1, col=j+1)
    
    # Update layout
    fig.update_layout(
        height=300*num_percentages,
        width=400*num_test_sets,
        title_text="Anonymize vs Non-Anonymize Performance Comparison",
        legend_title="Anonymization Setting",
        margin=dict(t=50, b=20, l=20, r=20),
    )
    
    return fig

# Create and display the comparison plots
comparison_fig = create_anonymize_comparison_plots()
comparison_fig.show()

# Create a simplified single-row comparison for easier viewing
def create_simplified_comparison():
    # Create one row of plots, one for each test dataset
    fig = make_subplots(
        rows=1, 
        cols=len(test_dataset_columns),
        subplot_titles=[col.replace('score_', '') for col in test_dataset_columns],
        horizontal_spacing=0.1
    )
    
    # Color scheme for different training percentages
    colors = px.colors.qualitative.Plotly
    
    for j, test_col in enumerate(test_dataset_columns):
        dataset_name = test_col.replace('score_', '')
        
        # For each training percentage
        for i, pct in enumerate(training_percentages):
            # Filter data for this training percentage
            df_pct = df[df['percent_train'] == pct]
            
            # For anonymize=True and anonymize=False
            for k, anon in enumerate([True, False]):
                df_group = df_pct[df_pct['anonymize'] == anon]
                
                # Skip if no data for this combination
                if len(df_group) == 0:
                    continue
                
                # Get average scores for each epoch
                epoch_scores = df_group.groupby('num_epochs')[test_col].mean().reset_index()
                
                # Add line to plot
                fig.add_trace(
                    go.Scatter(
                        x=epoch_scores['num_epochs'],
                        y=epoch_scores[test_col],
                        mode='lines+markers',
                        name=f"{pct*100}% Train, Anon={anon}",
                        line=dict(
                            color=colors[i % len(colors)],
                            dash='dash' if anon else 'solid'
                        ),
                    ),
                    row=1, col=j+1
                )
            
            # Update axis labels
            fig.update_xaxes(title_text="Number of Epochs", row=1, col=j+1)
            if j == 0:
                fig.update_yaxes(title_text="Score", row=1, col=j+1)
    
    # Update layout
    fig.update_layout(
        height=500,
        width=350*len(test_dataset_columns),
        title_text="Anonymize vs Non-Anonymize Performance Comparison",
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=-0.3,
            xanchor="center",
            x=0.5
        ),
        margin=dict(b=100)
    )
    
    return fig

# Create and display the simplified comparison
simple_fig = create_simplified_comparison()
simple_fig.show()

Fetching sweep results...
Found 40 runs with 0 test datasets
Test datasets: []
Training percentages: [0.01, 0.02, 0.05, 0.1]


ValueError: 
The 'cols' argument to make_subplots must be an int greater than 0.
    Received value of type <class 'int'>: 0