In [1]:
import wandb
import pandas as pd
import matplotlib.pyplot as plt

In [12]:

# Open a client for the Weights & Biases public API and look up the sweep
# to analyse, addressed by its full path string.
api = wandb.Api()
sweep_id = 'maximes_crew/S3_SHD_runs/pqc7ir1o'
sweep = api.sweep(sweep_id)

# Collection of the runs that belong to this sweep
runs = sweep.runs

# Column names used as grouping keys when aggregating runs per configuration
sweep_params = [
    'lr',
    'pdrop',
    'scheduler_patience',
    'scheduler_factor',
    'dt_max',
    'n_layers',
    'n_hiddens',
]

In [13]:
# Bare expression: shows the repr of the runs collection fetched from the sweep
runs

<Runs maximes_crew/S3_SHD_runs>

In [14]:
# Build a list with one flat record (dict) per run; it is turned into a
# DataFrame later on.
# Maps each output column to the key it is read from in the run's config.
config_keys = {
    'n_layers': 'nb_layers',
    'n_hiddens': 'nb_hiddens',
    'lr': 'lr',
    'dt_min': 'dt_min',
    'dt_max': 'dt_max',
    'pdrop': 'pdrop',
    'scheduler_patience': 'scheduler_patience',
    'scheduler_factor': 'scheduler_factor',
}

data = []
for run in runs:
    run_config = run.config
    best_valid_acc = run.summary.get('best valid acc')

    # Runs whose summary has no 'best valid acc' entry are left out
    if best_valid_acc is None:
        continue

    # Config keys that are absent come back as None (dict.get default)
    record = {column: run_config.get(key) for column, key in config_keys.items()}
    record['best_valid_acc'] = best_valid_acc
    data.append(record)



In [15]:
# Preview only the first few records; displaying the whole list floods the
# saved notebook output with one dict per run.
data[:5]

[{'n_layers': 3,
  'n_hiddens': 128,
  'lr': 0.001,
  'dt_min': 0.01,
  'dt_max': 1.5,
  'pdrop': 0.5,
  'scheduler_patience': 10,
  'scheduler_factor': 0.9,
  'best_valid_acc': 0.9075126262626264},
 {'n_layers': 3,
  'n_hiddens': 128,
  'lr': 0.001,
  'dt_min': 0.01,
  'dt_max': 1.5,
  'pdrop': 0.5,
  'scheduler_patience': 10,
  'scheduler_factor': 0.9,
  'best_valid_acc': 0.9230981691919192},
 {'n_layers': 3,
  'n_hiddens': 128,
  'lr': 0.001,
  'dt_min': 0.01,
  'dt_max': 1.5,
  'pdrop': 0.5,
  'scheduler_patience': 10,
  'scheduler_factor': 0.9,
  'best_valid_acc': 0.9380523989898992},
 {'n_layers': 3,
  'n_hiddens': 128,
  'lr': 0.001,
  'dt_min': 0.01,
  'dt_max': 1.5,
  'pdrop': 0.5,
  'scheduler_patience': 10,
  'scheduler_factor': 0.9,
  'best_valid_acc': 0.9447995580808082},
 {'n_layers': 3,
  'n_hiddens': 128,
  'lr': 0.001,
  'dt_min': 0.01,
  'dt_max': 1.5,
  'pdrop': 0.5,
  'scheduler_patience': 10,
  'scheduler_factor': 0.9,
  'best_valid_acc': 0.9018308080808082},
 {'n_

In [16]:
# One row per run
df = pd.DataFrame(data)

# For every row, the number of runs that share its hyperparameter configuration
# (group size broadcast back onto the rows of the group)
df['run_count'] = df.groupby(sweep_params).transform('size')

# Mean and standard deviation of best_valid_acc per configuration.
# 'run_count' is constant within a configuration, so adding it to the keys
# does not split any group; it only carries the count into the result.
group_keys = sweep_params + ['run_count']
df_grouped = (
    df.groupby(group_keys)
    .agg({'best_valid_acc': ['mean', 'std']})
    .reset_index()
)

# Best configurations first, ranked by mean accuracy, with a fresh 0..n-1 index
df_grouped_sorted = (
    df_grouped
    .sort_values(by=('best_valid_acc', 'mean'), ascending=False)
    .reset_index(drop=True)
)

In [17]:
# First ten rows of the ranked table (highest mean accuracy first)
df_grouped_sorted.head(10)

Unnamed: 0_level_0,lr,pdrop,scheduler_patience,scheduler_factor,dt_max,n_layers,n_hiddens,run_count,best_valid_acc,best_valid_acc
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,mean,std
0,0.01,0.5,5,0.9,0.5,3,128,5,0.95056,0.003733
1,0.005,0.25,10,0.7,1.0,3,128,5,0.9497,0.003868
2,0.01,0.1,10,0.9,0.5,3,128,5,0.949345,0.00851
3,0.01,0.25,10,0.7,0.5,3,128,5,0.9491,0.004611
4,0.01,0.5,10,0.9,0.5,3,128,5,0.949029,0.007399
5,0.005,0.1,5,0.9,1.5,3,128,5,0.948706,0.004821
6,0.01,0.25,5,0.7,1.5,3,128,5,0.948603,0.003237
7,0.01,0.1,5,0.9,0.5,3,128,5,0.948595,0.009733
8,0.005,0.25,10,0.9,0.5,3,128,5,0.94858,0.002867
9,0.01,0.25,5,0.9,1.5,3,128,5,0.948232,0.00544


In [83]:
# One row per run
df = pd.DataFrame(data)

# Mean and standard deviation of best_valid_acc for each combination of the
# sweep parameters
df_grouped = (
    df.groupby(sweep_params)
    .agg({'best_valid_acc': ['mean', 'std']})
    .reset_index()
)

# Rank configurations by mean accuracy, best first, with a fresh 0..n-1 index
df_grouped_sorted = (
    df_grouped
    .sort_values(by=('best_valid_acc', 'mean'), ascending=False)
    .reset_index(drop=True)
)

In [70]:
# First ten rows of the ranked table (highest mean accuracy first)
df_grouped_sorted.head(10)

Unnamed: 0_level_0,lr,pdrop,scheduler_patience,scheduler_factor,dt_max,n_layers,n_hiddens,best_valid_acc,best_valid_acc
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,mean,std
0,0.005,0.25,10,0.9,0.5,3,512,0.955437,0.003939
1,0.01,0.1,10,0.7,0.5,3,512,0.952336,0.005383
2,0.01,0.1,5,0.7,0.5,3,512,0.951081,0.005959
3,0.005,0.25,5,0.9,0.5,3,512,0.94914,0.004174
4,0.005,0.5,5,0.7,0.5,3,512,0.940664,0.008965
5,0.005,0.25,10,0.7,0.5,3,512,0.939986,
