In [1]:
from tqdm import tqdm
import wandb
import pandas as pd 
api = wandb.Api(timeout=120)
pd.set_option('display.max_rows', None)

import pylab as plt
# Project is specified by <entity/project-name>

def extract_run(tag):
    counter = 0
    failed_counter = 0
    runs = api.runs("grabus/spectralRegularization")
    summary_list = [] 
    config_list = [] 
    name_list = [] 
    tags_list = []
    runs_histories = {}
    for run in tqdm(runs): 
        # run.summary are the output key/values like accuracy.
        # We call ._json_dict to omit large files 
        #print(run)
        if not tag in run.tags:
            continue
        if run.state != 'finished':
            failed_counter += 1
            continue
        counter += 1
        summary_list.append(run.summary._json_dict) 
        # run.config is the input metrics.
        # We remove special values that start with _.

        config = {k:v for k,v in run.config.items() if not k.startswith('_')}
        config['id'] = run.id
        config_list.append(config) 

        # run.name is the name of the run.
        name_list.append(run.name)    
        tags_list.append(run.tags)
        runs_histories[run.id] = run.history()

    summary_df = pd.DataFrame.from_records(summary_list) 
    config_df = pd.DataFrame.from_records(config_list) 
    name_df = pd.DataFrame({'name': name_list}) 
    tags_df = pd.DataFrame({'tags': tags_list}) 
    all_df = pd.concat([name_df, config_df,summary_df, tags_df], axis=1)
    print(f"{tag}:\n extracted {counter} runs ({failed_counter} failed runs skipped).")
    return config_df,runs_histories

def plot_results(config_df, runs_histories, metric, hyperparams_to_choose_on_val_score=[]):
    # Create table where each row corresponds to a run and each column gives
    # each metric at the best epoch (using the val_loss to select the best epoch)
    def get_best_epoch_metrics(runs,get_best_epoch):
        return dict([(k,run.loc[get_best_epoch(run)]) for k,run in runs.items()])
    get_best_epoch = lambda df: df.idxmin()['val_loss']
    best_epoch_metrics = get_best_epoch_metrics(runs_histories,get_best_epoch)

    # Join the previous table (metrics at best epoch for each run) with the hyper-parameter table
    best_epoch_metrics_and_hyperparams = config_df.set_index('id')\
                .join(pd.DataFrame.from_dict(best_epoch_metrics,orient='index'),
                      lsuffix='_config')

    # replace "hankel_russ_roul_type" col value by "no_reg" when lambd=0
    best_epoch_metrics_and_hyperparams.loc[best_epoch_metrics_and_hyperparams['lambd']<0.0001,"hankel_russ_roul_type"] = "no_reg"
    best_epoch_metrics_and_hyperparams.loc[best_epoch_metrics_and_hyperparams['stop_proba']<0.0001,"hankel_russ_roul_type"] += "_biased"

    #best_epoch_metrics_and_hyperparams.loc[best_epoch_metrics_and_hyperparams['stop_proba']<0.0001,"hankel_russ_roul_type"] += "_fixed"
    
    hyper_params = ['stop_proba','hankel_russ_roul_type','lambd','train_size','lr_config']
    hyper_params = list(set(hyper_params).difference(hyperparams_to_choose_on_val_score))
    gb = best_epoch_metrics_and_hyperparams.groupby(hyper_params)

    idx = gb.idxmin()['val_loss']
    results = best_epoch_metrics_and_hyperparams.loc[idx]
    #print(best_epoch_metrics_and_hyperparams[['stop_proba','hankel_russ_roul_type','lambd','train_size','lr_config','val_loss']].sort_values(by=['stop_proba','hankel_russ_roul_type','lambd','train_size','lr_config','val_loss']))
#     print(results[['stop_proba','hankel_russ_roul_type','lambd','train_size','val_loss']])
    
    
    gb = results.groupby(hyper_params)
    legend = []
    
    print(gb.min()[hyperparams_to_choose_on_val_score])
    for params,df in gb.min().groupby(level=list(set(hyper_params).difference(['train_size']))):
#         print(df)
#         print(df.index)
#         if params[1] != "block_diag_no_norm" and params[2] != 0:
#             continue
        #if not ("block_diag" in params or "block_diag_no_norm_biased" in params or "no_reg" in params):continue
        if not ("block_diag" == params or "block_diag_no_norm_biased" == params or "no_reg" == params):continue
        print(params)
        #print("here")
        xs = list(df.index.get_level_values('train_size'))
        ys = list(df[metric])
#         if params[1] == 'block_diag':
#             c = (0,0,params[2]*4+0.5)
#         else:
#             c = (0,params[2]*4+0.5,0)
#         if params[0]<0.01:
#             c = (0.5,c[1],c[2])
        if "no_reg" in params:
            ls = '--'
        else:
            ls = '-'

        #plt.plot(xs,ys,c=c,ls=ls)
        plt.plot(xs,ys,ls=ls)
        legend.append(params)
    plt.legend(legend)

    



In [8]:

df_tom = {}
runs_tom = {}

for n in [2,3,4,5,6,7]:
    df_tom[n],runs_tom[n] = extract_run(f'xp - tomita {n} - batch size 32 - overlap')

df_tom_no_overlap,runs_tom_no_overlap=df_tom,runs_tom

# df_tom = {}
# runs_tom = {}
# for n in [2,5,3,4,6,7]:
#     df_tom[n],runs_tom[n] = extract_run(f'xp - tomita {n} - no overlap')

# df_tom_overlap,runs_tom_overlap=df_tom,runs_tom

100%|██████████| 11816/11816 [05:58<00:00, 32.94it/s]  
 29%|██▉       | 3475/11816 [00:00<00:00, 18384.51it/s]

xp - tomita 2 - batch size 32 - overlap:
 extracted 923 runs (10 failed runs skipped).


100%|██████████| 11816/11816 [04:58<00:00, 39.56it/s]  
 22%|██▏       | 2543/11816 [00:00<00:00, 12839.63it/s]

xp - tomita 3 - batch size 32 - overlap:
 extracted 931 runs (1 failed runs skipped).


100%|██████████| 11816/11816 [09:45<00:00, 20.17it/s]  
 14%|█▎        | 1611/11816 [00:00<00:01, 9341.98it/s]

xp - tomita 4 - batch size 32 - overlap:
 extracted 929 runs (3 failed runs skipped).


100%|██████████| 11816/11816 [05:05<00:00, 38.69it/s]  
  6%|▌         | 680/11816 [00:00<00:02, 4261.88it/s]

xp - tomita 5 - batch size 32 - overlap:
 extracted 929 runs (3 failed runs skipped).


100%|██████████| 11816/11816 [04:34<00:00, 43.09it/s]  
  0%|          | 1/11816 [00:00<29:07,  6.76it/s]

xp - tomita 6 - batch size 32 - overlap:
 extracted 925 runs (6 failed runs skipped).


100%|██████████| 11816/11816 [02:54<00:00, 67.54it/s]

xp - tomita 7 - batch size 32 - overlap:
 extracted 679 runs (0 failed runs skipped).





In [261]:
df_tom_overlap,runs_tom_overlap=df_tom,runs_tom

In [5]:
%matplotlib qt 

In [14]:
# df_tom,runs_tom=df_tom_overlap,runs_tom_overlap

df_tom,runs_tom = torch.load('tomita_all_nooverlap_results.pt')

#df_tom,runs_tom=df_tom_no_overlap,runs_tom_no_overlap
plt.ioff()


for metric in "test12loss val_loss test12acc".split():# test12acc test14loss test14acc".split():
    plt.figure()
    for n in [3,4,5,6]:
        plt.rc('legend',fontsize=10)
        if len(df_tom[n]) == 0 : continue
        plt.subplot(2,3,n-1)
        print(f"***** Tomita {n} ******")
        plot_results(df_tom[n],runs_tom[n],metric,hyperparams_to_choose_on_val_score=['lr_config','lambd','stop_proba'])
        plt.yscale('log')
        plt.title(f"Tomita {df_tom[n]['tomita_number'][0]} - {metric}")
        print("\n")
                 
    plt.show()


***** Tomita 3 ******
                                      lr_config   lambd    stop_proba
train_size hankel_russ_roul_type                                     
50         block_diag                     0.050  0.0005  2.000000e-01
           block_diag_biased              0.100  0.0005  1.000000e-07
           block_diag_no_norm             0.050  0.0010  1.000000e-01
           block_diag_no_norm_biased      0.050  0.0010  1.000000e-07
           no_reg                         0.050  0.0000  2.000000e-01
100        block_diag                     0.001  0.0005  1.000000e-01
           block_diag_biased              0.005  0.0001  1.000000e-07
           block_diag_no_norm             0.001  0.0010  1.000000e-01
           block_diag_no_norm_biased      0.005  0.0001  1.000000e-07
           no_reg                         0.005  0.0000  2.000000e-01
250        block_diag                     0.010  0.0001  1.000000e-01
           block_diag_biased              0.010  0.0001  1.000000e-0

                                      lr_config   lambd    stop_proba
train_size hankel_russ_roul_type                                     
50         block_diag                     0.050  0.0005  2.000000e-01
           block_diag_biased              0.100  0.0005  1.000000e-07
           block_diag_no_norm             0.050  0.0010  1.000000e-01
           block_diag_no_norm_biased      0.050  0.0010  1.000000e-07
           no_reg                         0.050  0.0000  2.000000e-01
100        block_diag                     0.001  0.0005  1.000000e-01
           block_diag_biased              0.005  0.0001  1.000000e-07
           block_diag_no_norm             0.001  0.0010  1.000000e-01
           block_diag_no_norm_biased      0.005  0.0001  1.000000e-07
           no_reg                         0.005  0.0000  2.000000e-01
250        block_diag                     0.010  0.0001  1.000000e-01
           block_diag_biased              0.010  0.0001  1.000000e-07
           block_dia

                                      lr_config   lambd    stop_proba
train_size hankel_russ_roul_type                                     
50         block_diag                     0.010  0.0010  1.000000e-01
           block_diag_biased              0.050  0.0005  1.000000e-07
           block_diag_no_norm             0.010  0.0050  1.000000e-01
           block_diag_no_norm_biased      0.050  0.0001  1.000000e-07
           no_reg                         0.005  0.0000  2.000000e-01
100        block_diag                     0.005  0.0005  1.000000e-01
           block_diag_biased              0.050  0.0050  1.000000e-07
           block_diag_no_norm             0.010  0.0005  2.000000e-01
           block_diag_no_norm_biased      0.010  0.0001  1.000000e-07
           no_reg                         0.010  0.0000  2.000000e-01
250        block_diag                     0.050  0.0050  1.000000e-01
           block_diag_biased              0.005  0.0001  1.000000e-07
           block_dia

In [10]:
import torch

torch.save([df_tom,runs_tom], 'tomita_all_overlap_results.pt')
#all_df,runs_histories = torch.load('xp_5_all_results.pt')

In [3]:
pd.DataFrame.from_dict(best_epoch_metrics,orient='index').reset_index()
config_df.set_index('id').join(pd.DataFrame.from_dict(best_epoch_metrics,orient='index'))
s = pd.Series([1,0.1,2,0.5,3,4], index=[49, 48, 47, 0, 1, 2]) 
s.loc[s.idxmin()]

NameError: name 'best_epoch_metrics' is not defined

In [249]:
def plot():
    plt.plot(range(10),range(10))
    plt.plot(range(10),range(10,0,-1))
    plt.legend(['bla','blo'])

def plot_something_else():
    plt.plot(range(10),range(10))
    plt.legend(['bli'])
    
plt.figure()
plt.subplot(2,2,1)
plot()
print("...")
plt.title(f"Tomita")
plt.subplot(2,2,2)
plot_something_else()
plt.title(f"Tomita")
plt.show()

...


In [27]:
idx = runs_histories['us2d68ce'].idxmin()['val_loss']
runs_histories['us2d68ce'].iloc[idx]

_step                6.300000e+01
_runtime             2.100000e+01
tau                           NaN
_timestamp           1.647792e+09
test_acc-14          4.564667e-01
val_acc              5.753019e-01
train_hankel_loss    1.621967e+00
train_acc            6.024432e-01
val_aim              5.538847e-01
train_cce_loss       7.821593e-01
test_aim-12          5.201154e-01
train_loss           9.443560e-01
train_aim            5.784659e-01
test_loss-12         9.645370e-01
test_acc-12          4.607308e-01
test_aim-10          5.300909e-01
test_loss-10         8.039070e-01
test_loss-14         9.661279e-01
test_acc-10          6.010000e-01
val_loss             7.971246e-01
test_aim-14          5.431333e-01
Name: 63, dtype: float64

In [32]:
input()

runs = api.runs("grabus/spectralRegularization")

count = 0
for run in tqdm(runs): 
    # run.summary are the output key/values like accuracy.
    # We call ._json_dict to omit large files 
    if 'xp - tomita 5 - no overlap' in run.tags:
        run.delete()
        count += 1
        
print(f'deleted {count} runs')




100%|██████████| 1923/1923 [00:30<00:00, 63.23it/s]

deleted 150 runs



