#### OPT-ML Mini project

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import pandas as pd
from pathlib import Path
import re
import plotly.express as px

#### 1. Load initial data files containing experiments and retrieve execution parameters and metrics

(If you do not have the original `run.txt` files, skip this section and go directly to section 2.)

In [2]:
# Optimizer folder names that are recognised; a directory is parsed only when
# its parent folder carries one of these names.
list_of_optimizers = ['SGD', 'Adam', 'ExtraAdam']

# Folder that is scanned recursively, and the CSV file the parsed table is written to.
root_dir = './backup/tensorboard/'
save_dir = './data/experiments.csv'

# One dict per parsed log line: execution parameters plus the metrics of that
# iteration/epoch of an experiment.
metrics = list()

# Every subdirectory found below root_dir (filled by listdirs).
list_sub_dirs = list()

# Number of experiment directories that were parsed.
count_exps = 0

# list child dirs recursively
def listdirs(rootdir, collected=None):
    """Recursively collect every subdirectory found below ``rootdir``.

    Directories are appended in pre-order (a directory first, then its
    children). Entries are sorted before being visited because
    ``Path.iterdir()`` yields them in arbitrary order; sorting makes the
    resulting list identical from one run to the next.

    Parameters
    ----------
    rootdir : str or Path
        Directory to scan. Must exist, otherwise ``Path.iterdir()`` raises.
    collected : list, optional
        List that receives the directories. When omitted, the module-level
        ``list_sub_dirs`` is used, which is the original behaviour.

    Returns
    -------
    list
        The list the directories were appended to.
    """
    if collected is None:
        # Default to the notebook-level accumulator so existing calls keep working.
        collected = list_sub_dirs
    for path in sorted(Path(rootdir).iterdir()):
        if path.is_dir():
            collected.append(path)
            listdirs(path, collected)
    return collected

# Collect every directory below root_dir into list_sub_dirs.
listdirs(root_dir)

# Number pattern shared by the parsers below. The decimal point is escaped so
# that it only matches a literal '.' (an unescaped '.' matches any character);
# an optional exponent is accepted so values such as '5e-05' are read whole.
_NUM = r'[0-9]+(?:\.[0-9]+)?(?:[eE][-+]?[0-9]+)?'
_LR_RE = re.compile(r'lr(' + _NUM + r')')
_LRG_RE = re.compile(r'lrG(' + _NUM + r')')
_LRD_RE = re.compile(r'lrD(' + _NUM + r')')
_BSZ_RE = re.compile(r'bsz([0-9]+)')
# beta deliberately keeps exactly one digit after the point: in the directory
# names the date follows immediately (e.g. 'beta0.92022-06-08').
_BETA_RE = re.compile(r'beta([0-9]\.[0-9])')
_CUR_RE = re.compile(r'\[([0-9]+)')
_MAX_RE = re.compile(r'/([0-9]+)')
_METRIC_RES = {
    key: re.compile(key + r': (' + _NUM + r')')
    for key in ('IS_mean', 'IS_std', 'FID')
}


def _parse_run_name(name):
    """Extract ``(lrG, lrD, bsz, beta)`` as strings from an experiment directory name.

    A single ``lr<value>`` field is used for both learning rates; otherwise
    the separate ``lrG<value>`` and ``lrD<value>`` fields are required.

    Raises
    ------
    ValueError
        If a required field cannot be found in ``name``.
    """
    def _field(pattern, label):
        found = pattern.search(name)
        if found is None:
            raise ValueError(
                f'cannot find {label} in experiment directory name: {name!r}'
            )
        return found.group(1)

    shared_lr = _LR_RE.search(name)
    if shared_lr:
        lr_gen = lr_disc = shared_lr.group(1)
    else:
        lr_gen = _field(_LRG_RE, 'lrG')
        lr_disc = _field(_LRD_RE, 'lrD')
    return lr_gen, lr_disc, _field(_BSZ_RE, 'bsz'), _field(_BETA_RE, 'beta')


def _parse_metrics_line(line):
    """Return the counters and scores found in one log line, or ``None``.

    ``None`` is returned when the line does not carry two '[<n>' counters and
    two '/<n>' maxima; such a line cannot provide cur/max epoch and iteration.
    Missing scores (IS_mean, IS_std, FID) are simply left out of the dict.
    """
    current = _CUR_RE.findall(line)
    maximum = _MAX_RE.findall(line)
    if len(current) < 2 or len(maximum) < 2:
        return None

    record = {
        'cur_epoch': current[0],
        'cur_iter': current[1],
        'max_epoch': maximum[0],
        'max_iter': maximum[1],
    }
    for key, pattern in _METRIC_RES.items():
        found = pattern.search(line)
        if found:
            record[key] = found.group(1)
    return record


for exp_dir in list_sub_dirs:
    optimizer = exp_dir.parent.name

    # only directories that sit directly inside an optimizer folder are experiments
    if optimizer not in list_of_optimizers:
        continue
    count_exps += 1

    # the directory name encodes the execution parameters
    lrG, lrD, bsz, beta = _parse_run_name(exp_dir.name)

    # parse run.txt lines with pattern matching
    with open(exp_dir.joinpath('run.txt'), 'r') as exp_file:
        for line in exp_file:
            if 'FID' not in line:
                continue

            parsed = _parse_metrics_line(line)
            if parsed is None:
                # No epoch/iteration counters on this line: skip it, a row
                # without them would hold NaN in the columns that are cast to
                # int afterwards.
                continue

            line_dict = {
                'optimizer': optimizer,
                'lrG': lrG,
                'lrD': lrD,
                'bsz': bsz,
                'beta': beta,
            }
            line_dict.update(parsed)
            metrics.append(line_dict)

    print(exp_dir)

metrics_df = pd.DataFrame(metrics)
print(f'Total parsed experiments: {count_exps}')

backup\tensorboard\Adam\gan_ep100_lrD0.0001_lrG0.0001_bsz128_imsz64optbeta0.92022-06-08
backup\tensorboard\Adam\gan_ep100_lrD0.0001_lrG0.0002_bsz128_imsz64optbeta0.52022-06-08T
backup\tensorboard\Adam\gan_ep100_lrD0.0001_lrG0.0002_bsz128_imsz64optbeta0.92022-06-09
backup\tensorboard\Adam\gan_ep100_lrD0.0001_lrG0.0005_bsz128_imsz64optbeta0.52022-06-08
backup\tensorboard\Adam\gan_ep100_lrD0.0001_lrG0.0005_bsz128_imsz64optbeta0.92022-06-09
backup\tensorboard\Adam\gan_ep100_lrD0.0002_lrG0.0001_bsz128_imsz64optbeta0.52022-06-07
backup\tensorboard\Adam\gan_ep100_lrD0.0002_lrG0.0001_bsz128_imsz64optbeta0.92022-06-08
backup\tensorboard\Adam\gan_ep100_lrD0.0002_lrG0.0005_bsz128_imsz64optbeta0.52022-06-08
backup\tensorboard\Adam\gan_ep100_lrD0.0002_lrG0.0005_bsz128_imsz64optbeta0.92022-06-09
backup\tensorboard\Adam\gan_ep100_lrD0.0005_lrG0.0001_bsz128_imsz64optbeta0.52022-06-07
backup\tensorboard\Adam\gan_ep100_lrD0.0005_lrG0.0001_bsz128_imsz64optbeta0.92022-06-09
backup\tensorboard\Adam\gan_ep1

In [3]:
# create a name for each type of experiment (for grouping/plotting purposes);
# this has to happen while lrG/lrD/beta are still strings, i.e. before the
# numeric conversion below
metrics_df['name'] = (
    metrics_df['optimizer']
    + '_lrG' + metrics_df['lrG']
    + '_lrD' + metrics_df['lrD']
    + '_beta' + metrics_df['beta']
)

# convert object columns to int / float type in a single pass
int_cols = ['bsz', 'cur_epoch', 'cur_iter', 'max_epoch', 'max_iter']
float_cols = ['lrG', 'lrD', 'beta', 'IS_mean', 'IS_std', 'FID']
metrics_df = metrics_df.astype(
    {**dict.fromkeys(int_cols, 'int'), **dict.fromkeys(float_cols, 'float')}
)

# compute number of iterations
metrics_df['iter'] = metrics_df['cur_epoch']*metrics_df['max_iter'] + metrics_df['cur_iter']

# save to csv; to_csv does not create missing folders, so make sure the
# destination directory exists first
Path(save_dir).parent.mkdir(parents=True, exist_ok=True)
metrics_df.to_csv(save_dir, index=False)

metrics_df.head()

Unnamed: 0,optimizer,lrG,lrD,bsz,beta,cur_epoch,cur_iter,max_epoch,max_iter,IS_mean,IS_std,FID,name,iter
0,Adam,0.0001,0.0001,128,0.9,0,0,100,1583,1.0021,0.0002,481.7397,Adam_lrG0.0001_lrD0.0001_beta0.9,0
1,Adam,0.0001,0.0001,128,0.9,0,250,100,1583,1.0058,0.0002,470.6305,Adam_lrG0.0001_lrD0.0001_beta0.9,250
2,Adam,0.0001,0.0001,128,0.9,0,500,100,1583,1.0113,0.0005,448.0988,Adam_lrG0.0001_lrD0.0001_beta0.9,500
3,Adam,0.0001,0.0001,128,0.9,0,750,100,1583,1.0147,0.0006,405.5317,Adam_lrG0.0001_lrD0.0001_beta0.9,750
4,Adam,0.0001,0.0001,128,0.9,0,1000,100,1583,1.0168,0.0007,368.3643,Adam_lrG0.0001_lrD0.0001_beta0.9,1000


#### 2. Load execution parameters and metrics for each experiment

In [6]:
# Path of the CSV file holding the parsed experiments.
load_dir = './data/experiments.csv'

# Read the table back into a DataFrame.
metrics_df = pd.read_csv(filepath_or_buffer=load_dir)

# `size` is the total number of cells (rows x columns), not the row count.
display(metrics_df.size)
metrics_df.head()

423052

Unnamed: 0,optimizer,lrG,lrD,bsz,beta,cur_epoch,cur_iter,max_epoch,max_iter,IS_mean,IS_std,FID,name,iter
0,Adam,0.0001,0.0001,128,0.9,0,0,100,1583,1.0021,0.0002,481.7397,Adam_lrG0.0001_lrD0.0001_beta0.9,0
1,Adam,0.0001,0.0001,128,0.9,0,250,100,1583,1.0058,0.0002,470.6305,Adam_lrG0.0001_lrD0.0001_beta0.9,250
2,Adam,0.0001,0.0001,128,0.9,0,500,100,1583,1.0113,0.0005,448.0988,Adam_lrG0.0001_lrD0.0001_beta0.9,500
3,Adam,0.0001,0.0001,128,0.9,0,750,100,1583,1.0147,0.0006,405.5317,Adam_lrG0.0001_lrD0.0001_beta0.9,750
4,Adam,0.0001,0.0001,128,0.9,0,1000,100,1583,1.0168,0.0007,368.3643,Adam_lrG0.0001_lrD0.0001_beta0.9,1000


#### 3. Create line-plots

In [None]:
# Lowest FID reached by each experiment name.
div_opt_df = (
    metrics_df
    .groupby('name')['FID']
    .min()
    .reset_index()
)

# Names whose best FID stays above 20, and all of their rows.
above_threshold = div_opt_df['FID'].gt(20)
div_opt_name = div_opt_df.loc[above_threshold, 'name'].values
div_metrics_df = metrics_df.loc[metrics_df['name'].isin(div_opt_name)]

# FID against iteration count, one colour per experiment name.
fig = px.line(div_metrics_df, color='name', x="iter", y="FID")
fig.show()

In [None]:
# FID against iteration count, restricted to the rows with beta equal to 0.9.
beta_mask = metrics_df['beta'].eq(0.9)
fig = px.line(metrics_df.loc[beta_mask], color='name', x="iter", y="FID")
fig.show()

In [None]:
# FID against iteration count, restricted to the rows produced with the Adam optimizer.
adam_mask = metrics_df['optimizer'].eq('Adam')
fig = px.line(metrics_df.loc[adam_mask], color='name', x="iter", y="FID")
fig.show()

In [None]:
# FID against iteration count for every row of the table, coloured by experiment name.
fig = px.line(metrics_df, color='name', x="iter", y="FID")
fig.show()