In [46]:
import numpy as np
import pandas as pd
from pathlib import Path
import pylab
import seaborn as sns
import glob
import yaml

# BOILERPLATE
def read_yaml(path):
    """Parse the YAML file at ``path`` and return the loaded object.

    ``path`` may be anything ``Path`` accepts; parsing uses ``yaml.safe_load``.
    """
    yaml_file = Path(path)
    with yaml_file.open('r') as stream:
        parsed = yaml.safe_load(stream)
    return parsed
def find_hparam(dic, key):
    """Depth-first search for ``key`` in a nested dictionary.

    Returns ``dic[key]`` when ``key`` is present at the current level.
    Otherwise the dict values are searched recursively, in iteration order,
    and the first result that is not ``None`` is returned. Returns ``None``
    when ``dic`` is not a dict or when no non-``None`` match exists.
    Only dict values are descended into; lists and other containers are not.
    """
    if not isinstance(dic, dict):
        return None
    if key in dic:
        return dic[key]
    # Lazy generator: stops recursing into siblings as soon as one hit is found.
    nested_hits = (find_hparam(child, key) for child in dic.values())
    return next((hit for hit in nested_hits if hit is not None), None)
            

# Set experiment path

In [8]:
# Directory of the experiment to analyse; all paths in this notebook are built from it.
exp_path = Path(
    "/mnt/rg_climate_benchmark_data/experiments/pau/partition_sweeps_classification/0.20x_train_classification_v0.5_08-25-2022_16:23:43/forestnet_v1.0"
)

# Find all logs and respective hyperparams

In [9]:
# glob returns matches in arbitrary (filesystem) order; sort them so the
# integer key each log receives later is the same on every run.
logs = sorted(glob.glob(str(exp_path / "csv_logs/*/")))
print(f"Found {len(logs)} logs.")

# Keep the parsed sweep config under its own name so `hparams` is only ever
# the list of hyperparameter names.
sweep_config = read_yaml(exp_path / "sweep_config.yaml")
# Only parameters declared with a 'distribution' entry are kept.
hparams = [
    name
    for name, spec in sweep_config['parameters'].items()
    if 'distribution' in spec
]
print("Hparams:", hparams)

Found 12 logs.
Hparams: ['lr_head', 'lr_backbone', 'hidden_size', 'batch_size']


# Process logs
1. Read each log and keep the values recorded at the end of each epoch
2. Add an exponential moving average (EMA) of the validation accuracy
3. Read the hyperparameters for that specific log and add them to the log as columns

In [60]:
# Build one per-epoch DataFrame per log directory, annotated with its hyperparameters.
logs_data = []
for log in logs:
    log_dir = Path(log)
    # One row per epoch, taking the last logged value of each metric.
    raw_metrics = pd.read_csv(str(log_dir / 'metrics.csv'))
    data = raw_metrics.groupby('epoch').agg('last').reset_index()
    # Exponentially weighted mean of the validation accuracy (alpha=0.8).
    val_accuracy = data['val_Accuracy']
    data['val_Accuracy_ema'] = val_accuracy.ewm(alpha=0.8).mean()
    conf = read_yaml(log_dir / 'config.yaml')
    # Each hyperparameter becomes a constant column; a missing one yields None.
    n_rows = len(data)
    for hparam in hparams:
        value = find_hparam(conf, hparam)
        data[hparam] = [value] * n_rows
    logs_data.append(data)

# Best Epoch
For each log, find the row of its best epoch (the epoch with the highest EMA of validation accuracy), then save these rows to a CSV file.

In [94]:
# Stack all logs; the outer index level is the log's position in `logs_data`.
logs_cat = pd.concat(logs_data, keys=list(range(len(logs_data))))
# Select the column before idxmax so only `val_Accuracy_ema` is reduced.
# Reducing every column first also hits all-NaN columns (e.g. test metrics)
# and any non-numeric column.
idx = logs_cat.groupby(level=0)['val_Accuracy_ema'].idxmax()
# One row per log: the epoch with the highest smoothed validation accuracy.
best_epochs = logs_cat.loc[idx]
display(best_epochs)
# Save the best-epoch rows (not the full per-epoch table) as the summary.
best_epochs.to_csv(exp_path / 'results_summary.csv')

Unnamed: 0,Unnamed: 1,epoch,train_loss,current_time,step,val_loss,val_Accuracy,train_Accuracy,test_loss,test_Accuracy,val_Accuracy_ema,lr_head,lr_backbone,hidden_size,batch_size
0,14,14,1.233594,1661445000.0,164,1.737908,0.426077,0.556502,,,0.424514,0.003971,1.385379e-07,128,128
1,20,20,1.248851,1661445000.0,230,2.003838,0.409779,0.558824,,,0.407093,0.002837,4.711515e-07,256,128
2,20,20,0.806983,1661445000.0,440,4.270146,0.466822,0.794892,,,0.458141,0.007649,0.0001020504,256,64
3,28,28,0.200493,1661445000.0,608,2.774168,0.458673,0.868421,,,0.447613,0.001022,0.0002053869,256,64
4,41,41,1.451442,1661446000.0,251,1.793495,0.485448,0.695046,,,0.474782,0.004511,0.0004136277,512,256
5,18,18,1.044356,1661446000.0,113,1.592233,0.488941,0.578173,,,0.470283,0.001188,1.08026e-05,512,256
6,10,10,0.537213,1661446000.0,120,1.858647,0.467986,0.80418,,,0.467301,0.000128,0.0002712213,512,128
7,49,49,1.296903,1661446000.0,299,1.952194,0.486612,0.891641,,,0.484084,0.000204,6.921141e-05,256,256
8,38,38,0.470702,1661446000.0,428,2.395996,0.476135,0.851393,,,0.472703,0.00148,2.032247e-05,256,128
9,66,66,1.10083,1661447000.0,736,1.740534,0.413271,0.609907,,,0.409934,0.000154,3.031438e-07,256,128


In [95]:
# Show the path of the summary CSV.
print(exp_path.joinpath('results_summary.csv'))

/mnt/rg_climate_benchmark_data/experiments/pau/partition_sweeps_classification/0.20x_train_classification_v0.5_08-25-2022_16:23:43/forestnet_v1.0/results_summary.csv
