In [2]:
import sys
sys.path.append('../30_data_tools/')

In [1]:
import plotly.express as px
import json
from datetime import datetime
import pandas as pd

In [3]:
from file_interaction import download_blob, get_blobs
import plotly.graph_objects as go

In [4]:
# Fetch the blob names under 'train_logs/' (via get_blobs) and display the last ten entries.
available_experiments = get_blobs(filter='train_logs/')
available_experiments[-10:]

['train_logs/2024-04-13_MobileNetV3_003.json',
 'train_logs/2024-04-25_Resnet50_001.json',
 'train_logs/2024-05-01_Resnet50_001.json',
 'train_logs/2024-05-01_Resnet50_002.json',
 'train_logs/2024-05-01_Resnet50_003.json',
 'train_logs/2024-05-01_Resnet50_004.json',
 'train_logs/2024-05-02_Resnet50_001.json',
 'train_logs/2024-05-02_Resnet50_002.json',
 'train_logs/2024-05-04_MobileNetV3_001.json',
 'train_logs/2024-05-04_Resnet50_001.json']

In [5]:
def load_experiment( experiment_name ):
    """Download one training log and convert its serialized fields to Python types.

    The log is fetched from ``train_logs/<experiment_name>.json`` through
    ``download_blob`` and parsed as JSON.

    Args:
        experiment_name: Name of the log without the ``train_logs/`` prefix
            and without the ``.json`` suffix.

    Returns:
        The parsed log as a dict in which ``start_timestamp``,
        ``end_timestamp`` (only when the key is present) and the
        ``timestamp`` of every entry in ``tracked_metrics`` are ``datetime``
        objects, and the ``value`` of every tracked metric is a ``float``.
    """
    parse_ts = datetime.fromisoformat

    blob_buffer = download_blob( f'train_logs/{ experiment_name }.json' )
    experiment = json.loads( blob_buffer.getvalue() )

    experiment['start_timestamp'] = parse_ts(experiment['start_timestamp'])
    # The end timestamp is optional; convert it only when the log contains one.
    if 'end_timestamp' in experiment:
        experiment['end_timestamp'] = parse_ts(experiment['end_timestamp'])

    for metric in experiment['tracked_metrics']:
        metric.update(
            timestamp=parse_ts(metric['timestamp']),
            value=float(metric['value']),
        )

    return experiment

In [6]:
def get_figure( data, metric_name, mode ):
    """Plot one metric of one mode against the step, with one line per epoch.

    Args:
        data: DataFrame with the columns ``metric_name``, ``mode``, ``step``,
            ``value`` and ``epoch``.
        metric_name: Value of the ``metric_name`` column to keep.
        mode: Value of the ``mode`` column to keep.

    Returns:
        The figure created by ``px.line`` from the matching rows.
    """
    # Select the matching rows once and reuse them for every plotted series.
    wanted = (data['metric_name'] == metric_name) & (data['mode'] == mode)
    selected = data.loc[wanted]

    return px.line(
        x=selected['step'],
        y=selected['value'],
        color=selected['epoch'],
    )

In [7]:
# Load one experiment log and turn its tracked metrics (a list of records)
# into a DataFrame with one row per logged value.
experiment_name = '2024-05-04_Resnet50_001'
experiment_data = load_experiment(experiment_name)
data = pd.DataFrame(experiment_data['tracked_metrics'])

In [8]:
# Show the hyper-parameters stored in the loaded experiment log.
experiment_data['hyper_parameters']

{'dataset_name': '24-05-02_001_tile_dataset',
 'data_type': 'spatial',
 'epochs': 25,
 'learning_rate': 0.005,
 'batch_size': 64,
 'loss_function': 'CrossEntropyLoss',
 'metric_functions': ['MulticlassAccuracy',
  'MulticlassRecall',
  'MulticlassPrecision'],
 'optimizer': 'SGD',
 'device': 'cpu'}

In [9]:
# Display the tracked metrics frame built from the experiment log.
data

Unnamed: 0,metric_name,value,timestamp,mode,dataset,epoch,step
0,loss,0.686538,2024-05-04 15:14:16.667867,train,train,1,0
1,accuracy,0.546875,2024-05-04 15:14:16.784539,train,train,1,0
2,recall,0.520513,2024-05-04 15:14:16.929529,train,train,1,0
3,precision,0.520833,2024-05-04 15:14:17.059920,train,train,1,0
4,loss,0.684053,2024-05-04 15:16:29.298448,train,train,1,1
5,accuracy,0.5625,2024-05-04 15:16:29.522363,train,train,1,1
6,recall,0.514844,2024-05-04 15:16:29.688344,train,train,1,1
7,precision,0.543972,2024-05-04 15:16:29.851751,train,train,1,1


In [None]:
# Give every training row a run-wide step counter (`step_total`) and pivot the
# rows so that each metric becomes one column indexed by that counter.
#
# `step` is 0-based within an epoch, so one epoch spans `step.max() + 1`
# steps. Multiplying by `step.max()` alone would give the last step of one
# epoch and the first step of the next epoch the same `step_total`, and the
# `.first()` below would then silently drop one of the two measurements.
rel_data_train = (
    data.loc[data['mode'] == 'train']
    .assign(
        step_total=lambda rows: (rows['epoch'] - 1) * (rows['step'].max() + 1) + rows['step']
    )
)

grouped_train = (
    rel_data_train
    .groupby(['metric_name', 'step_total'])['value']
    .first()
    .unstack('metric_name')
    .rename_axis(columns=None)  # plain metric names as column labels
)

In [None]:
# Line chart of the pivoted training metrics (one line per column of grouped_train).
px.line(grouped_train)

In [None]:
# Give every validation row a run-wide step counter (`step_total`) and pivot
# the rows so that each metric becomes one column indexed by that counter.
#
# One epoch spans `step.max() + 1` steps when `step` is 0-based. Multiplying
# by `step.max()` alone would give the last step of one epoch and the first
# step of the next epoch the same `step_total`, and the `.first()` below would
# then silently drop one of the two measurements. The `+ 1` also stays
# collision-free if the steps turn out to be 1-based.
rel_data_val = (
    data.loc[data['mode'] == 'val']
    .assign(
        step_total=lambda rows: (rows['epoch'] - 1) * (rows['step'].max() + 1) + rows['step']
    )
)

grouped_val = (
    rel_data_val
    .groupby(['metric_name', 'step_total'])['value']
    .first()
    .unstack('metric_name')
    .rename_axis(columns=None)  # plain metric names as column labels
)

In [None]:
# Change in validation loss from the first to the last row, divided by the
# number of rows in grouped_val.
(grouped_val['loss'].iloc[-1] - grouped_val['loss'].iloc[0]) / len(grouped_val)

In [None]:
# Line chart of the pivoted validation metrics (one line per column of grouped_val).
px.line(grouped_val)

In [None]:
# Training loss over the step, one line per epoch.
get_figure(data, 'loss', 'train')

In [None]:
# Validation loss over the step, one line per epoch.
get_figure(data, 'loss', 'val')

In [None]:
# Validation accuracy over the step, one line per epoch.
get_figure(data, 'accuracy', 'val')

In [None]:
# Validation recall over the step, one line per epoch.
get_figure(data, 'recall', 'val')

In [None]:
# Validation precision over the step, one line per epoch.
get_figure(data, 'precision', 'val')