# Analysis

In [49]:
import os, re
from main.FolderInfos import FolderInfos

# Discover the run folders that can be analysed: a folder qualifies when it
# contains at least one file matching every pattern in `required_file_patterns`.
FolderInfos.init(test_without_data=True)
required_file_patterns = [re.compile(r".+_parameters\.json$")]
list_dataout_folders = os.listdir(FolderInfos.data_folder)
list_folders = []
for f in list_dataout_folders:
    full_path = FolderInfos.data_folder+f
    # Stray files sitting next to the run folders cannot be listed; skip them.
    if not os.path.isdir(full_path):
        continue
    files = os.listdir(full_path)
    # Evaluate the requirement once per folder (not once per file) so that a
    # folder is appended at most one time.
    if all(any(pattern.match(file) for file in files) for pattern in required_file_patterns):
        list_folders.append(f)

print("\n".join(list_folders))
import ipywidgets as widgets
# One toggle per discovered run folder, kept by folder name so that the widgets
# are not thrown away as soon as they are built and can be displayed or read
# (`.value`) afterwards.
# NOTE(review): the toggles are not displayed here and their state is not
# consumed yet -- the loading step still takes every discovered folder.
folder_toggles = {}
for f in list_folders:
    folder_toggles[f] = widgets.ToggleButton(
        value=False,
        description=f,
        disabled=False,
        button_style='', # 'success', 'info', 'warning', 'danger' or ''
        tooltip='',
        icon='check' # (FontAwesome names without the `fa-` prefix)
    )
# Load the parameters JSON of every selected run into `global_dict`,
# keyed by the run folder name.
selected_folder = list_folders
import json
global_dict = {}
for f in selected_folder:
    full_path = FolderInfos.data_folder+f+FolderInfos.separator
    # Historical naming: "<folder name>parameters.json" (works when the folder
    # name itself ends with "_").
    parameters_file = f"{f}parameters.json"
    if not os.path.isfile(full_path+parameters_file):
        # Fall back to whatever file made the folder eligible during discovery
        # (any "*_parameters.json"); sorted so the choice is deterministic.
        candidates = sorted(name for name in os.listdir(full_path) if name.endswith("_parameters.json"))
        if candidates:
            parameters_file = candidates[0]
    # If nothing matched, open() raises FileNotFoundError on the historical name,
    # exactly as before.
    with open(full_path+parameters_file, encoding="utf-8") as fp:
        global_dict[f] = json.load(fp)

2021-06-10_11h31min34s_


In [131]:
from IPython.display import display
from ipywidgets import Checkbox, VBox, IntSlider
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import main.src.analysis.tools as tls

def loss_access(x,mode):
    """Return the loss values stored for ``mode`` in a run's parameters.

    Args:
        x: mapping with a ``"training"`` entry (a loaded parameters dict).
        mode: key prefix; the value read is ``x["training"][f"{mode}_loss"]``.

    Raises:
        KeyError: if ``"training"`` or the ``"<mode>_loss"`` entry is missing.
    """
    training_section = x["training"]
    loss_key = f"{mode}_loss"
    return training_section[loss_key]
def metrics_access(x,name,mode):
    """Return the recorded values of metric ``name`` for ``mode``.

    Args:
        x: mapping with a ``"metrics"`` entry (a loaded parameters dict).
        name: metric identifier under ``x["metrics"]["attr_list_metrics"]``.
        mode: key prefix; the value read is the metric's ``f"{mode}_values"`` entry.

    Raises:
        KeyError: if any level of the lookup is missing.
    """
    metric_entry = x["metrics"]["attr_list_metrics"][name]
    values_key = f"{mode}_values"
    return metric_entry[values_key]

def _loss_getter(mode):
    """Build an accessor that reads the ``mode`` loss values from a run's parameters."""
    return lambda x:loss_access(x,mode=mode)

def _metric_getter(name,mode):
    """Build an accessor that reads metric ``name`` for ``mode`` from a run's parameters."""
    return lambda x:metrics_access(x,name,mode)

# Series name -> accessor taking a loaded parameters dict, grouped by family.
dico_loss = dict([
    ("tr_loss",_loss_getter("tr")),
    ("valid_loss",_loss_getter("valid")),
])
dico_accuracy = dict([
    ("tr_accuracy_classification-0.1",_metric_getter("accuracy_classification-0.1","tr")),
    ("valid_accuracy_classification-0.1",_metric_getter("accuracy_classification-0.1","valid")),
    ("tr_accuracy_classification-0.25",_metric_getter("accuracy_classification-0.25","tr")),
    ("valid_accuracy_classification-0.25",_metric_getter("accuracy_classification-0.25","valid")),
])
dico_mae = dict([
    ("tr_mae",_metric_getter("mae","tr")),
    ("valid_mae",_metric_getter("mae","valid")),
])
# Merged table; insertion order (loss, accuracy, mae) is the order the
# series are offered in later on.
list_access_functions = dict(dico_loss)
list_access_functions.update(dico_accuracy)
list_access_functions.update(dico_mae)
# Keep only the series that can be read from EVERY loaded run: a series is
# dropped as soon as one run's parameters lack the keys its accessor needs.
list_values_names_availables = []
for name,f in list_access_functions.items():
    try:
        for dico in global_dict.values():
            f(dico)
    # The except clause must name exception classes. The accessors only index
    # into nested containers, so a missing key/index (LookupError) or a
    # non-subscriptable value (TypeError) means "not available for this run".
    except (LookupError, TypeError):
        continue
    list_values_names_availables.append(name)

# One checkbox per available series, keyed by series name.
filters = {}
for name in list_values_names_availables:
    filters[name] = Checkbox(False,description=name)
# Extra controls: log-scale switch and moving-mean window slider.
log_scale = Checkbox(False,description="y_log")
window_mean = IntSlider(value=10,min=0,max=200,description="Mean window",step=10)
# Stack everything vertically: series checkboxes first, then the two controls.
list_widgets = list(filters.values()) + [log_scale,window_mean]
box = VBox(list_widgets)
display(box)


VBox(children=(Checkbox(value=False, description='tr_loss'), Checkbox(value=False, description='valid_loss'), …

In [144]:
# Keep the series whose checkbox is currently ticked, in checkbox order.
list_values_names_availables_filtered = [
    metric for metric,widg in filters.items() if widg.value is True
]
# The rest of the cell reads the selection under the original name.
list_values_names_availables = list_values_names_availables_filtered
# Build one smoothed line trace per (selected series, loaded run) and track the
# overall x extent and y range of everything plotted.
liste_values = []
# Start from +/-inf so the tracked range is correct whatever the magnitude of
# the plotted values.
min_val,max_val = float("inf"),float("-inf")
max_length = 0
for metric_name in list_values_names_availables:
    access = list_access_functions[metric_name]
    for f,run_params in global_dict.items():
        data_cfg = run_params["data"]
        # x spacing between two consecutive points: one batch per point, or
        # batch_size * eval_step for the "valid" series.
        batch_size = data_cfg["batch_size"]
        if "valid" in metric_name:
            batch_size = batch_size * data_cfg["eval_step"]
        # Slider value converted from samples to a number of points.
        # NOTE(review): this is 0 whenever the slider value is below batch_size;
        # confirm that tls.moving_mean accepts window=0.
        window = int(window_mean.value/batch_size)
        v = np.asarray(tls.moving_mean(access(run_params),window=window))
        if v.size == 0:
            # Nothing to draw for this run; np.min/np.max would fail on it.
            continue
        x = np.arange(0,len(v)) * batch_size

        max_length = max(np.max(x),max_length)
        min_val = min(min_val,np.min(v))
        max_val = max(max_val,np.max(v))
        graph = go.Scatter(x=x,y=v,mode='lines',name=f"{metric_name} {f}")
        liste_values.append(graph)
# Horizontal legend anchored by its bottom-right corner just above the plot area.
legend_above_plot = {
    "orientation":"h",
    "yanchor":"bottom",
    "y":1.02,
    "xanchor":"right",
    "x":1,
}
figure_layout = go.Layout(
    title=go.layout.Title(text=""),
    xaxis_title="Number of samples",
    yaxis_title="Values",
    legend=legend_above_plot,
    yaxis=dict(tickformat='.0e'),
)
# Figure holding every trace built above.
fig = go.Figure(data=liste_values,layout=figure_layout)
import numpy as np
# Spacing of the vertical markers: the largest dataset length among the loaded
# runs (presumably one epoch, in samples). `default=0` covers the case where
# no run is loaded.
epoch_size = max(
    (params["data"]["dataset"]["attr_length_dataset"] for params in global_dict.values()),
    default=0,
)

# Collect ALL marker lines first: `update_layout(shapes=...)` replaces the whole
# shape list, so calling it once per marker would leave only the last one.
epoch_markers = []
if epoch_size > 0:  # a non-positive step is not a usable spacing
    for epoch_start in np.arange(0,max_length,epoch_size):
        epoch_markers.append(
            dict(
                type='line',
                yref='y', y0=min_val, y1=max_val,
                xref='x', x0=epoch_start, x1=epoch_start,
                line=dict(
                    color="orange",
                    width=2,
                )
            )
        )
if epoch_markers:
    fig.update_layout(shapes=epoch_markers)

# Optional logarithmic y axis, driven by the "y_log" checkbox.
if log_scale.value is True:
    fig.update_yaxes(type="log")


fig.show()