# Analysis

## Get backend data

In [38]:
import os, re
from main.FolderInfos import FolderInfos
import pandas as pd
import json

FolderInfos.init(test_without_data=True)
list_dataout_folders = os.listdir(FolderInfos.data_folder)
with open(FolderInfos.root_folder+"main"+FolderInfos.separator+"src"+FolderInfos.separator+"analysis"+FolderInfos.separator+"extract_data.json","r") as fp:
    dico_access = json.load(fp)
dico_metrics = {}
available_metrics = dico_access["metrics"].keys()
runs_metadata = []
for f in list_dataout_folders:
    path_json = FolderInfos.data_folder+f+FolderInfos.separator+f+"parameters.json"

    if os.path.exists(path_json) is True:
        with open(path_json,"r") as fp:
            dico = json.load(fp)
        dico_run_metadata = {"folder_id":f}
        for metadata_name,dico_metadata in dico_access["metadata"].items():
            try:
                value_metadata = eval(dico_metadata["access"])(dico)
            except:
                value_metadata = eval(dico_metadata["default"])
            dico_run_metadata[metadata_name] = value_metadata
        runs_metadata.append(dico_run_metadata)

        dico_metrics[f] = {}
        for metric_name,access_func in dico_access["metrics"].items():
            try:
                value_list = eval(access_func)(dico)
            except:
                value_list = {"tr_values":[],"valid_values":[]}
            dico_metrics[f][metric_name] = value_list

df = pd.DataFrame(runs_metadata)
print("Backend data ready !")

Backend data ready !


## Visualize plots and filter

In [57]:
print(available_metrics)

dict_keys(['accuracy_classification-0.25', 'accuracy_classification-0.1', 'mae', 'loss'])


In [65]:
import ipywidgets as widgets
compute_width = lambda value:f"{max(len(str(value)) * 12, 60) + 20}px"
list_vertical_widgets = []
dico_changing_widgets_metadata = {}
list_changing_widgets_metrics = []
list_changing_widgets_tr_valid = []
# Widgets for the metadata ------------------------------------------------------------------
label_metadata_filter = "Metadata"
llocalvertwidgets = []
for metadata_name in df.columns:
    if metadata_name == "folder_id":
        continue
    possible_values = df[metadata_name].unique()
    label = widgets.Label(metadata_name)
    possibilities = [widgets.ToggleButton(
        value=True,
        description=str(value),
        disabled=False,
        button_style='', # 'success', 'info', 'warning', 'danger' or ''
        tooltip='',
        icon='check',
        layout = widgets.Layout(width = compute_width(value), margin='0px 0px 0px 0px')
    ) for value in possible_values]
    dico_changing_widgets_metadata[metadata_name] = possibilities
    box = widgets.HBox(layout=widgets.Layout(width='100%',display='inline-flex',flex_flow='row wrap'),width='100%')
    box.overflow_x = 'auto'
    box.children = [label]+possibilities
    llocalvertwidgets.append(box)

box = widgets.VBox(width='100%')
box.children = llocalvertwidgets
list_vertical_widgets.append(box)
# Widgets to select metrics ---------------------------------------------------------------
label_metrics_filter = "Metrics"
possibilities = [widgets.ToggleButton(
        value=True,
        description=metric_name,
        disabled=False,
        button_style='', # 'success', 'info', 'warning', 'danger' or ''
        tooltip='',
        icon='check',
        layout = widgets.Layout(width = compute_width(metric_name), margin='0px 0px 0px 0px')
    )
    for metric_name in available_metrics]
list_changing_widgets_metrics = possibilities
box = widgets.HBox(layout=widgets.Layout(width='100%',display='inline-flex',flex_flow='row wrap'),width='100%')
box.overflow_x = 'auto'
box.children = possibilities
list_vertical_widgets.append(box)

# widgets to select tr or valid ----------------------------------------------------------
label_tr_valid_filter = "Mode_of_training"
possibilities = [widgets.ToggleButton(
        value=True,
        description=mode,
        disabled=False,
        button_style='', # 'success', 'info', 'warning', 'danger' or ''
        tooltip='',
        icon='check',
        layout = widgets.Layout(width = compute_width(mode), margin='0px 0px 0px 0px')
    )
    for mode in ["tr","valid"]]
list_changing_widgets_tr_valid = possibilities
box = widgets.HBox(layout=widgets.Layout(width='100%',display='inline-flex',flex_flow='row wrap'),width='100%')
box.overflow_x = 'auto'
box.children = possibilities
list_vertical_widgets.append(box)

titles = ["Metadata", "Metrics","Mode_of_training"]
l = []
for item,title in zip(list_vertical_widgets,titles):
    accordion = widgets.Accordion(children=[item],titles=(title,))
    accordion.set_title(0,title)
    accordion.selected_index = 0
    l.append(accordion)
# widget for moving average
moving_average = widgets.IntText(
    value=100,
    description='Mean window:',
    disabled=False
)
# widget to toggle log scale for the y axis
y_log_scale = widgets.ToggleButton(
        value=True,
        description="logy",
        disabled=False,
        button_style='', # 'success', 'info', 'warning', 'danger' or ''
        tooltip='',
        icon='check',
        layout = widgets.Layout(width = compute_width("logy"), margin='0px 0px 0px 0px')
    )
global_box = widgets.VBox(l+[moving_average,y_log_scale])

# Handlers --------------------------------------------------------------------------------------
from IPython.display import display,clear_output
import plotly.graph_objects as go
import main.src.analysis.tools as tls
def filter_metadata():
    # Extract informations from widgets to build a query for the dataframe
    list_sub_requests_1 = []
    for metadata_name,liste_w in dico_changing_widgets_metadata.items():
        list_sub_requests_2 = []
        for possibility_widget in liste_w:
            if possibility_widget.value is True:
                list_sub_requests_2.append(f"({metadata_name} == {possibility_widget.description})")
            else:
                list_sub_requests_2.append(f"not ({metadata_name} == {possibility_widget.description})")
                print(possibility_widget.description)
        poss = r" or ".join(list_sub_requests_2)
        list_sub_requests_1.append(f"({poss})")
    query = r" or ".join(list_sub_requests_1)
    # Apply the query to the dataframe
    print(query)
    filtered_dataframe = df.query(query)
    list_runs = filtered_dataframe["folder_id"].unique()
    return list_runs,filtered_dataframe
def filter_metrics(runs):
    new_dico = {}
    for f,dico in dico_metrics.items():
        if f not in runs:
            continue
        new_dico[f] = {}
        for metric_name,list_values in dico.items():
            new_dico[f][metric_name] = list_values
    return new_dico
def plot(dico_vals,filtered_dataframe):
    list_scatters = []
    for run,dico_metrics in dico_vals.items():
        line_dataframe = filtered_dataframe[filtered_dataframe["folder_id"] == run]
        batch_size_tr = line_dataframe["batch_size"]
        batch_size_valid = line_dataframe["batch_size"] * line_dataframe["eval_step"]
        for metric_name,metric_values in dico_metrics.items():
            if metric_name == "loss":
                metric_name = "loss_"+filtered_dataframe[filtered_dataframe["folder_id"] == run]["loss"]
                access_name = "_loss"
            else:
                access_name = "_values"
            for mode_w in list_changing_widgets_tr_valid:
                if mode_w.value is False:
                    continue
                batch_size = batch_size_tr if mode_w.description == "tr" else batch_size_valid
                window = int(moving_average.value/batch_size)
                v = tls.moving_mean(metric_values[mode_w.description+access_name],window=window)
                x = np.arange(0,len(v)) * batch_size
                x = pd.Series(x)
                v = pd.Series(v)
                graph = go.Scatter(x=x,y=v,mode='lines',name=f"{metric_name} {f}",hovertemplate='Sample n°: %{x}'+f'<br>{metric_name}:'+' %{y}')
                list_scatters.append(graph)
    fig = go.Figure(
        data=list_scatters,
        layout=go.Layout(
            title=go.layout.Title(text=""),
            xaxis_title="Number of samples processed",
            yaxis_title="Values",
            legend=dict(
                orientation="h",
                yanchor="bottom",
                y=1.02,
                xanchor="right",
                x=1
            ),
            yaxis={'tickformat':'.1e',},
        )
    )
    if y_log_scale.value is True:
        fig.update_yaxes(type="log")
    fig.show()
    display(global_box)
def handler_metadata(v):
    runs,filtered_dataframe = filter_metadata()
    dico_vals = filter_metrics(runs)
    plot(dico_vals,filtered_dataframe)
def handler_metrics(v):
    runs,filtered_dataframe = filter_metadata()
    dico_vals = filter_metrics(runs)
    plot(dico_vals,filtered_dataframe)


for name,liste_w in dico_changing_widgets_metadata.items():
    for w in liste_w:
        w.observe(handler_metadata)
for w in list_changing_widgets_metrics:
    w.observe(handler_metrics)

for w in list_changing_widgets_tr_valid:
    w.observe(handler_metrics)
global_box

VBox(children=(Accordion(children=(VBox(children=(HBox(children=(Label(value='grid_size_px'), ToggleButton(val…

((grid_size_px == 1000)) or ((limit_num_images == None)) or ((resizer_size == 256)) or ((length_dataset == -1)) or ((prct_tr == 0.7)) or ((batch_size == 10)) or ((eval_step == 10)) or ((model_name == resnet18)) or ((num_classes == 3)) or ((loss == crossentropy) or (loss == mse) or (loss == multiclassnonexlusivcrossentropy)) or ((optimizer_name == adam)) or ((optimizer_lr == 0.001)) or ((optimizer_eps == 1e-07)) or ((patch_exclusion_policy == marginmorethan_1000)) or ((num_patches_rejected == 0))


UndefinedVariableError: name 'resnet18' is not defined

In [2]:
from ipywidgets import Dropdown

choice_folder = Dropdown(
    options=selected_folder,
    value=selected_folder[0],
    description='Folder to vizualize result',
    disabled=False,
         )
display(choice_folder)

Dropdown(description='Folder to vizualize result', options=('2021-06-11_12h30min51s_', '2021-06-15_00h55min54s…

In [61]:
choice_folder = choice_folder.value
from main.src.models.ModelFactory import ModelFactory
from main.src.analysis.tools import RGB_Overlay_Patch
import json
FolderInfos.init(test_without_data=True)
folder = FolderInfos.data_folder + choice_folder + FolderInfos.separator
with open(folder + choice_folder + "parameters.json", "r") as fp:
    dico = json.load(fp)

rgb_overlay = RGB_Overlay_Patch(usage_type="classification", patch_creator="fixed_px",
                            grid_size=dico["data"]["dataset"]["attr_patch_creator"]["attr_grid_size_px"],
                            input_size=dico["data"]["dataset"]["attr_dataset"]["attr_resizer"][
                                "attr_out_size_w"])
epoch = 0
iteration = 6080
import torch
name = "027481_0319CB_0EB7"

device = torch.device("cuda")
model = ModelFactory(model_name=dico["model"]["attr_model_name"], num_classes=dico["model"]["attr_num_classes"])()
model.to(device)
model.load_state_dict(torch.load(f"{folder}{choice_folder}_model_epoch-{epoch}_it-{iteration}.pt"))
array_overlay = rgb_overlay(name_img=name, model=model, blending_factor=0.5, device=device)

In [62]:
import matplotlib.pyplot as plt
plt.figure(figsize=(10,10))
plt.imshow(array_overlay[0])
plt.gcf().text(0.02, 0.75, f"Red: other; Green: spill; Blue: seep", fontsize=14)
plt.savefig(f"{folder}{choice_folder}_{name}_rgb_overlay_true.png")
plt.figure(figsize=(10,10))
plt.imshow(array_overlay[1])
plt.gcf().text(0.02, 0.75, f"Red: other; Green: spill; Blue: seep", fontsize=14)
plt.savefig(f"{folder}{choice_folder}_{name}_rgb_overlay_pred.png")

In [63]:
from main.src.data.DatasetFactory import DatasetFactory

FolderInfos.init(test_without_data=False)
dataset = DatasetFactory(usage_type="classification", patch_creator="fixed_px",grid_size=1000 , input_size=256)
with open(FolderInfos.input_data_folder+"class_mappings.json","r") as fp:
    dico_corresp = json.load(fp)
plt.figure()
array = dataset.attr_dataset.images[name]
plt.imshow(array,cmap="gray")

plt.figure()
plt.imshow(array[:10,:10],cmap="gray")
import numpy as np
print(np.std(array),np.min(array),np.max(array))