# Analysis of mass training of models

### Combine all results into one dataframe

In [1]:
import os
import pandas as pd
import plotly.graph_objects as go
import numpy as np
from plotly.subplots import make_subplots

# Combine into one dataframe

In [2]:
def read_csvs(training_results):
    """Load every ``best_model__*.csv`` file of a directory into one dataframe.

    File names are expected to look like
    ``best_model__<architecture>__<encoder>__<loss>__<encoder_freeze>__<dataset>.csv``.
    The architecture, encoder, loss and dataset parsed from the name are added
    to the rows of each file as the columns ``architecture``, ``encoder``,
    ``loss_func`` and ``dataset``. The ``encoder_freeze`` field of the name is
    parsed but not stored as a column.

    Args:
        training_results: Path of the directory that holds the result CSVs.

    Returns:
        The row-wise concatenation of all loaded files. Every file keeps its
        own index (read from the first CSV column), so index labels repeat
        across files. ``None`` is returned when no usable file is found.

    Raises:
        ValueError: If a ``best_model__*.csv`` name does not consist of
            exactly five ``__``-separated fields.
    """
    prefix, suffix = "best_model__", ".csv"
    frames = []
    for file_name in os.listdir(training_results):
        # Ignore anything that is not a result file (notes, checkpoint
        # folders, ...) instead of failing while parsing its name.
        if not (file_name.startswith(prefix) and file_name.endswith(suffix)):
            continue
        stem = file_name[len(prefix):-len(suffix)]
        if not stem:
            continue
        fields = stem.split('__')
        if len(fields) != 5:
            raise ValueError(
                f"Unexpected result file name {file_name!r}: expected 5 "
                f"'__'-separated fields, got {len(fields)}"
            )
        architecture, encoder, loss, _encoder_freeze, dataset = fields
        df = pd.read_csv(os.path.join(training_results, file_name), index_col=0)
        df['architecture'] = architecture
        df['encoder'] = encoder
        df['loss_func'] = loss
        df['dataset'] = dataset
        frames.append(df)

    if not frames:
        return None
    # One concat at the end avoids re-copying the accumulated frame per file.
    return pd.concat(frames)

    # big_df['memory_used'] /= (1024*1024)

In [3]:
# l = big_df.groupby(['architecture', 'encoder', 'loss_func', 'freeze_encoder', 'dataset'])
# for g in l:
#     print(g[1].iloc[g[1]['val_iou_score'].idxmax()])
#     break
# big_df[:5]

# 5.1.1 Feature engineering

In [4]:
def plot_data_feature_engineering(big_df: pd.DataFrame, y_volumn, y_column_name, plot_title, x_column_name="Epochs", filter=None, **plot_options):
    """Draw one line per training run, labelled with the run's dataset name.

    Args:
        big_df: Results frame with the columns ``architecture``, ``encoder``,
            ``loss_func``, ``freeze_encoder`` and ``dataset`` plus the metric
            column named by ``y_volumn``. It is copied, never modified.
        y_volumn: Name of the column plotted on the y axis.
        y_column_name: Y-axis title.
        plot_title: Accepted for call compatibility; not used by this function.
        x_column_name: X-axis title.
        filter: Optional ``DataFrame.query`` expression applied to the copy
            before plotting.
        **plot_options: Extra layout settings, applied last so they override
            the defaults set here (for example ``showlegend``).

    Returns:
        The plotly figure, after it has been shown.
    """
    frame = big_df.copy(deep=True)
    if filter is not None:
        frame.query(filter, inplace=True)

    run_keys = ['architecture', 'encoder', 'loss_func', 'freeze_encoder', 'dataset']
    fig = go.Figure()
    for key, run in frame.groupby(run_keys):
        dataset = key[-1]
        # Only the first 20 rows of a run are drawn; x is the index shifted by one.
        fig.add_trace(
            go.Scatter(
                x=run.index[:20] + 1,
                y=run[y_volumn][:20],
                name=dataset.lower(),
            )
        )

    fig.update_layout(
        paper_bgcolor="rgba(0, 0, 0, 0)",
        legend=dict(y=0.5, traceorder='reversed', font_size=20),
        xaxis_title=x_column_name,
        yaxis_title=y_column_name,
        font=dict(size=20),
        showlegend=True,
    )
    fig.update_layout(plot_options)
    fig.show()
    return fig

# Load the feature-engineering runs once; both figures below use this frame.
feature_engieering = read_csvs("./TRAINING_RESULTS/feature_engineering")

# Train IoU figure (legend hidden), exported as SVG.
feature_train_fig = plot_data_feature_engineering(
    feature_engieering,
    y_volumn='train_iou_score',
    y_column_name="Train IoU",
    plot_title="Train IoU",
    showlegend=False,
)
feature_train_fig.write_image(
    file="D:/Master/MasterProject/Overleaf_figures/Chapter5/feature_engineering_TRAIN.svg",
    height=600,
    width=800,
)

# Validation IoU figure, exported as SVG.
feature_val_fig = plot_data_feature_engineering(
    feature_engieering,
    y_volumn='val_iou_score',
    y_column_name="Val IoU",
    plot_title="Val IoU",
)
feature_val_fig.write_image(
    file="D:/Master/MasterProject/Overleaf_figures/Chapter5/feature_engineering_VAL.svg",
    height=600,
    width=800,
)

# 5.1.2 Testing architectures

In [5]:
# Trace colour per model architecture, looked up by architecture name.
architecture_color_dict = {
    'PSPNet': '#e6194b',
    'Linknet': '#000075',
    'Unet': '#3cb44b',
    'PAN': '#4363d8',
    'MAnet': '#f58231',
    'DeepLabV3Plus': '#000000',
    # A second 'FPN' entry ('#46f0f0') used to precede this one. A repeated key
    # in a dict literal is overridden by its last occurrence, so this colour
    # was the only one ever in effect; the dead entry has been removed.
    'FPN': '#f032e6',
    'UnetPlusPlus': '#9a6324',
}
# USED: '#e6194b', '#3cb44b', '#000000', '#4363d8', '#f58231', '#f032e6', '#000075', '#9a6324'
# Available: '#46f0f0', '#911eb4', '#fabebe', '#008080', '#e6beff', '#fffac8', '#800000', '#aaffc3', '#ffd8b1', '#808080', '#ffffff', '#808000', '#bcf60c', '#ffe119'
# https://sashamaps.net/docs/resources/20-colors/

In [6]:
def plot_data_ARCHITECTURE(big_df: pd.DataFrame, y_volumn, y_column_name, plot_title, x_column_name="Epochs", filter=None, **plot_options):
    """Draw one line per training run, labelled by rank and architecture.

    Runs are ranked by the maximum of ``y_volumn``. Traces are added from the
    lowest to the highest maximum and named ``"<rank>. <architecture>"``,
    where rank 1 is the run with the highest maximum.

    Args:
        big_df: Results frame with the columns ``architecture``, ``encoder``,
            ``loss_func``, ``freeze_encoder`` and ``dataset`` plus the metric
            column named by ``y_volumn``. It is copied, never modified.
        y_volumn: Name of the column plotted on the y axis and used for ranking.
        y_column_name: Y-axis title.
        plot_title: Accepted for call compatibility; not used by this function.
        x_column_name: X-axis title.
        filter: Optional ``DataFrame.query`` expression applied to the copy
            before plotting.
        **plot_options: Extra layout settings, applied last so they override
            the defaults set here.

    Returns:
        The plotly figure, after it has been shown.

    Raises:
        KeyError: If a run's architecture has no entry in
            ``architecture_color_dict``.
    """
    frame = big_df.copy(deep=True)
    if filter is not None:
        frame.query(filter, inplace=True)

    run_keys = ['architecture', 'encoder', 'loss_func', 'freeze_encoder', 'dataset']
    # (group key, rows of the run, best score), ordered from worst to best.
    ranked = sorted(
        ((key, run, np.max(run[y_volumn])) for key, run in frame.groupby(run_keys)),
        key=lambda item: item[2],
    )
    # The rank is derived from the actual number of runs instead of a
    # hard-coded 8, so the labels stay correct for any number of runs.
    total = len(ranked)

    fig = go.Figure()
    for position, (key, run, _best_score) in enumerate(ranked):
        architecture = key[0]
        # Only the first 20 rows of a run are drawn; x is the index shifted by one.
        fig.add_trace(
            go.Scatter(
                x=run.index[:20] + 1,
                y=run[y_volumn][:20],
                name=f"{total - position}. {architecture}",
                marker_color=architecture_color_dict[architecture],
            )
        )

    fig.update_layout({"paper_bgcolor": "rgba(0, 0, 0, 0)"})
    fig.update_layout(legend=dict(y=0.5, traceorder='reversed', font_size=20))
    fig.update_layout(
        xaxis_title=x_column_name,
        yaxis_title=y_column_name,
        font=dict(size=20),
        showlegend=True,
    )
    fig.update_layout(plot_options)
    fig.show()
    return fig

In [7]:
# Load the architecture-selection runs; both figures below use this frame.
Select_architecture_df = read_csvs("./TRAINING_RESULTS/Select_architecture")
# feature_train_fig.write_image(file="D:/Master/MasterProject/Overleaf_figures/Chapter5/feature_engineering_TRAIN.svg", height=600, width=800)
# feature_val_fig.write_image(file="D:/Master/MasterProject/Overleaf_figures/Chapter5/feature_engineering_VAL.svg", height=600, width=800)
# Loss
# plot_data_ARCHITECTURE(Select_architecture_df, y_volumn='train_loss', y_column_name="Train Jaccard Loss", plot_title="Train loss", filter='loss_func == "JaccardLoss"')
# plot_data_ARCHITECTURE(Select_architecture_df, y_volumn='val_loss', y_column_name="Val Jaccard Loss", plot_title="Val loss", filter='loss_func == "JaccardLoss"')

# IoU
# The train figure was labelled "Val IoU" (copy-paste); the title now matches the plotted metric.
architecture_train_fig = plot_data_ARCHITECTURE(Select_architecture_df, y_volumn='train_iou_score',   y_column_name="Train IoU", plot_title="Train IoU") #, showlegend=False
architecture_train_fig.write_image(file="D:/Master/MasterProject/Overleaf_figures/Chapter5/architecture_train_fig.svg", height=600, width=800)

architecture_val_fig = plot_data_ARCHITECTURE(Select_architecture_df, y_volumn='val_iou_score',   y_column_name="Val IoU", plot_title="Val IoU")
architecture_val_fig.write_image(file="D:/Master/MasterProject/Overleaf_figures/Chapter5/architecture_val_fig.svg", height=600, width=800)

# 5.1.3 Testing Encoders

In [8]:
def plot_data_ARCHITECTURE(big_df: pd.DataFrame, y_volumn, y_column_name, plot_title, x_column=None, x_column_name="Epochs", filter=None, **plot_options):
    """Build a figure with one line per training run, labelled by rank and encoder.

    Runs are ranked by the maximum of ``y_volumn``. Traces are added from the
    lowest to the highest maximum and named ``"<rank>. <encoder>"``, where
    rank 1 is the run with the highest maximum. The figure is not shown here.

    Args:
        big_df: Results frame with the columns ``architecture``, ``encoder``,
            ``loss_func``, ``freeze_encoder`` and ``dataset`` plus the plotted
            columns. It is copied, never modified.
        y_volumn: Name of the column plotted on the y axis and used for ranking.
        y_column_name: Y-axis title.
        plot_title: Accepted for call compatibility; not used by this function.
        x_column: Column plotted on the x axis. When ``None`` the index
            shifted by one is used.
        x_column_name: X-axis title.
        filter: Optional ``DataFrame.query`` expression applied to the copy
            before plotting.
        **plot_options: Extra layout settings, applied last so they override
            the defaults set here.

    Returns:
        The plotly figure.
    """
    frame = big_df.copy(deep=True)
    if filter is not None:
        frame.query(filter, inplace=True)

    run_keys = ['architecture', 'encoder', 'loss_func', 'freeze_encoder', 'dataset']
    # (group key, rows of the run, best score), ordered from worst to best.
    ranked = sorted(
        ((key, run, np.max(run[y_volumn])) for key, run in frame.groupby(run_keys)),
        key=lambda item: item[2],
    )
    total = len(ranked)

    fig = go.Figure()
    for position, (key, run, _best_score) in enumerate(ranked):
        encoder = key[1]
        # Only the first 20 rows of a run are drawn.
        xs = (run.index[:20] + 1) if x_column is None else run[x_column][:20]
        fig.add_trace(
            go.Scatter(x=xs, y=run[y_volumn][:20], name=f"{total - position}. {encoder}")
        )

    fig.update_layout(
        paper_bgcolor="rgba(0, 0, 0, 0)",
        legend=dict(y=0.5, traceorder='reversed', font_size=20),
        xaxis_title=x_column_name,
        yaxis_title=y_column_name,
        font=dict(size=20),
        showlegend=True,
    )
    fig.update_layout(plot_options)
    return fig


# Load the encoder-selection runs; all figures below use this frame.
Select_encoder_df = read_csvs("./TRAINING_RESULTS/select_encoder")
# feature_train_fig.write_image(file="D:/Master/MasterProject/Overleaf_figures/Chapter5/feature_engineering_TRAIN.svg", height=600, width=800)
# feature_val_fig.write_image(file="D:/Master/MasterProject/Overleaf_figures/Chapter5/feature_engineering_VAL.svg", height=600, width=800)
# Loss
# plot_data_ARCHITECTURE(Select_architecture_df, y_volumn='train_loss', y_column_name="Train Jaccard Loss", plot_title="Train loss", filter='loss_func == "JaccardLoss"')
# plot_data_ARCHITECTURE(Select_architecture_df, y_volumn='val_loss', y_column_name="Val Jaccard Loss", plot_title="Val loss", filter='loss_func == "JaccardLoss"')

# IoU
# The train figures were labelled "Val IoU" (copy-paste); the titles now match the plotted metric.
encoder_train_fig = plot_data_ARCHITECTURE(Select_encoder_df, y_volumn='train_iou_score', y_column_name="Train IoU", plot_title="Train IoU", filter="freeze_encoder == False") #, showlegend=False
encoder_train_fig.write_image(file="D:/Master/MasterProject/Overleaf_figures/Chapter5/encoder_train_fig.svg", height=600, width=800)
encoder_train_fig.show()

encoder_val_fig = plot_data_ARCHITECTURE(Select_encoder_df, y_volumn='val_iou_score', y_column_name="Val IoU", plot_title="Val IoU", filter="freeze_encoder == False")
encoder_val_fig.write_image(file="D:/Master/MasterProject/Overleaf_figures/Chapter5/encoder_val_fig.svg", height=600, width=800)
encoder_val_fig.show()

# Freeze
encoder_train_fig_freeze = plot_data_ARCHITECTURE(Select_encoder_df, y_volumn='train_iou_score', y_column_name="Train IoU", plot_title="Train IoU", filter="freeze_encoder == True") #, showlegend=False
encoder_train_fig_freeze.write_image(file="D:/Master/MasterProject/Overleaf_figures/Chapter5/freeze_encoder_train_fig.svg", height=600, width=800)
encoder_val_fig_freeze = plot_data_ARCHITECTURE(Select_encoder_df, y_volumn='val_iou_score', y_column_name="Val IoU", plot_title="Val IoU", filter="freeze_encoder == True")
encoder_val_fig_freeze.write_image(file="D:/Master/MasterProject/Overleaf_figures/Chapter5/freeze_encoder_val_fig.svg", height=600, width=800)


In [9]:
# Best validation IoU per encoder, restricted to rows where freeze_encoder is True.
da = (
    Select_encoder_df
    .loc[Select_encoder_df['freeze_encoder'] == True]
    .groupby(['encoder'])['val_iou_score']
    .max()  # ,'freeze_encoder'
)
# da.to_latex() # .loc[da['freeze_encoder'] == True]
da

encoder
efficientnet-b2    0.950159
resnet101          0.954297
resnet34           0.949444
resnetv2_101       0.943098
resnetv2_50        0.948532
xception41         0.933219
Name: val_iou_score, dtype: float64

In [10]:
# Validation IoU against inference time for the runs where freeze_encoder is False.
# NOTE(review): the axis is labelled in ms but `inference_time` is passed unscaled here,
# while the inference-time cell further down multiplies it by 1000 first — confirm the unit.
encoder_val_fig = plot_data_ARCHITECTURE(
    Select_encoder_df,
    y_volumn='val_iou_score',
    y_column_name="Val IoU",
    x_column_name='Inference time (ms)',
    x_column="inference_time",
    plot_title="",
    filter="freeze_encoder == False",
)
# encoder_val_fig.show()

# Loss function

In [13]:
def plot_data_ARCHITECTURE(big_df: pd.DataFrame, y_volumn, y_column_name, plot_title, x_column=None, x_column_name="Epochs", filter=None, **plot_options):
    """Build a figure with one line per training run, labelled by rank and loss.

    Runs are ranked by the maximum of ``y_volumn``. Traces are added from the
    lowest to the highest maximum and named ``"<rank>. <loss>"``, where rank 1
    is the run with the highest maximum. The figure is not shown here.

    Args:
        big_df: Results frame with the columns ``architecture``, ``encoder``,
            ``loss_func``, ``freeze_encoder`` and ``dataset`` plus the plotted
            columns. It is copied, never modified.
        y_volumn: Name of the column plotted on the y axis and used for ranking.
        y_column_name: Y-axis title.
        plot_title: Accepted for call compatibility; not used by this function.
        x_column: Column plotted on the x axis. When ``None`` the index
            shifted by one is used.
        x_column_name: X-axis title.
        filter: Optional ``DataFrame.query`` expression applied to the copy
            before plotting.
        **plot_options: Extra layout settings, applied last so they override
            the defaults set here.

    Returns:
        The plotly figure.
    """
    frame = big_df.copy(deep=True)
    if filter is not None:
        frame.query(filter, inplace=True)

    run_keys = ['architecture', 'encoder', 'loss_func', 'freeze_encoder', 'dataset']
    # Collect (best score, group key, rows) per run, then order worst to best.
    scored = []
    for key, run in frame.groupby(run_keys):
        scored.append((np.max(run[y_volumn]), key, run))
    scored.sort(key=lambda item: item[0])
    total = len(scored)

    fig = go.Figure()
    for position, (_best_score, key, run) in enumerate(scored):
        loss = key[2]
        label = f"{total - position}. {loss}"
        # Only the first 20 rows of a run are drawn.
        if x_column is None:
            xs = run.index[:20] + 1
        else:
            xs = run[x_column][:20]
        fig.add_trace(go.Scatter(x=xs, y=run[y_volumn][:20], name=label))

    fig.update_layout(
        paper_bgcolor="rgba(0, 0, 0, 0)",
        legend=dict(y=0.5, traceorder='reversed', font_size=20),
        xaxis_title=x_column_name,
        yaxis_title=y_column_name,
        font=dict(size=20),
        showlegend=True,
    )
    fig.update_layout(plot_options)
    return fig


# NOTE(review): this rebinds `Select_encoder_df` (the select_encoder results in the
# encoder cells) to the selecting_loss results; re-running the encoder cells after
# this one would read loss data. Consider a dedicated name — confirm no other cell
# relies on the current one first.
Select_encoder_df = read_csvs("./TRAINING_RESULTS/selecting_loss")
# feature_train_fig.write_image(file="D:/Master/MasterProject/Overleaf_figures/Chapter5/feature_engineering_TRAIN.svg", height=600, width=800)
# feature_val_fig.write_image(file="D:/Master/MasterProject/Overleaf_figures/Chapter5/feature_engineering_VAL.svg", height=600, width=800)
# Loss
# plot_data_ARCHITECTURE(Select_architecture_df, y_volumn='train_loss', y_column_name="Train Jaccard Loss", plot_title="Train loss", filter='loss_func == "JaccardLoss"')
# plot_data_ARCHITECTURE(Select_architecture_df, y_volumn='val_loss', y_column_name="Val Jaccard Loss", plot_title="Val loss", filter='loss_func == "JaccardLoss"')

# IoU
# The train figure was labelled "Val IoU" (copy-paste); the title now matches the plotted metric.
loss_train_fig = plot_data_ARCHITECTURE(Select_encoder_df, y_volumn='train_iou_score', y_column_name="Train IoU", plot_title="Train IoU", filter="freeze_encoder == False") #, showlegend=False
loss_train_fig.write_image(file="D:/Master/MasterProject/Overleaf_figures/Chapter5/loss_train_fig.svg", height=600, width=800)
loss_train_fig.show()

loss_val_fig = plot_data_ARCHITECTURE(Select_encoder_df, y_volumn='val_iou_score', y_column_name="Val IoU", plot_title="Val IoU", filter="freeze_encoder == False")
loss_val_fig.write_image(file="D:/Master/MasterProject/Overleaf_figures/Chapter5/loss_val_fig.svg", height=600, width=800)
loss_val_fig.show()

# Memory consumption

In [12]:
def plot_data_memory(big_df: pd.DataFrame, y_volumn, y_column_name, plot_title, x_column="index", x_column_name="Epochs", filter=None):
    """Show a scatter plot with one marker per training run.

    For every run the row with the highest ``val_iou_score`` is selected, and
    that row's ``x_column`` / ``y_volumn`` values give the marker position.

    Args:
        big_df: Results frame with the columns ``architecture``, ``encoder``,
            ``loss_func``, ``freeze_encoder``, ``dataset`` and
            ``val_iou_score`` plus the plotted columns. It is copied, never
            modified.
        y_volumn: Name of the column plotted on the y axis.
        y_column_name: Y-axis title.
        plot_title: Used in the figure title and in the hover text.
        x_column: Name of the column plotted on the x axis. The default
            ``"index"`` plots the index label of the selected row unless the
            frame has a real column called ``index``.
        x_column_name: X-axis title, also used in the hover text.
        filter: Optional ``DataFrame.query`` expression applied to the copy
            before plotting.

    Returns:
        None. The figure is shown as a side effect.

    Raises:
        KeyError: If ``x_column`` (other than ``"index"``) or ``y_volumn`` is
            not a column of the frame.
    """
    frame = big_df.copy(deep=True)
    if filter is not None:
        frame.query(filter, inplace=True)

    run_keys = ['architecture', 'encoder', 'loss_func', 'freeze_encoder', 'dataset']
    fig = go.Figure()
    for key, run in frame.groupby(run_keys):
        architecture, encoder, loss, freeze, dataset = key

        # `idxmax()` returns an index *label*, which must not be passed to the
        # positional `iloc`; labels and positions differ as soon as rows were
        # filtered out or the index does not start at 0. `argmax()` returns the
        # position directly.
        best_row = run.iloc[run['val_iou_score'].argmax()]

        # The index is not a column, so the default x_column="index" needs the
        # row label instead of a column lookup.
        use_index_label = x_column == "index" and "index" not in run.columns
        x_value = best_row.name if use_index_label else best_row[x_column]

        # Built as a plain string (a stray trailing comma previously made this
        # a one-element tuple).
        hovertemplate = (
            f"<b>{architecture} {encoder} {loss} {freeze}</b><br><br>"
            + f"{plot_title}" + ": %{y:.3}<br>"
            + f"{x_column_name}" + ": %{x}<br>"
            + "<extra></extra>"
        )

        fig.add_trace(
            go.Scatter(
                x=[x_value],
                y=[best_row[y_volumn]],
                name=f"{architecture} {encoder} {loss} {dataset}",
                hovertemplate=hovertemplate,
                mode='markers',
            )
        )

    fig.update_layout(legend=dict(y=0.5, traceorder='reversed', font_size=16))
    fig.update_layout(
        title=f"{plot_title} - (Architecture Encoder Loss EncoderFreeze)",
        xaxis_title=x_column_name,
        yaxis_title=y_column_name,
        font=dict(size=14),
    )
    fig.show()


# plot_data_memory(big_df, y_volumn='train_iou_score', y_column_name="IoU", x_column="memory_used", x_column_name="Memory (mb)", plot_title="Train IoU")
# NOTE(review): no module-level `big_df` is assigned anywhere in the visible notebook
# (`read_csvs` only uses that name locally), and the recorded output of this cell is
# "NameError: name 'big_df' is not defined". Bind the intended dataframe to `big_df`
# before this cell — TODO confirm which results set is meant.
big_df_copy = big_df.copy(deep=True)
# Scale inference_time by 1000 to match the "Inference time (ms)" axis label below;
# presumably the raw column is in seconds — TODO confirm.
big_df_copy['inference_time'] = big_df_copy['inference_time']*1000
# One marker per run: validation IoU against the scaled inference time.
plot_data_memory(big_df_copy, y_volumn='val_iou_score', y_column_name="Val IoU", x_column="inference_time", x_column_name="Inference time (ms)", plot_title="Val IoU vs Inference time")
# plot_data_memory(big_df, y_volumn='val_iou_score', y_column_name="Val IoU", x_column="memory_used", x_column_name="Memory (mb)", plot_title="Val IoU vs GPU memory")

NameError: name 'big_df' is not defined