In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import numpy as np

import chart_studio.plotly as py	
import plotly.graph_objs as go
from plotly import subplots
import plotly.express as px
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
import plotly.express as px

In [2]:
# Root directory holding one sub-directory per batch-selection method;
# each sub-directory holds one log file per model/dataset.
log_path = 'logs'

# os.listdir() returns entries in arbitrary order.  Later cells pair up
# files[i][j] across methods by position j, so both listings are sorted to
# make that pairing (and the resulting row order) deterministic.
batch_selection_methods = sorted(
    o for o in os.listdir(log_path)
    if os.path.isdir(os.path.join(log_path, o))
)

# files[i] is the sorted list of log-file paths of batch_selection_methods[i].
files = [
    sorted(
        os.path.join(log_path, o, f)
        for f in os.listdir(os.path.join(log_path, o))
        if os.path.isfile(os.path.join(log_path, o, f))
    )
    for o in batch_selection_methods
]

In [3]:
# Convert the nested list of paths into a NumPy array so later cells can use
# .shape and .T on it.
# NOTE(review): assumes every method directory contains the same number of
# files, so the result is a regular 2-D array — TODO confirm.
files = np.array(files)

In [4]:
# Show the array dimensions: (method directories, files per directory).
files.shape

(3, 8)

In [5]:
# plot_matrix is the transpose of `files`: plot_matrix[j][i] describes file j
# of method directory i as [DataFrame, file name without extension, directory name].
plot_matrix = np.empty(files.T.shape, dtype=object)
for i in range(files.shape[0]):
    for j in range(files.shape[1]):
        f = files[i][j]
        data = pd.read_csv(f)
        # Derive the labels with os.path instead of splitting on '\\', which
        # only works for Windows-style paths with exactly three components.
        file_stem = os.path.basename(f).split('.')[0]
        method_dir = os.path.basename(os.path.dirname(f))
        plot_matrix[j][i] = [data, file_stem, method_dir]


In [6]:
# Qualitative colour palette from plotly express; the plotting cell picks one
# colour per selection method by index.
color_list = px.colors.qualitative.T10

In [None]:
# Write one HTML figure per row of plot_matrix.  Each figure is a 3x2 grid:
# rows are Loss / Metrics / Validation, columns are the Time and Epoch x-axes,
# with one line per selection method in every subplot.

# plot() does not create missing directories, so make sure the target exists.
os.makedirs('plots', exist_ok=True)

# (DataFrame column, subplot row) and (DataFrame column, subplot column).
y_axes = (('Loss', 1), ('Metrics', 2), ('Validation', 3))
x_axes = (('Time', 1), ('Epoch', 2))

for i in range(plot_matrix.shape[0]):
    fig = subplots.make_subplots(rows=3, cols=2, vertical_spacing=0.06, horizontal_spacing=0.03,
                                 subplot_titles=("Time-based Loss", "Epoch-based Loss",
                                                 "Time-based Training Metrics", "Epoch-based Training Metrics",
                                                 "Time-based Validation Metrics", "Epoch-based Validation Metrics"))

    for j in range(plot_matrix.shape[1]):
        plot_data = plot_matrix[i][j][0]
        name = plot_matrix[i][j][1].upper()
        selection = plot_matrix[i][j][2].title()
        # Wrap around the palette so more methods than colours cannot raise IndexError.
        color = color_list[j % len(color_list)]

        # Same trace order as before: all time-based traces, then all epoch-based ones.
        for x_column, col in x_axes:
            for y_column, row in y_axes:
                trace = go.Scatter(x=plot_data[x_column], y=plot_data[y_column],
                                   name=selection + ' ' + y_column + ' ' + x_column,
                                   mode='lines', line_color=color)
                # add_trace(row=, col=) replaces the deprecated append_trace.
                fig.add_trace(trace, row=row, col=col)

    # The title only needs to be set once per figure; as before it uses the
    # name taken from the last entry of the row.
    fig.update_layout(title=go.layout.Title(text=name))

    plot(fig, filename='plots/' + str(i) + '.html')

In [7]:
# Empty summary table, one row per (model, algorithm) run, filled by the next cell.
# Column groups:
#   identification : Model, algorithm
#   timing         : total_time, avg_epoch
#   best values    : max_metric, max_validation, min_loss
#   row index of each best value   : *_epoch
#   'Time' value at each best value: *_ts
results = pd.DataFrame(
    columns=[
        'Model', 'algorithm',
        'total_time', 'avg_epoch',
        'max_metric', 'max_validation', 'min_loss',
        'max_metrics_epoch', 'max_validation_epoch', 'min_loss_epoch',
        'max_metrics_ts', 'max_validation_ts', 'min_loss_ts',
    ]
)

In [8]:
# Names (upper-cased) of the models whose 'Metrics'/'Validation' columns are
# flipped before taking the maximum.  BOSTON_HOUSES is the only regression task.
REGRESSION_MODELS = {'BOSTON_HOUSES'}


def _summarize_run(data, name, algorithm, invert_scores=False):
    """Summarise one training log as a dict keyed by the `results` columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Log with the columns 'Time', 'Loss', 'Metrics' and 'Validation'.
    name, algorithm : str
        Labels stored in the 'Model' and 'algorithm' columns.
    invert_scores : bool, default False
        When True, 'Metrics' and 'Validation' are replaced by
        ``column.max() - column`` before the best value is located, so the
        smallest logged value becomes the "best" one.  The transformation is
        applied to local copies; `data` itself is never modified.

    Returns
    -------
    dict
        One entry per column of the summary table.
    """
    metrics = data['Metrics']
    validation = data['Validation']
    if invert_scores:
        metrics = metrics.max() - metrics
        validation = validation.max() - validation

    # Index labels of the best rows; reused to look up the matching timestamps.
    max_metrics_epoch = metrics.idxmax()
    max_validation_epoch = validation.idxmax()
    min_loss_epoch = data['Loss'].idxmin()

    return {
        'Model': name,
        'algorithm': algorithm,
        # Positional access to the last row, independent of the index labels.
        'total_time': data['Time'].iloc[-1],
        'avg_epoch': data['Time'].diff().mean(),
        'max_metric': metrics.max(),
        'max_validation': validation.max(),
        'min_loss': data['Loss'].min(),
        'max_metrics_epoch': max_metrics_epoch,
        'max_validation_epoch': max_validation_epoch,
        'min_loss_epoch': min_loss_epoch,
        'max_metrics_ts': data['Time'].loc[max_metrics_epoch],
        'max_validation_ts': data['Time'].loc[max_validation_epoch],
        'min_loss_ts': data['Time'].loc[min_loss_epoch],
    }


# Collect all rows first and build the frame once.  `results` is rebuilt from
# scratch, so re-running this cell neither duplicates rows nor re-applies the
# regression transformation to already transformed data.
summary_rows = []
for i in range(plot_matrix.shape[0]):
    for j in range(plot_matrix.shape[1]):
        data = plot_matrix[i][j][0]
        name = plot_matrix[i][j][1].upper()
        algorithm = plot_matrix[i][j][2].title()
        # That is the only regression task, easier to solve it like this.
        summary_rows.append(
            _summarize_run(data, name, algorithm, invert_scores=name in REGRESSION_MODELS)
        )

results = pd.DataFrame(
    summary_rows,
    columns=['Model', 'algorithm', 'total_time', 'avg_epoch', 'max_metric', 'max_validation', 'min_loss',
             'max_metrics_epoch', 'max_validation_epoch', 'min_loss_epoch',
             'max_metrics_ts', 'max_validation_ts', 'min_loss_ts'],
)


In [9]:
# Display the summary table.
results

Unnamed: 0,Model,algorithm,total_time,avg_epoch,max_metric,max_validation,min_loss,max_metrics_epoch,max_validation_epoch,min_loss_epoch,max_metrics_ts,max_validation_ts,min_loss_ts
0,ARCHIMEDEAN,Original,18.735745,0.125743,0.966667,0.95,0.097041,148,138,149,18.608615,17.432301,18.735745
1,ARCHIMEDEAN,Sorting,22.599882,0.151677,0.965278,0.95,0.039666,148,141,149,22.448273,21.528064,22.599882
2,ARCHIMEDEAN,Windowed,33.72951,0.226373,0.958333,0.9625,0.063966,149,122,149,33.72951,27.624132,33.72951
3,BOSTON_HOUSES,Original,2.692392,0.141705,4.86317,4.757124,3.787659,19,19,3,2.692392,2.692392,0.742943
4,BOSTON_HOUSES,Sorting,2.770498,0.145816,4.63723,3.940103,1.590696,18,19,15,2.63536,2.770498,2.210228
5,BOSTON_HOUSES,Windowed,5.39758,0.284083,6.94994,3.63395,4.003263,19,18,19,5.39758,5.099245,5.39758
6,FASHION_MNIST,Original,96.344,5.070737,0.926687,0.904746,1.52045,19,19,17,96.344,96.344,86.203
7,FASHION_MNIST,Sorting,99.626,5.243474,0.854042,0.832052,1.499295,19,17,17,99.626,89.579,89.579
8,FASHION_MNIST,Windowed,303.345,15.965526,0.890375,0.893633,1.551183,19,19,19,303.345,303.345,303.345
9,IRIS,Original,15.311,0.805842,0.990741,1.0,0.000599,18,5,19,14.78,7.108,15.311


In [10]:
# best metrics
# Keep the rows whose max_metric equals the highest max_metric of their Model
# group (ties keep every tied row).  The string alias 'max' replaces the
# builtin `max`, whose use as a groupby callable is deprecated in pandas.
best_metric_rows = results[results['max_metric'] == results.groupby('Model')['max_metric'].transform('max')]
print(best_metric_rows['algorithm'].value_counts())
best_metric_rows

Original    3
Windowed    3
Sorting     3
Name: algorithm, dtype: int64


Unnamed: 0,Model,algorithm,total_time,avg_epoch,max_metric,max_validation,min_loss,max_metrics_epoch,max_validation_epoch,min_loss_epoch,max_metrics_ts,max_validation_ts,min_loss_ts
0,ARCHIMEDEAN,Original,18.735745,0.125743,0.966667,0.95,0.097041,148,138,149,18.608615,17.432301,18.735745
5,BOSTON_HOUSES,Windowed,5.39758,0.284083,6.94994,3.63395,4.003263,19,18,19,5.39758,5.099245,5.39758
6,FASHION_MNIST,Original,96.344,5.070737,0.926687,0.904746,1.52045,19,19,17,96.344,96.344,86.203
9,IRIS,Original,15.311,0.805842,0.990741,1.0,0.000599,18,5,19,14.78,7.108,15.311
11,IRIS,Windowed,23.953,1.260684,0.990741,1.0,0.001543,19,8,18,23.953,12.452,22.656
13,MNIST,Sorting,63.665524,3.350817,0.995354,0.988553,1.46115,19,10,10,63.665524,33.868595,33.868595
16,MUSIC_GENRES,Sorting,58.827,1.200551,0.861111,0.694444,0.026859,48,49,35,57.202,58.827,39.983
19,SENTIMENT,Sorting,44.357231,0.90525,0.795185,0.788086,0.381779,44,32,35,40.038883,29.733243,32.367318
23,WINE_QUALITY,Windowed,11.641,0.612684,0.983333,1.0,0.169513,18,5,19,11.297,6.547,11.641


In [44]:
# best loss
# Count, per algorithm, how often it reaches the lowest min_loss of a Model
# group (ties count for every tied row).  The string alias 'min' replaces the
# builtin `min`, whose use as a groupby callable is deprecated in pandas.
best_loss_mask = results['min_loss'] == results.groupby('Model')['min_loss'].transform('min')
results.loc[best_loss_mask, 'algorithm'].value_counts()

Sorting    8
Name: algorithm, dtype: int64

In [45]:
# best validation accuracy
# Count, per algorithm, how often it reaches the highest max_validation of a
# Model group (ties count for every tied row).  The string alias 'max' replaces
# the builtin `max`, whose use as a groupby callable is deprecated in pandas.
best_validation_mask = results['max_validation'] == results.groupby('Model')['max_validation'].transform('max')
results.loc[best_validation_mask, 'algorithm'].value_counts()

Original    6
Windowed    4
Sorting     3
Name: algorithm, dtype: int64

In [16]:
# fastest best accuracy per epoch
# Derived scores (smaller is better in the cells below):
#   accuracy-like columns: (1 / best value) * position of the best value
#   loss column          : best loss * position of the best loss
# "Position" is the row index (*_epoch) or the 'Time' value (*_ts).
inverse_metric = 1 / results['max_metric']
inverse_validation = 1 / results['max_validation']
best_loss = results['min_loss']

for target_column, weight, position_column in (
    ('mme', inverse_metric, 'max_metrics_epoch'),
    ('mve', inverse_validation, 'max_validation_epoch'),
    ('mle', best_loss, 'min_loss_epoch'),
    ('mmts', inverse_metric, 'max_metrics_ts'),
    ('mvts', inverse_validation, 'max_validation_ts'),
    ('mlts', best_loss, 'min_loss_ts'),
):
    results[target_column] = weight * results[position_column]


In [17]:
# Display the results table again.
results

Unnamed: 0,Model,algorithm,total_time,avg_epoch,max_metric,max_validation,min_loss,max_metrics_epoch,max_validation_epoch,min_loss_epoch,max_metrics_ts,max_validation_ts,min_loss_ts,mme,mve,mle,mmts,mvts,mlts
0,ARCHIMEDEAN,Original,18.735745,0.125743,0.966667,0.95,0.097041,148,138,149,18.608615,17.432301,18.735745,153.103,145.263,14.4591,19.250292,18.349791,1.81813
1,ARCHIMEDEAN,Sorting,22.599882,0.151677,0.965278,0.95,0.039666,148,141,149,22.448273,21.528064,22.599882,153.324,148.421,5.91024,23.255764,22.66112,0.896448
2,ARCHIMEDEAN,Windowed,33.72951,0.226373,0.958333,0.9625,0.063966,149,122,149,33.72951,27.624132,33.72951,155.478,126.753,9.53088,35.196011,28.700398,2.15753
3,BOSTON_HOUSES,Original,2.692392,0.141705,4.86317,4.757124,3.787659,19,19,3,2.692392,2.692392,0.742943,3.90692,3.99401,11.363,0.553629,0.56597,2.814016
4,BOSTON_HOUSES,Sorting,2.770498,0.145816,4.63723,3.940103,1.590696,18,19,15,2.63536,2.770498,2.210228,3.88163,4.82221,23.8604,0.568305,0.703154,3.5158
5,BOSTON_HOUSES,Windowed,5.39758,0.284083,6.94994,3.63395,4.003263,19,18,19,5.39758,5.099245,5.39758,2.73384,4.95329,76.062,0.776637,1.403224,21.607937
6,FASHION_MNIST,Original,96.344,5.070737,0.926687,0.904746,1.52045,19,19,17,96.344,96.344,86.203,20.5031,21.0004,25.8477,103.96601,106.487346,131.067361
7,FASHION_MNIST,Sorting,99.626,5.243474,0.854042,0.832052,1.499295,19,17,17,99.626,89.579,89.579,22.2472,20.4314,25.488,116.652384,107.660324,134.305325
8,FASHION_MNIST,Windowed,303.345,15.965526,0.890375,0.893633,1.551183,19,19,19,303.345,303.345,303.345,21.3393,21.2615,29.4725,340.693521,339.451427,470.54353
9,IRIS,Original,15.311,0.805842,0.990741,1.0,0.000599,18,5,19,14.78,7.108,15.311,18.1682,5.0,0.0113722,14.918131,7.108,0.009164


In [18]:
# best epoch based metrics
# Keep the rows whose mme equals the lowest mme of their Model group (ties keep
# every tied row).  The string alias 'min' replaces the builtin `min`, whose
# use as a groupby callable is deprecated in pandas.
best_mme_rows = results[results['mme'] == results.groupby('Model')['mme'].transform('min')]
print(best_mme_rows['algorithm'].value_counts())
best_mme_rows


Original    5
Windowed    2
Sorting     1
Name: algorithm, dtype: int64


Unnamed: 0,Model,algorithm,total_time,avg_epoch,max_metric,max_validation,min_loss,max_metrics_epoch,max_validation_epoch,min_loss_epoch,max_metrics_ts,max_validation_ts,min_loss_ts,mme,mve,mle,mmts,mvts,mlts
0,ARCHIMEDEAN,Original,18.735745,0.125743,0.966667,0.95,0.097041,148,138,149,18.608615,17.432301,18.735745,153.103,145.263,14.4591,19.250292,18.349791,1.81813
5,BOSTON_HOUSES,Windowed,5.39758,0.284083,6.94994,3.63395,4.003263,19,18,19,5.39758,5.099245,5.39758,2.73384,4.95329,76.062,0.776637,1.403224,21.607937
6,FASHION_MNIST,Original,96.344,5.070737,0.926687,0.904746,1.52045,19,19,17,96.344,96.344,86.203,20.5031,21.0004,25.8477,103.96601,106.487346,131.067361
9,IRIS,Original,15.311,0.805842,0.990741,1.0,0.000599,18,5,19,14.78,7.108,15.311,18.1682,5.0,0.0113722,14.918131,7.108,0.009164
12,MNIST,Original,52.946526,2.786659,0.995229,0.98872,1.461272,18,15,19,50.216254,42.108878,52.946526,18.0863,15.1711,27.7642,50.456975,42.589287,77.369302
15,MUSIC_GENRES,Original,46.548,0.949959,0.845833,0.708333,0.779585,43,47,35,42.625,45.189,36.439,50.8374,66.3529,27.2855,50.394087,63.796237,28.407285
19,SENTIMENT,Sorting,44.357231,0.90525,0.795185,0.788086,0.381779,44,32,35,40.038883,29.733243,32.367318,55.3331,40.6047,13.3623,50.351671,37.728427,12.357157
23,WINE_QUALITY,Windowed,11.641,0.612684,0.983333,1.0,0.169513,18,5,19,11.297,6.547,11.641,18.3051,5.0,3.22074,11.488474,6.547,1.973297


In [19]:
# best epoch based validation
# Keep the rows whose mve equals the lowest mve of their Model group (ties keep
# every tied row).  The string alias 'min' replaces the builtin `min`, whose
# use as a groupby callable is deprecated in pandas.
best_mve_rows = results[results['mve'] == results.groupby('Model')['mve'].transform('min')]
print(best_mve_rows['algorithm'].value_counts())
best_mve_rows


Windowed    3
Original    3
Sorting     3
Name: algorithm, dtype: int64


Unnamed: 0,Model,algorithm,total_time,avg_epoch,max_metric,max_validation,min_loss,max_metrics_epoch,max_validation_epoch,min_loss_epoch,max_metrics_ts,max_validation_ts,min_loss_ts,mme,mve,mle,mmts,mvts,mlts
2,ARCHIMEDEAN,Windowed,33.72951,0.226373,0.958333,0.9625,0.063966,149,122,149,33.72951,27.624132,33.72951,155.478,126.753,9.53088,35.196011,28.700398,2.15753
3,BOSTON_HOUSES,Original,2.692392,0.141705,4.86317,4.757124,3.787659,19,19,3,2.692392,2.692392,0.742943,3.90692,3.99401,11.363,0.553629,0.56597,2.814016
7,FASHION_MNIST,Sorting,99.626,5.243474,0.854042,0.832052,1.499295,19,17,17,99.626,89.579,89.579,22.2472,20.4314,25.488,116.652384,107.660324,134.305325
9,IRIS,Original,15.311,0.805842,0.990741,1.0,0.000599,18,5,19,14.78,7.108,15.311,18.1682,5.0,0.0113722,14.918131,7.108,0.009164
10,IRIS,Sorting,21.955,1.155526,0.981481,1.0,0.000144,18,5,19,21.284,9.908,21.955,18.3396,5.0,0.00274096,21.685585,9.908,0.003167
14,MNIST,Windowed,199.979743,10.52525,0.983708,0.987132,1.461157,18,9,19,190.021651,95.786024,199.979743,18.2981,9.11732,27.762,193.168693,97.03463,292.201723
15,MUSIC_GENRES,Original,46.548,0.949959,0.845833,0.708333,0.779585,43,47,35,42.625,45.189,36.439,50.8374,66.3529,27.2855,50.394087,63.796237,28.407285
19,SENTIMENT,Sorting,44.357231,0.90525,0.795185,0.788086,0.381779,44,32,35,40.038883,29.733243,32.367318,55.3331,40.6047,13.3623,50.351671,37.728427,12.357157
23,WINE_QUALITY,Windowed,11.641,0.612684,0.983333,1.0,0.169513,18,5,19,11.297,6.547,11.641,18.3051,5.0,3.22074,11.488474,6.547,1.973297


In [50]:
# best epoch based loss
# Count, per algorithm, how often it reaches the lowest mle of a Model group
# (ties count for every tied row).  The string alias 'min' replaces the builtin
# `min`, whose use as a groupby callable is deprecated in pandas.
best_mle_mask = results['mle'] == results.groupby('Model')['mle'].transform('min')
print(results.loc[best_mle_mask, 'algorithm'].value_counts())

Sorting     7
Original    1
Name: algorithm, dtype: int64


In [51]:
# best time based metrics
# Count, per algorithm, how often it reaches the lowest mmts of a Model group
# (ties count for every tied row).  The string alias 'min' replaces the builtin
# `min`, whose use as a groupby callable is deprecated in pandas.
best_mmts_mask = results['mmts'] == results.groupby('Model')['mmts'].transform('min')
print(results.loc[best_mmts_mask, 'algorithm'].value_counts())


Original    8
Name: algorithm, dtype: int64


In [21]:
# best time based validation
# Keep the rows whose mvts equals the lowest mvts of their Model group (ties
# keep every tied row).  The string alias 'min' replaces the builtin `min`,
# whose use as a groupby callable is deprecated in pandas.
best_mvts_rows = results[results['mvts'] == results.groupby('Model')['mvts'].transform('min')]
print(best_mvts_rows['algorithm'].value_counts())
best_mvts_rows

Original    6
Sorting     2
Name: algorithm, dtype: int64


Unnamed: 0,Model,algorithm,total_time,avg_epoch,max_metric,max_validation,min_loss,max_metrics_epoch,max_validation_epoch,min_loss_epoch,max_metrics_ts,max_validation_ts,min_loss_ts,mme,mve,mle,mmts,mvts,mlts
0,ARCHIMEDEAN,Original,18.735745,0.125743,0.966667,0.95,0.097041,148,138,149,18.608615,17.432301,18.735745,153.103,145.263,14.4591,19.250292,18.349791,1.81813
3,BOSTON_HOUSES,Original,2.692392,0.141705,4.86317,4.757124,3.787659,19,19,3,2.692392,2.692392,0.742943,3.90692,3.99401,11.363,0.553629,0.56597,2.814016
6,FASHION_MNIST,Original,96.344,5.070737,0.926687,0.904746,1.52045,19,19,17,96.344,96.344,86.203,20.5031,21.0004,25.8477,103.96601,106.487346,131.067361
9,IRIS,Original,15.311,0.805842,0.990741,1.0,0.000599,18,5,19,14.78,7.108,15.311,18.1682,5.0,0.0113722,14.918131,7.108,0.009164
13,MNIST,Sorting,63.665524,3.350817,0.995354,0.988553,1.46115,19,10,10,63.665524,33.868595,33.868595,19.0887,10.1158,14.6115,63.962683,34.260784,49.487103
15,MUSIC_GENRES,Original,46.548,0.949959,0.845833,0.708333,0.779585,43,47,35,42.625,45.189,36.439,50.8374,66.3529,27.2855,50.394087,63.796237,28.407285
19,SENTIMENT,Sorting,44.357231,0.90525,0.795185,0.788086,0.381779,44,32,35,40.038883,29.733243,32.367318,55.3331,40.6047,13.3623,50.351671,37.728427,12.357157
21,WINE_QUALITY,Original,8.126,0.427684,0.966667,1.0,0.14562,19,8,19,8.126,4.453,8.126,19.6552,8.0,2.76678,8.406207,4.453,1.183307


In [53]:
# best time based loss
# Count, per algorithm, how often it reaches the lowest mlts of a Model group
# (ties count for every tied row).  The string alias 'min' replaces the builtin
# `min`, whose use as a groupby callable is deprecated in pandas.
best_mlts_mask = results['mlts'] == results.groupby('Model')['mlts'].transform('min')
print(results.loc[best_mlts_mask, 'algorithm'].value_counts())


Sorting     6
Original    2
Name: algorithm, dtype: int64
