# Import

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import glob
import os
from os.path import join, isfile, isdir
import pickle

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from models.RNN import RNN

from plot import Plotter

# Loading data

In [None]:
# Prediction target(s) and problem type whose tuning results are summarised;
# edit these two lists to inspect a different label
pred = ['drive']
prob = ['classification']

# Tuning output directory of the selected label(s)
out = join('output', '_'.join(pred), 'tuning')

# Names of the test-set metric columns, which depend on the problem type
measure_labels = [
    'test_{}_{}'.format(pred[0], metric)
    for metric in (['MAE', 'RMSE', 'Max AE', 'R2'] if prob[0] == 'regression'
                   else ['AUC', 'F1', 'Accuracy'])
]

# Retrieve logs and performance (metrics appendix)

In [None]:
# Build one dataframe that holds, for every tuned model, its training
# arguments (hyperparameters) next to the metrics recorded in the tuning log.
# This is the basis of the appendix with all metrics.

# Model types to include. Other types known to the renaming table below:
# 'RandomForestClass', 'RandomForestReg', 'LinearSVMClass', 'LinearSVMReg',
# 'Constant', 'NN', 'LSTM_RNN', '1d_CNN', 'SimpleRNN_RNN'
model_types = ['GRU_RNN']

logs = []
values = []

for model_type in model_types:
    if not isdir(join(out, model_type)):
        print(join(out, model_type), 'not found')
        continue

    # Tuning log of this model type
    log = pd.read_csv(join(out, model_type, 'log.csv'))
    log['model_type'] = model_type
    logs.append(log)

    # Every 'model*' entry is one trained configuration; sorted so that the
    # row order does not depend on the file system
    for model in sorted(glob.glob(join(out, model_type, 'model*'))):
        model_name = os.path.split(model)[-1]
        args_file = join(model, 'train_args.pickle')

        if isfile(args_file):
            # NOTE: unpickling can execute arbitrary code; only read files
            # written by this project's own training runs
            with open(args_file, 'rb') as infile:
                model_args = pickle.load(infile)

            # Recurrent models: record which layer (SimpleRNN, LSTM or GRU) was used
            if 'recurrent_layer' in model_args:
                model_args['model_type'] = model_args.pop('recurrent_layer').__name__ + '_RNN'
            else:
                model_args['model_type'] = model_type

            # n_filters and hidden_size are reported in one combined column;
            # n_filters takes precedence when both are present
            for size_key in ('n_filters', 'hidden_size'):
                if size_key in model_args:
                    model_args['hidden_size/n_filters'] = model_args.pop(size_key)
                    break
        else:
            # Baseline model (no training args)
            model_args = {'model_type': model_type}

        model_args['model'] = model_name
        values.append(model_args)

if not logs:
    # Fail here with a clear message instead of a KeyError inside the merge below
    raise FileNotFoundError('No tuning logs found under {}'.format(out))

# Concatenate once instead of growing the frame inside the loop
performance = pd.concat(logs)

# Join the training arguments with the logged metrics; the log's 'model'
# column holds the model directory name
df = pd.DataFrame(values)\
        .merge(performance, on=['model', 'model_type'])\
        .rename(columns={'model': 'model_dir'})

# Rename model type to a more interpretable name
type_to_name = {'RandomForestClass': 'RF', 'RandomForestReg': 'RF',
                'LinearSVMClass': 'SVM', 'LinearSVMReg': 'SVM', 'Constant': 'Constant', 'NN': 'NN',
                'GRU_RNN': 'GRU', 'LSTM_RNN': 'LSTM', '1d_CNN': 'CNN', 'SimpleRNN_RNN': 'SimpleRNN'}
df['model'] = df['model_type'].apply(lambda x: type_to_name[x])

df.groupby('model').first()

<b> Make the appendix </b>

In [None]:
# Appendix table: all tuned models with their hyperparameters and train/test
# metrics, without the directory/type bookkeeping columns.
#
# The rows are ordered here, while the metrics are still numeric; sorting the
# formatted strings further down would be lexicographic ('10.000' < '9.000').
#   regression:     by model, then lowest test MAE first
#   classification: highest test AUC, then F1, then Accuracy first
#                   (the descending order also applies to the model name)
if prob[0] == 'regression':
    sort_columns, sort_ascending = ['model', measure_labels[0]], True
else:
    sort_columns, sort_ascending = ['model'] + measure_labels, False
stats_all_models = df.sort_values(by=sort_columns, ascending=sort_ascending)\
        .loc[:, ~df.columns.isin(['model_type', 'model_dir'])].copy()

# Print format of each hyperparameter column. Which columns exist depends on
# the model types that were loaded, so every column is checked first.
column_formats = {'L2': '{:.0e}', 'hidden_size/n_filters': '{:g}',
                  'n_estimators': '{:g}', 'epsilon': '{:.0e}'}
for column, fmt in column_formats.items():
    if column in stats_all_models.columns:
        stats_all_models[column] = stats_all_models[column].map(fmt.format)

# For each measure (MAE, RMSE, etc.) print the test and train value with three
# decimals and shorten the column names to 'test <measure>' / 'train <measure>'
for m in measure_labels:
    new_m = 'train_' + m.split('_', 1)[1]
    stats_all_models[m] = stats_all_models[m].map('{:.3f}'.format)
    stats_all_models[new_m] = stats_all_models[new_m].map('{:.3f}'.format)
    stats_all_models = stats_all_models.rename(columns={m: 'test ' + m.split('_')[-1],
                                                        new_m: 'train ' + new_m.split('_')[-1]})

# Replace nans with -, looks nicer ('nan' is what the formatting above makes of a NaN)
stats_all_models = stats_all_models.replace(np.nan, '-').replace('nan', '-')

# Drop some uninformative info; columns that a log does not contain are skipped
stats_all_models = stats_all_models.drop(['epochs', 'best epoch', 'elapsed time'],
                                         axis='columns', errors='ignore')
stats_all_models.head()

In [43]:
# Save appendix; to_csv does not create a missing output directory itself
os.makedirs('tables', exist_ok=True)
stats_all_models.to_csv('tables/all_models_{}.csv'.format(pred[0]), index=False)

In [None]:
# Metrics to plot, one panel each, on a 2 x 2 grid
if prob[0] == 'classification':
    col_measures = ['test AUC', 'test F1', 'test Accuracy']
else:
    col_measures = ['test MAE', 'test RMSE', 'test Max AE', 'test R2']
rows, cols = 2, 2

# Rank used to draw the models in a fixed order
custom_dict = {'CNN': 0, 'LSTM': 1, 'GRU': 2, 'SimpleRNN': 3, 'NN': 4, 'RF': 5, 'SVM': 6, 'Constant': 7}

# Plot from a numeric copy. The appendix table holds formatted strings (with
# '-' for missing values) and is left untouched, so re-running the cell that
# saves it still writes the same file. Values that are not numbers become NaN.
plot_stats = stats_all_models.sort_values(by=['model'], key=lambda x: x.map(custom_dict))
plot_stats[col_measures] = plot_stats[col_measures].apply(pd.to_numeric, errors='coerce')

# Define figures and axes
fig, axs = plt.subplots(rows, cols, figsize=(5.972, 8.67 * .5), sharex=True, sharey=False)

# Loop through each different measure
for m, ax in zip(col_measures, axs.reshape(-1)):
    # One bar per model showing its best value: the minimum for the regression
    # errors, the maximum for R2 and for the classification scores
    sns.barplot(x='model', y=m, hue='model', dodge=False,
                data=plot_stats,
                ci=100, errwidth=1.5, capsize=.05,
                estimator=min if m != 'test R2' and prob[0] == 'regression' else max,
                ax=ax)
    ax.get_legend().remove()
    ax.set_xlabel('')
    ax.set_xticks([])
    ax.set_ylabel(' '.join(m.split(' ')[1:]))

    # Zoom the y axis onto the bars. The margin is taken from the absolute
    # value so that negative bars (possible for R2) are padded instead of cropped.
    heights = np.array([p.get_height() for p in ax.patches], dtype=float)
    heights = heights[np.isfinite(heights)]
    if heights.size:
        lo, hi = heights.min(), heights.max()
        ax.set_ylim((lo - 0.02 * abs(lo), hi + 0.01 * abs(hi)))

# Remove last axes (empty)
if prob[0] == 'classification':
    axs.reshape(-1)[-1].remove()

# Add legend and save plots
axs.reshape(-1)[1].legend(loc=7, bbox_to_anchor=(1.8, 0.3), prop={"size":8})
fig.tight_layout()
fig.subplots_adjust(right=0.8)
os.makedirs(join('output', 'figures'), exist_ok=True)  # savefig does not create directories
plt.savefig('output/figures/best_models_{}.pdf'.format(pred[0]))
plt.savefig('output/figures/best_models_{}.png'.format(pred[0]))

# Performance Time Steps + Status Group

<b> Load data for RNN and other models </b>

In [None]:
# Load the reshaped data sets of the selected label: `RNN_data` is passed to
# the recurrent models, `data` to all other models.
# NOTE: unpickling can execute arbitrary code; only open files produced by
# this project's own preprocessing.
with open('data/reshaped/PDS5_IRP_{}.pickle'.format(pred[0]), 'rb') as infile:
    data = pickle.load(infile)
with open('data/reshaped/PDS5_IRP_RNN_{}.pickle'.format(pred[0]), 'rb') as infile:
    RNN_data = pickle.load(infile)

#### Load all tuned models using the wrappers

In [None]:
%%capture
# Wrapper class behind each short model name. The classes are looked up by
# name so that only the wrappers of the model types that are actually loaded
# have to be imported; a wrapper that is needed but missing is reported below.
if prob[0] == 'regression':
    wrapper_names = {'RF': 'RandomForestReg', 'SVM': 'LinearSVMReg', 'Constant': 'Constant', 'GRU': 'RNN',
                     'LSTM': 'RNN', 'CNN': 'CNN', 'NN': 'NN', 'SimpleRNN': 'RNN'}
else:
    wrapper_names = {'RF': 'RandomForestClass', 'SVM': 'LinearSVMClass', 'Constant': 'Constant', 'GRU': 'RNN',
                     'LSTM': 'RNN', 'CNN': 'CNN', 'NN': 'NN', 'SimpleRNN': 'RNN'}
name_to_model = {short: globals()[cls_name] for short, cls_name in wrapper_names.items()
                 if cls_name in globals()}

model_to_name = dict((v,k) for k, v in name_to_model.items())

# Load all models using the model wrappers.
# `name` is the model type, which is also the tuning sub-directory (e.g.
# 'GRU_RNN'); `model` is the short name (e.g. 'GRU'). The recurrent check has
# to use the short name: compared against `name` it never matched, so the
# recurrent models were given `data` instead of `RNN_data`.
all_models = []
for name, model_dir, model in zip(df['model_type'].values, df['model_dir'].values,
                                  df['model'].values):
    # Constant baseline: built directly, not loaded from a model directory
    if name == 'Constant':
        all_models.append(Constant(data=data, outdir=out, load_from_dir=False))
        continue

    if model not in name_to_model:
        raise NameError('The model wrapper for {} ({}) is not imported'.format(model, wrapper_names.get(model)))

    # Recurrent models use the RNN data set, all other algorithms the regular one
    model_data = RNN_data if model in ['SimpleRNN', 'LSTM', 'GRU'] else data
    all_models.append(name_to_model[model](data=model_data, outdir=join(out, name, model_dir),
                                           load_from_dir=True))

#### Calculate performance per time step

In [None]:
all_values = []

# Collect the per-time-step test metrics of every loaded model
for mod in all_models:
    # All recurrent models share the RNN wrapper class, so they are named
    # after their recurrent layer and evaluated on the RNN data set
    is_rnn = mod.__class__.__name__ == 'RNN'
    name = mod.recurrent_layer.__name__ if is_rnn else mod.__class__.__name__
    plotter = Plotter(data=RNN_data if is_rnn else data, model=mod, outdir=mod.outdir,
                      save_mode=False, show_mode=False)

    # One block of rows per predicted label, with the model name as extra last column
    for p in pred:
        step_metrics = plotter.forecasting_measures('test', p).values
        name_column = np.vstack([name] * step_metrics.shape[0])
        all_values.append(np.hstack([step_metrics, name_column]))
del(plotter)

# Stack everything into one dataframe and shorten the wrapper class names
performance_timesteps = (
    pd.DataFrame(np.concatenate(all_values),
                 columns=['n', 'time step (t)', 'xlabel', 'metric', 'value', 'model'])
    .replace(to_replace=r'RandomForest.+', value='RF', regex=True)
    .replace(to_replace=r'.+SVM.+', value='SVM', regex=True)
    .assign(value=lambda frame: frame['value'].astype(float))
)
performance_timesteps.head()

<b> Plot performance per time step </b>

In [None]:
# Get correct metrics
if prob[0] == 'regression':
    measures = ['MAE', 'RMSE', 'Max AE', 'R2']
else:
    measures = ['AUC', 'F1', 'Accuracy']

# Create dictionary to order the models in the same way
custom_dict = {'CNN': 0, 'LSTM': 1, 'GRU': 2, 'SimpleRNN': 3, 'NN': 4, 'RF': 5, 'SVM': 6, 'Constant': 7}

# Define figure and axes
fig, axs = plt.subplots(len(measures), 1, figsize=(5.972, 8.67 * .8), sharex=True, sharey=False)
for m, ax in zip(measures, axs):
    # Rows of this measure, ordered by model rank and then by time step.
    # Each column gets its own sort key: pushing the time step through the
    # model-rank dict would turn it into NaN and disable the secondary sort.
    # Time steps that are not numbers become NaN and keep their relative order.
    metric_rows = performance_timesteps.loc[performance_timesteps['metric'] == m]
    metric_rows = metric_rows.assign(
        model_rank=metric_rows['model'].map(custom_dict),
        step_rank=pd.to_numeric(metric_rows['time step (t)'], errors='coerce'),
    ).sort_values(by=['model_rank', 'step_rank'])

    # Barplot of measure m: best value per model and time step (minimum for
    # the regression errors, maximum for R2 and the classification scores)
    sns.barplot(x='xlabel', y='value', hue='model',
                data=metric_rows,
                ci=100, errwidth=1.5, capsize=.05,
                estimator=min if m != 'R2' and prob[0] == 'regression' else max,
                ax=ax)
    ax.get_legend().remove()
    ax.set_xlabel('')
    ax.set_ylabel(m)

    # Zoom the y axis onto the bars. The margin is taken from the absolute
    # value so that negative bars (possible for R2) are padded instead of cropped.
    heights = np.array([p.get_height() for p in ax.patches], dtype=float)
    heights = heights[np.isfinite(heights)]
    if heights.size:
        lo, hi = heights.min(), heights.max()
        ax.set_ylim((lo - 0.02 * abs(lo), hi + 0.01 * abs(hi)))

# Add legend and save figure
axs[1].legend(loc=7, bbox_to_anchor=(1.3, 0.5), prop={"size":8})
fig.tight_layout()
fig.subplots_adjust(right=0.8)
os.makedirs(join('results', 'figures'), exist_ok=True)  # savefig does not create directories
plt.savefig('results/figures/temporal_performance_{}.pdf'.format(pred[0]))
plt.savefig('results/figures/temporal_performance_{}.png'.format(pred[0]))

#### Calculate performance per status group

In [None]:
all_values = []

# Collect the per-status-group test metrics of every loaded model
for mod in all_models:
    # All recurrent models share the RNN wrapper class, so they are named
    # after their recurrent layer and evaluated on the RNN data set
    is_rnn = mod.__class__.__name__ == 'RNN'
    name = mod.recurrent_layer.__name__ if is_rnn else mod.__class__.__name__
    plotter = Plotter(data=RNN_data if is_rnn else data, model=mod, outdir=mod.outdir,
                      save_mode=False, show_mode=False)

    # Calculate the measures per status group for each label (only 1),
    # with the model name as extra last column
    for p in pred:
        values = plotter.measures_per_status('test', p).values
        values = np.hstack([values, np.vstack([name] * values.shape[0])])
        all_values.append(values)
del(plotter)

# Create pandas dataframe of the evaluation metrics
performance_status = pd.DataFrame(np.concatenate(all_values),
                                  columns=['status', 'measure', 'value', 'model'])
performance_status = performance_status.replace(to_replace=r'RandomForest.+', value='RF', regex=True)
performance_status = performance_status.replace(to_replace=r'.+SVM.+', value='SVM', regex=True)
# Stacking the values with the model names leaves every column as generic
# objects; make the metric explicitly numeric before it is aggregated and plotted
performance_status['value'] = performance_status['value'].astype(float)
performance_status.head()

In [None]:
# Define correct measure names
if prob[0] == 'regression':
    measures = ['MAE', 'RMSE', 'Max AE', 'R2']
else:
    measures = ['AUC', 'F1', 'Accuracy']

# Use dict to order models
custom_dict = {'CNN': 0, 'LSTM': 1, 'GRU': 2, 'SimpleRNN': 3, 'NN': 4, 'RF': 5, 'SVM': 6, 'Constant': 7}

# Figure and axes
fig, axs = plt.subplots(len(measures), 1, figsize=(5.972, 8.67 * .8), sharex=True, sharey=False)
for i, (m, ax) in enumerate(zip(measures, axs)):
    # Barplot of measure m: best value per model and status group (minimum for
    # the regression errors, maximum for R2 and the classification scores)
    sns.barplot(x='status', y='value', hue='model', dodge=True,
                data=performance_status.loc[performance_status['measure'] == m]\
                                          .sort_values(by=['model'], key=lambda x: x.map(custom_dict)),
                ci=100, errwidth=1.5, capsize=.05,
                estimator=min if m != 'R2' and prob[0] == 'regression' else max,
                ax=ax)
    ax.get_legend().remove()
    # Only the bottom panel keeps its x label
    if i != len(measures) - 1:
        ax.set_xlabel('')
    ax.set_ylabel(m)

    # Zoom the y axis onto the bars. The margin is taken from the absolute
    # value so that negative bars (possible for R2) are padded instead of cropped.
    heights = np.array([p.get_height() for p in ax.patches], dtype=float)
    heights = heights[np.isfinite(heights)]
    if heights.size:
        lo, hi = heights.min(), heights.max()
        ax.set_ylim((lo - 0.05 * abs(lo), hi + 0.01 * abs(hi)))

# Set legend and save plots
axs[1].legend(loc=7, bbox_to_anchor=(1.3, 0.5), prop={"size":8})
fig.tight_layout()
fig.subplots_adjust(right=0.8)
os.makedirs(join('results', 'figures'), exist_ok=True)  # savefig does not create directories
plt.savefig('results/figures/status_performance_{}.pdf'.format(pred[0]))
plt.savefig('results/figures/status_performance_{}.png'.format(pred[0]))

# Define best models

In [None]:
# Per model type, keep the configuration(s) with the best primary metric:
# the lowest test MAE for regression, the highest test AUC for classification
primary_metric = measure_labels[0]
higher_is_better = prob[0] != 'regression'

# Best primary-metric value of each model type, broadcast to every row of that type
best_per_type = df.groupby('model_type')[primary_metric].transform('max' if higher_is_better else 'min')

# Rows that reach their type's best value, best model first
best_models = df.loc[best_per_type == df[primary_metric]]\
        .sort_values(by=measure_labels, ascending=not higher_is_better)
best_models

<b> For each model type, load its best tuned model </b>

In [None]:
%%capture

# Wrapper class behind each short model name. The classes are looked up by
# name so that only the wrappers of the model types that are actually loaded
# have to be imported; a wrapper that is needed but missing is reported below.
if prob[0] == 'regression':
    wrapper_names = {'RF': 'RandomForestReg', 'SVM': 'LinearSVMReg', 'Constant': 'Constant', 'GRU': 'RNN',
                     'LSTM': 'RNN', 'CNN': 'CNN', 'NN': 'NN', 'SimpleRNN': 'RNN'}
else:
    wrapper_names = {'RF': 'RandomForestClass', 'SVM': 'LinearSVMClass', 'Constant': 'Constant', 'GRU': 'RNN',
                     'LSTM': 'RNN', 'CNN': 'CNN', 'NN': 'NN', 'SimpleRNN': 'RNN'}
name_to_model = {short: globals()[cls_name] for short, cls_name in wrapper_names.items()
                 if cls_name in globals()}

model_to_name = dict((v,k) for k, v in name_to_model.items())

# Load the models listed in best_models.
# `name` is the model type, which is also the tuning sub-directory (e.g.
# 'GRU_RNN'); `model` is the short name (e.g. 'GRU'). The recurrent check has
# to use the short name: compared against `name` it never matched, so the
# recurrent models were given `data` instead of `RNN_data`.
selected_models = []
for name, model_dir, model in zip(best_models['model_type'].values, best_models['model_dir'].values,
                                  best_models['model'].values):
    # Constant baseline: built directly, not loaded from a model directory
    if name == 'Constant':
        selected_models.append(Constant(data=data, outdir=out, load_from_dir=False))
        continue

    if model not in name_to_model:
        raise NameError('The model wrapper for {} ({}) is not imported'.format(model, wrapper_names.get(model)))

    # Recurrent models use the RNN data set, all other algorithms the regular one
    model_data = RNN_data if model in ['SimpleRNN', 'LSTM', 'GRU'] else data
    selected_models.append(name_to_model[model](data=model_data, outdir=join(out, name, model_dir),
                                                load_from_dir=True))

<b> Out of all best tuned models, take the best model </b>

In [None]:
# The best found model for each label
best_model_dict = {'drive': 'GRU', 'cUHDRS': 'GRU', 'motscore': 'CNN', 'tfcscore': 'GRU',
                   'sdmt1': 'GRU', 'swrt1': 'GRU'
                  }
target_model = best_model_dict[pred[0]]

# Pick, among the best model of every type, the one of the target type.
# Recurrent models share the RNN wrapper class and are identified by their
# recurrent layer instead of the class name.
for mod in selected_models:
    if (target_model == mod.__class__.__name__ or
        (mod.__class__.__name__ == 'RNN' and mod.recurrent_layer.__name__ == target_model)):
        best_mod = mod
        break
else:
    # Without this check `best_mod` would be undefined or, when the cell is
    # re-run, still hold the model of an earlier run and be written out silently
    raise LookupError('No {} model among the selected models for {}'.format(target_model, pred[0]))

# Print the best model and write its location to file
print(best_mod.__class__.__name__, best_mod.outdir)
os.makedirs('results', exist_ok=True)  # open() does not create a missing directory
with open('results/best_model_{}'.format(pred[0]), 'w') as outfile:
    outfile.write('{}\t{}\t{}\n'.format(pred[0], best_mod.__class__.__name__, best_mod.outdir))