![spam](img/model_variants.png)

<summary>
    <font size="4" color="orange"><b>1. Importing libraries and functions</b></font>
</summary>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Easy graphs with plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn.metrics import mean_absolute_percentage_error
import pickle as pk

import scipy
from scipy import stats

<summary>
    <font size="4" color="orange"><b>2. Loading variants results datasets</b></font>
</summary>

In [2]:
lstm_ed_10h_3p = pd.read_csv('./data/corrida_pcagral_10_3.csv')
lstm_ed_10h_5p = pd.read_csv('./data/corrida_pcagral_10_5.csv')
lstm_ed_25h_3p = pd.read_csv('./data/corrida_pcagral_25_3.csv')
lstm_ed_25h_5p = pd.read_csv('./data/corrida_pcagral_25_5.csv')
lstm_ed_100h_3p = pd.read_csv('./data/corrida_pcagral_100_3.csv')
lstm_ed_100h_5p = pd.read_csv('./data/corrida_pcagral_100_5.csv')

In [3]:
lstm_ed_100h_5p

Unnamed: 0.1,Unnamed: 0,FECHA-HORA,MAPE,R2
0,0,2021-03-29 23:00:00,1.339173,0.995702
1,1,2021-03-30 23:00:00,1.045659,0.991121
2,2,2021-03-31 23:00:00,3.625760,0.951301
3,3,2021-04-01 23:00:00,2.329484,0.971930
4,4,2021-04-02 23:00:00,2.037539,0.978940
...,...,...,...,...
353,353,2022-03-17 23:00:00,1.131393,0.993118
354,354,2022-03-18 23:00:00,1.439274,0.984754
355,355,2022-03-19 23:00:00,1.475550,0.978276
356,356,2022-03-20 23:00:00,3.167992,0.975520


In [4]:
#function to modify features names according to variant
def ajustar_df(df,nombre):
   # del df['Unnamed: 0']
    name_mape = f'MAPE_{nombre}'
    name_r2 = f'R2_{nombre}'
    df.rename(columns={'MAPE':name_mape, 'R2':name_r2}, inplace=True)

In [5]:
#appling ajustar_df function
ajustar_df(lstm_ed_10h_3p, 'lstm_ed_10h_3p')
ajustar_df(lstm_ed_10h_5p, 'lstm_ed_10h_5p')
ajustar_df(lstm_ed_25h_3p, 'lstm_ed_25h_3p')
ajustar_df(lstm_ed_25h_5p, 'lstm_ed_25h_5p')
ajustar_df(lstm_ed_100h_3p, 'lstm_ed_100h_3p')
ajustar_df(lstm_ed_100h_5p, 'lstm_ed_100h_5p')

In [6]:
#merging all variants dataset
comparativo = pd.merge(lstm_ed_10h_3p, lstm_ed_10h_5p, on='FECHA-HORA', how='left')
comparativo = pd.merge(comparativo, lstm_ed_25h_3p, on='FECHA-HORA', how='left')
comparativo = pd.merge(comparativo, lstm_ed_25h_5p, on='FECHA-HORA', how='left')
comparativo = pd.merge(comparativo, lstm_ed_100h_3p, on='FECHA-HORA', how='left')
comparativo = pd.merge(comparativo, lstm_ed_100h_5p, on='FECHA-HORA', how='left')
comparativo.columns

  comparativo = pd.merge(comparativo, lstm_ed_25h_5p, on='FECHA-HORA', how='left')
  comparativo = pd.merge(comparativo, lstm_ed_100h_5p, on='FECHA-HORA', how='left')


Index(['Unnamed: 0_x', 'FECHA-HORA', 'MAPE_lstm_ed_10h_3p',
       'R2_lstm_ed_10h_3p', 'Unnamed: 0_y', 'MAPE_lstm_ed_10h_5p',
       'R2_lstm_ed_10h_5p', 'Unnamed: 0_x', 'MAPE_lstm_ed_25h_3p', 'REAL',
       'ESTIMADO', 'Unnamed: 0_y', 'MAPE_lstm_ed_25h_5p', 'R2_lstm_ed_25h_5p',
       'Unnamed: 0_x', 'MAPE_lstm_ed_100h_3p', 'R2_lstm_ed_100h_3p',
       'Unnamed: 0_y', 'MAPE_lstm_ed_100h_5p', 'R2_lstm_ed_100h_5p'],
      dtype='object')

In [7]:
#creating new dataframe 
compara = pd.melt(comparativo, value_vars=['MAPE_lstm_ed_10h_3p', 'MAPE_lstm_ed_10h_5p', 
                                           'MAPE_lstm_ed_25h_3p', 'MAPE_lstm_ed_25h_5p',
                                           'MAPE_lstm_ed_100h_3p', 'MAPE_lstm_ed_100h_5p'])

In [8]:
compara

Unnamed: 0,variable,value
0,MAPE_lstm_ed_10h_3p,2.989658
1,MAPE_lstm_ed_10h_3p,1.101525
2,MAPE_lstm_ed_10h_3p,3.665796
3,MAPE_lstm_ed_10h_3p,2.089148
4,MAPE_lstm_ed_10h_3p,3.155257
...,...,...
2143,MAPE_lstm_ed_100h_5p,1.131393
2144,MAPE_lstm_ed_100h_5p,1.439274
2145,MAPE_lstm_ed_100h_5p,1.475550
2146,MAPE_lstm_ed_100h_5p,3.167992


In [14]:
#violin plot. Model Variants
import plotly.graph_objects as go

from plotly.offline import iplot

fig = go.Figure()

corridas = compara['variable'].unique()

for corrida in corridas:
    fig.add_trace(go.Violin(x=compara['variable'][compara['variable'] == corrida],
                            y=compara['value'][compara['variable'] == corrida],
                            name=corrida,
                            box_visible=True,
                            meanline_visible=True))
    
fig.update_layout(title_text="Violin Plot. Model Hyperparameters Variants", height=600)     
fig.update_layout(showlegend=False) 
fig.show()
iplot(fig, image='svg', filename='Hyperparameters_Variants', image_width=1280, image_height=640)

In [15]:
#find mape > 5
mayoresde5 = compara[compara['value']> 4.99]

In [16]:
#new dataframe whit qtys
conteomayor5 = mayoresde5['variable'].value_counts().rename_axis('model').reset_index(name='qty_days_mape>5')
conteomayor5

Unnamed: 0,model,qty_days_mape>5
0,MAPE_lstm_ed_25h_3p,41
1,MAPE_lstm_ed_100h_5p,27
2,MAPE_lstm_ed_10h_3p,21
3,MAPE_lstm_ed_100h_3p,21
4,MAPE_lstm_ed_10h_5p,18
5,MAPE_lstm_ed_25h_5p,12


In [18]:
#Bar plot. MAPE>5
fig = go.Figure([go.Bar(x=conteomayor5['model'], y=conteomayor5['qty_days_mape>5'])])

fig.update_traces(marker_color=['#36CC96', '#34D6F4', '#6E78FA','#FCAC6D','#F06149' ,'#B16EFA'])
fig.update_layout(
        height=550, width=950,
        title=go.layout.Title(text="Models with higher amount of days with MAPE > 5"),
        xaxis_title="Models",
        yaxis_title="Days",
    )
fig.show()
iplot(fig, image='svg', filename='hyperparameters_MAPE_mayor_5', image_width=1280, image_height=640)