In [1]:
import pandas as pd
import numpy as np
import os

from scipy.stats import pearsonr
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [2]:
def perform_metrics(y_truth, y_forecasted):
    """Compute forecast-accuracy metrics for observed vs. predicted values.

    Parameters
    ----------
    y_truth : array-like supporting element-wise arithmetic
        Observed values (e.g. a pandas Series or NumPy array). The percentage
        metrics divide by these values, so zeros here produce infinite or
        NaN percentage errors.
    y_forecasted : array-like supporting element-wise arithmetic
        Predicted values, same length as ``y_truth``.

    Returns
    -------
    tuple
        ``(mae, mape, mse, rmse, rmspe, r2, pearson)``, each rounded to
        8 decimal places. ``mape`` and ``rmspe`` are expressed in percent.
        ``pearson`` is ``None`` when ``pearsonr`` rejects the input with a
        ``ValueError``.
    """
    mae = round(mean_absolute_error(y_truth, y_forecasted), 8)

    # The relative error feeds both MAPE and RMSPE; compute it once.
    relative_error = (y_truth - y_forecasted) / y_truth
    mape = round(np.mean(np.abs(relative_error)) * 100, 8)

    # Keep the unrounded MSE so RMSE is not derived from an already-rounded
    # value (a tiny MSE would otherwise round to 0 and yield RMSE == 0).
    raw_mse = mean_squared_error(y_truth, y_forecasted)
    mse = round(raw_mse, 8)
    rmse = round(raw_mse ** 0.5, 8)

    rmspe = round(np.sqrt(np.mean(np.square(relative_error))) * 100, 8)
    r2 = round(r2_score(y_truth, y_forecasted), 8)

    try:
        corr, _ = pearsonr(y_truth, y_forecasted)
        pearson = round(corr, 8)
    except ValueError:
        # Best effort: report a missing correlation instead of aborting, but
        # do not swallow unrelated errors such as KeyboardInterrupt.
        pearson = None

    return mae, mape, mse, rmse, rmspe, r2, pearson

In [3]:
# Inspect which files are available in the train/test dataset directory.
list_files = os.listdir(
    '/home/ricardo/Documents/tcc_files/TCC/tcc_scripts_notebook/datasets/train_test'
)
list_files

['test_ma_1d.csv',
 'test_naive_1h.csv',
 'test_prophet_1h.csv',
 'test_lstm_1h.csv',
 'test_ses_1d.csv',
 'gridcv',
 'test_ar_1d.csv',
 'test_lstm_1d.csv',
 'test_arima_1d.csv',
 'test_naive_1d.csv',
 'test_arima_1h.csv',
 'test_ses_1h.csv',
 'train_lstm_1d.csv',
 'test_prophet_1d.csv',
 'test_ar_1h.csv',
 'test_ma_1h.csv']

In [36]:
# Best hyper-parameters recorded for each model, keyed by the test-result
# file they belong to. Keys are in Portuguese; English equivalents:
#   ordem_p / ordem_d / ordem_q -> p / d (differencing) / q orders
#   alfa -> alpha, lote -> batch size, unidades -> number of units,
#   ciclos -> epochs
best_params = {
    'test_ma_1d.csv': {"ordem_p": 0, "ordem_d": 1, "ordem_q": 5},
    'test_ma_1h.csv': {"ordem_p": 0, "ordem_d": 1, "ordem_q": 2},
    'test_naive_1h.csv': {},
    'test_naive_1d.csv': {},
    'test_prophet_1h.csv': {"ordem_d": 1},
    'test_prophet_1d.csv': {"ordem_d": 1},
    'test_ses_1d.csv': {"alfa": 0.9},
    'test_ses_1h.csv': {"alfa": 0.9},
    'test_ar_1d.csv': {"ordem_p": 5, "ordem_d": 1, "ordem_q": 0},
    'test_ar_1h.csv': {"ordem_p": 4, "ordem_d": 1, "ordem_q": 0},
    'test_arima_1d.csv': {"ordem_p": 5, "ordem_d": 0, "ordem_q": 4},
    'test_arima_1h.csv': {"ordem_p": 4, "ordem_d": 1, "ordem_q": 3},
    'test_lstm_1d.csv': {"lote": 5, "unidades": 50, "ciclos": 200},
    'test_lstm_1h.csv': {"lote": 100, "unidades": 100, "ciclos": 200},
}

# One list per output column; the insertion order below fixes the column
# order of df_metrics.
data = {
    'model': [],
    'periodicity': [],
    'best_params': [],
    'mae': [],
    'mape': [],
    'mse': [],
    'rmse': [],
    'rmspe': [],
    'r2': [],
    'pearson': []
}

# Single definition of the directory that holds the prediction files.
TRAIN_TEST_DIR = '/home/ricardo/Documents/tcc_files/TCC/tcc_scripts_notebook/datasets/train_test'

# os.listdir returns entries in arbitrary order; sort for reproducible runs.
list_files = sorted(os.listdir(TRAIN_TEST_DIR))

for file in list_files:

    file_aux = file.split('_')

    # Only entries named test_<model>_<periodicity>.csv are evaluated; anything
    # else in the directory (e.g. 'train_lstm_1d.csv', 'gridcv') is skipped.
    if file_aux[0] != 'test':
        continue

    df_aux = pd.read_csv(os.path.join(TRAIN_TEST_DIR, file), sep='\t')

    mae, mape, mse, rmse, rmspe, r2, pearson = perform_metrics(df_aux['y_test'], df_aux['y_test_predict'])

    model = file_aux[1].upper()
    periodicity = file_aux[2].split('.')[0].upper()

    # Build the full row first so a failure (e.g. a file missing from
    # best_params) cannot leave the column lists with different lengths.
    row = {
        'model': model,
        'periodicity': periodicity,
        'best_params': best_params[file],
        'mae': mae,
        'mape': mape,
        'mse': mse,
        'rmse': rmse,
        'rmspe': rmspe,
        'r2': r2,
        'pearson': pearson,
    }
    for column, value in row.items():
        data[column].append(value)

# 'model' alone is not unique (one row per periodicity), so include
# 'periodicity' to make the row order deterministic.
df_metrics = pd.DataFrame(data).sort_values(by=['model', 'periodicity']).reset_index(drop = True)

In [44]:
# Rank every model/periodicity pair by RMSE (lowest first).
df_metrics = df_metrics.sort_values(by=['rmse']).reset_index(drop = True)

# Report table for the daily ('1D') series: keep only the model name and the
# MAPE / RMSE / R² columns, relabelled with their presentation headers.
a = (
    df_metrics
    .loc[df_metrics['periodicity'] == '1D', ['model', 'mape', 'rmse', 'r2']]
    .rename(columns = {'model': 'Modelo', 'mape': 'MAPE', 'rmse': 'RMSE', 'r2': 'R²'})
)
a.round(4)

Unnamed: 0,Modelo,MAPE,RMSE,R²
7,SES,2.9498,397.604,0.9311
8,NAIVE,2.9779,400.8768,0.9299
9,LSTM,3.0512,403.4848,0.929
10,PROPHET,3.044,405.0744,0.9284
11,MA,3.0547,408.3656,0.9273
12,AR,3.0618,409.0631,0.927
13,ARIMA,3.1091,412.0663,0.926
