In [1]:
#load datasets
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
import os

In [2]:
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error
def mae(ytrue, ypred): return mean_absolute_error(ytrue, ypred)
def mape(ytrue, ypred): return 100 * mean_absolute_percentage_error(ytrue, ypred)
def smape(ytrue, ypred): return 200 * np.mean(np.abs(ytrue - ypred) / (np.abs(ytrue) + np.abs(ypred)))
def rmse(ytrue, ypred): return math.sqrt(mean_squared_error(ytrue, ypred))
def rmae(ytrue, ypred, ynaive): return mean_absolute_error(ytrue, ypred) / mean_absolute_error(ytrue, ynaive)
def dae(ytrue, ypred):
    try:
        error = mae(ytrue.mean(axis=1), ypred.mean(axis=1))
    except:
        error = mae(ytrue, ypred)
    return error

In [3]:
os.chdir("c://Users//matis//OneDrive//Documents//ENSTA//3A//Capstone//project//tschora//EPFDAML")
from work.models.Feature import Naive
from work.models.Splitter import MySplitter
dataset = "2"

def load_naive_forecasts(dataset, country, labels):  
    naive_forecasts = {"train" : {}, "test" : {}, "validation": {}, "test_recalibrated" : {}}
    os.chdir("c://Users//matis//OneDrive//Documents//ENSTA//3A//Capstone//project//tschora//EPFDAML")

    if dataset in ("", "2", "3"): nval = 362
    else: nval = 365  
    spliter = MySplitter(nval, shuffle=False)
    model_wrapper = Naive("NAIVE", f"EPF{dataset}_{country}", labels) 
    # Need to re-split for taking the validation prices
    X, y = model_wrapper.load_train_dataset()
    ((Xtr, ytr), (Xv, yv)) = spliter(X, y)
    # Also computes the naive forecasts    
    naive_forecasts["validation"][country] = model_wrapper.predict(None, Xv) 
    naive_forecasts["train"][country] = model_wrapper.predict(None, Xtr)           
    Xt, yt = model_wrapper.load_test_dataset()
    naive_forecasts["test"][country] = model_wrapper.predict(None, Xt)
    naive_forecasts["test_recalibrated"][country] = model_wrapper.predict(None, Xt)  

    return naive_forecasts




In [4]:
country = "FRBE"
version = "2"


for country in ["FRBE", "DEBE", "FRDE"]:
    # print("country", country)
    os.chdir("c://Users//matis//OneDrive//Documents//ENSTA//3A//Capstone//project//tschora//EPFDAML//data")
    path_predict = f"datasets\EPF{version}_{country}\MLP_TSCHORA_EPF{version}_{country}_test_predictions.csv"
    df_predict = pd.read_csv(path_predict, sep=",", index_col=None)
    path_true = f"datasets/EPF{version}_{country}/test.csv"
    df_true = pd.read_csv(path_true, sep=",", index_col=None)

    if len(country)==4:
        countries = [country[:2], country[2:]]
    elif len(country)==6:
        countries = [country[:2], country[2:4], country[4:]]
    else:  countries = [country]

    cols_price = []
    for c in countries:
        cols_price += [c+"_price_"+str(i) for i in range(24)]
    
    naive = load_naive_forecasts(version, country, cols_price)

    # Metrics for multioutput models
    # generalize metrics (on all countries)
    ytrue = df_true[cols_price].values.reshape(1,-1)
    ypred = df_predict.values.reshape(1,-1)
    ynaive = naive["test"][country].reshape(1,-1)
    # print(ytrue.shape, ypred.shape, ynaive.shape)
    mae_val,mape_val, smape_val, rmse_val, rmae_val, dae_val = mae(ytrue, ypred), mape(ytrue, ypred), smape(ytrue, ypred), rmse(ytrue, ypred), rmae(ytrue, ypred, ynaive), dae(ytrue, ypred)

    # single metrics (on single country)
    mae_list, mape_list, smape_list, rmse_list, rmae_list, dae_list = [], [], [], [], [], []
    for i, c in enumerate(countries):
        cols_c = [c+"_price_"+str(i) for i in range(24)]
        ytrue = df_true[cols_c].values.reshape(1,-1)
        ypred = df_predict.iloc[:, 24*i:24*(i+1)].values.reshape(1,-1)
        ynaive = naive["test"][country][:, 24*i:24*(i+1)].reshape(1,-1)
        mae_list.append(mae(ytrue, ypred))
        mape_list.append(mape(ytrue, ypred))
        smape_list.append(smape(ytrue, ypred))
        rmse_list.append(rmse(ytrue, ypred))
        rmae_list.append(rmae(ytrue, ypred, ynaive))
        dae_list.append(dae(ytrue, ypred))



    # Metrics associated to single output models concatnated :
    dataset_list_pred = []
    dataset_list_true = []
    dataset_list_naive = []
    for c in countries:
        cols_c = [c+"_price_"+str(i) for i in range(24)]
        os.chdir("c://Users//matis//OneDrive//Documents//ENSTA//3A//Capstone//project//tschora//EPFDAML//data")
        dataset_list_pred.append(pd.read_csv(
            f"datasets\EPF{version}_{c}\MLP_TSCHORA_EPF{version}_{c}_test_predictions.csv",
            sep=",").values.reshape(1,-1))
        dataset_list_true.append(pd.read_csv(
            f"datasets/EPF{version}_{c}/test.csv",
            sep=",")[cols_c].values.reshape(1,-1))
        dataset_list_naive.append(load_naive_forecasts(version, c, cols_c)["test"][c].reshape(1,-1))
    ytrue = np.concatenate(dataset_list_true)
    ypred = np.concatenate(dataset_list_pred)
    ynaive = np.concatenate(dataset_list_naive)
    # General metrics
    mae_val2,mape_val2, smape_val2, rmse_val2, rmae_val2, dae_val2 = mae(ytrue, ypred), mape(ytrue, ypred), smape(ytrue, ypred), rmse(ytrue, ypred), rmae(ytrue, ypred, ynaive), dae(ytrue, ypred)
    # Single metrics (calculate on single country)
    mae_list2, mape_list2, smape_list2, rmse_list2, rmae_list2, dae_list2 = [], [], [], [], [], []
    for i, c in enumerate(countries):
        cols_c = [c+"_price_"+str(i) for i in range(24)]
        ytrue = dataset_list_true[i]
        ypred = dataset_list_pred[i]
        ynaive = dataset_list_naive[i]
        mae_list2.append(mae(ytrue, ypred))
        mape_list2.append(mape(ytrue, ypred))
        smape_list2.append(smape(ytrue, ypred))
        rmse_list2.append(rmse(ytrue, ypred))
        rmae_list2.append(rmae(ytrue, ypred, ynaive))
        dae_list2.append(dae(ytrue, ypred))

    # print results in dataframe
    res_gen = pd.DataFrame({"MAE": mae_val, "MAPE": mape_val, "SMAPE": smape_val, "RMSE": rmse_val, "RMAE": rmae_val, "DAE": dae_val}, index=[f"General ({country})"])
    res_uni = pd.DataFrame({"MAE": mae_list, "MAPE": mape_list, "SMAPE": smape_list, "RMSE": rmse_list, "RMAE": rmae_list, "DAE": dae_list}, index=countries)
    res_gen2 = pd.DataFrame({"MAE": mae_val2, "MAPE": mape_val2, "SMAPE": smape_val2, "RMSE": rmse_val2, "RMAE": rmae_val2, "DAE": dae_val2}, index=["General"])
    res_uni2 = pd.DataFrame({"MAE": mae_list2, "MAPE": mape_list2, "SMAPE": smape_list2, "RMSE": rmse_list2, "RMAE": rmae_list2, "DAE": dae_list2}, index=countries)
    res = pd.concat([res_gen, res_uni])
    res2 = pd.concat([res_gen2, res_uni2])
    print(f"\nResults for {country} - EPF{version} - MLP_TSCHORA_EPF{version}_{country}")
    print(res)
    print(f"\nResults for {countries} (single models concatenated) - EPF{version} - MLP_TSCHORA_EPF{version}_{countries}")
    print(res2)

        






Results for FRBE - EPF2 - MLP_TSCHORA_EPF2_FRBE
                     MAE       MAPE      SMAPE       RMSE      RMAE       DAE
General (FRBE)  5.721823  21.957466  15.008659  14.882179  0.736975  2.136997
FR              4.791156  15.025672  13.934142  12.382026  0.714893  1.602307
BE              6.652491  28.889260  16.083176  17.018929  0.753742  2.671687

Results for ['FR', 'BE'] (single models concatenated) - EPF2 - MLP_TSCHORA_EPF2_['FR', 'BE']
              MAE       MAPE      SMAPE       RMSE      RMAE       DAE
General  5.744474  20.498599  14.617581  15.033421  0.739892  1.569685
FR       4.590051  14.170610  13.132941  12.266955  0.684886  1.559840
BE       6.898896  26.826587  16.102222  17.364599  0.781661  1.579529

Results for DEBE - EPF2 - MLP_TSCHORA_EPF2_DEBE
                     MAE          MAPE      SMAPE       RMSE      RMAE  \
General (DEBE)  5.677762  1.082490e+14  18.201424  13.010289  0.705734   
DE              4.742462  2.164981e+14  20.642291   6.484919  0.