In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import math

# plotly
import plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import plotly.io as pio
import kaleido

import os
import datetime
from  datetime import timedelta, time
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, TimeSeriesSplit
from sklearn.linear_model import LinearRegression 
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler,StandardScaler

In [None]:
model_name_list=['lr','svr','rf','lgbm','ann','3dcnn','cnnlstm']

# 1 step analysis

In [9]:
def cal_rank(df, model = 'lgbm', fh_step=1, rank_best=True, ):
    rank_list = []
    for site in set(df['site']):
        for d in set(df['Datetime'].dt.date):
            cond = (df['site'] == site) & (df['Datetime'].dt.date == d)
            df_day = df[cond]
            y_true = df_day[f'I_lead_{fh_step}step'].to_numpy()
            y_pred = df_day[f'I_pred_{model}'].to_numpy()
            if y_pred.shape[0] !=0 :
                mae = mean_absolute_error(y_true, y_pred)
                rank_list.append([site, d, mae])
    rank_df = pd.DataFrame(rank_list, columns=['site', 'Date', 'MAE'])
    rank_df = rank_df.sort_values(by='MAE',ascending=rank_best)
    return rank_df

def cal_nrmse(y_test, y_pred):
    rmse = mean_absolute_error(y_test, y_pred)
    y_bar = np.mean(y_test)
#     y_bar=100
#     print(y_bar)
    nrmse = rmse / y_bar * 100
    return nrmse

In [3]:
df1step = pd.read_csv('results_data/df_test_all_model_1step.csv',parse_dates=['Datetime'])

In [10]:
time_list_1step = pd.date_range("7:00:00", "17:00:00", freq="30min")
rmse_lr_list = []
nrmse_lr_list = []
mae_lr_list = []

rmse_rf_list = []
nrmse_rf_list = []
mae_rf_list = []

rmse_lgbm_list = []
nrmse_lgbm_list = []
mae_lgbm_list = []

rmse_svr_list = []
nrmse_svr_list = []
mae_svr_list = []

rmse_ann_list = []
nrmse_ann_list = []
mae_ann_list = []

rmse_3dcnn_list = []
nrmse_3dcnn_list = []
mae_3dcnn_list = []

rmse_cnnlstm_list = []
nrmse_cnnlstm_list = []
mae_cnnlstm_list = []

fh = 1

for time_fh in time_list_1step :
    forecated_time = (time_fh - timedelta(minutes=30*fh)).time()
    df = df1step[df1step['Datetime'].dt.time == forecated_time]

    rmse_lr = mean_squared_error(df['I_lead_1step'], df['I_pred_lr'])**(0.5)
    mae_lr = mean_absolute_error(df['I_lead_1step'], df['I_pred_lr'])
    nrmse_lr = cal_nrmse(df['I_lead_1step'], df['I_pred_lr'])
    rmse_lr_list.append(rmse_lr)
    nrmse_lr_list.append(nrmse_lr)
    mae_lr_list.append(mae_lr)
    
    rmse_svr = mean_squared_error(df['I_lead_1step'], df['I_pred_svr'])**(0.5)
    mae_svr = mean_absolute_error(df['I_lead_1step'], df['I_pred_svr'])
    nrmse_svr = cal_nrmse(df['I_lead_1step'], df['I_pred_svr'])
    rmse_svr_list.append(rmse_svr)
    nrmse_svr_list.append(nrmse_svr)
    mae_svr_list.append(mae_svr)
    
    rmse_rf = mean_squared_error(df['I_lead_1step'], df['I_pred_rf'])**(0.5)
    mae_rf = mean_absolute_error(df['I_lead_1step'], df['I_pred_rf'])
    nrmse_rf = cal_nrmse(df['I_lead_1step'], df['I_pred_rf'])
    rmse_rf_list.append(rmse_rf)
    nrmse_rf_list.append(nrmse_rf)
    mae_rf_list.append(mae_rf)
    
    rmse_lgbm = mean_squared_error(df['I_lead_1step'], df['I_pred_lgbm'])**(0.5)
    mae_lgbm = mean_absolute_error(df['I_lead_1step'], df['I_pred_lgbm'])
    nrmse_lgbm = cal_nrmse(df['I_lead_1step'], df['I_pred_lgbm'])
    rmse_lgbm_list.append(rmse_lgbm)
    nrmse_lgbm_list.append(nrmse_lgbm)
    mae_lgbm_list.append(mae_lgbm)
    
    rmse_ann = mean_squared_error(df['I_lead_1step'], df['I_pred_ann'])**(0.5)
    mae_ann = mean_absolute_error(df['I_lead_1step'], df['I_pred_ann'])
    nrmse_ann = cal_nrmse(df['I_lead_1step'], df['I_pred_ann'])
    rmse_ann_list.append(rmse_ann)
    nrmse_ann_list.append(nrmse_ann)
    mae_ann_list.append(mae_ann)
    
    rmse_3dcnn = mean_squared_error(df['I_lead_1step'], df['I_pred_3dcnn'])**(0.5)
    mae_3dcnn = mean_absolute_error(df['I_lead_1step'], df['I_pred_3dcnn'])
    nrmse_3dcnn = cal_nrmse(df['I_lead_1step'], df['I_pred_3dcnn'])
    rmse_3dcnn_list.append(rmse_3dcnn)
    nrmse_3dcnn_list.append(nrmse_3dcnn)
    mae_3dcnn_list.append(mae_3dcnn)
    
    rmse_cnnlstm = mean_squared_error(df['I_lead_1step'], df['I_pred_cnnlstm'])**(0.5)
    mae_cnnlstm = mean_absolute_error(df['I_lead_1step'], df['I_pred_cnnlstm'])
    nrmse_cnnlstm = cal_nrmse(df['I_lead_1step'], df['I_pred_cnnlstm'])
    rmse_cnnlstm_list.append(rmse_cnnlstm)
    nrmse_cnnlstm_list.append(nrmse_cnnlstm)
    mae_cnnlstm_list.append(mae_cnnlstm)
  

## Hourly performance table

In [11]:
model_name_list = ['LR','SVR','RF','LGBM','ANN','3D-CNN','CNN-LSTM']
dfnrmse_time = pd.DataFrame({'LR':nrmse_lr_list, 'SVR':nrmse_svr_list, 'RF':nrmse_rf_list,
                           'LGBM':nrmse_lgbm_list, 'ANN':nrmse_ann_list, '3D-CNN':nrmse_3dcnn_list,'CNN-LSTM':nrmse_cnnlstm_list}, index = time_list_1step.time)
dfnrmse_time

Unnamed: 0,LR,SVR,RF,LGBM,ANN,3D-CNN,CNN-LSTM
07:00:00,27.591912,17.901962,17.855631,18.1562,18.282254,20.773165,18.076949
07:30:00,18.691273,16.202054,16.179281,16.408484,16.480307,16.743995,16.1171
08:00:00,15.473081,13.52418,13.719748,13.712725,13.692162,13.608089,13.484862
08:30:00,14.797635,13.650129,14.022324,13.942332,13.654906,13.880466,13.370794
09:00:00,14.899417,14.183081,14.528488,14.494393,14.313873,14.423205,13.802399
09:30:00,15.945991,15.070803,15.421942,15.498218,15.169294,14.772403,14.390992
10:00:00,15.476247,14.510295,14.778386,14.847183,14.59595,13.731729,13.617125
10:30:00,15.341181,14.424817,14.839395,14.932411,14.539401,14.041982,13.698669
11:00:00,16.694071,15.185124,15.637193,15.78162,15.366155,14.874879,14.671972
11:30:00,15.882714,14.789818,15.406197,15.421409,14.847732,13.773981,13.817273


In [12]:
def format_min(row):
    min_val = row.min()
    return row.apply(lambda x: "\textbf{" + f'{x:.2f}' + "}" if x == min_val else f'{x:.2f}')
dfnrmse_latex = dfnrmse_time.apply(format_min,axis=1)
dfnrmse_latex = dfnrmse_latex.to_latex(index=True,escape=False)
print(dfnrmse_latex)

\begin{tabular}{llllllll}
\toprule
{} &     LR &             SVR &              RF &   LGBM &             ANN &          3D-CNN &        CNN-LSTM \\
\midrule
07:00:00 &  27.59 &           17.90 &  \textbf{17.86} &  18.16 &           18.28 &           20.77 &           18.08 \\
07:30:00 &  18.69 &           16.20 &           16.18 &  16.41 &           16.48 &           16.74 &  \textbf{16.12} \\
08:00:00 &  15.47 &           13.52 &           13.72 &  13.71 &           13.69 &           13.61 &  \textbf{13.48} \\
08:30:00 &  14.80 &           13.65 &           14.02 &  13.94 &           13.65 &           13.88 &  \textbf{13.37} \\
09:00:00 &  14.90 &           14.18 &           14.53 &  14.49 &           14.31 &           14.42 &  \textbf{13.80} \\
09:30:00 &  15.95 &           15.07 &           15.42 &  15.50 &           15.17 &           14.77 &  \textbf{14.39} \\
10:00:00 &  15.48 &           14.51 &           14.78 &  14.85 &           14.60 &           13.73 &  \textbf{13.62} \\
10

  dfnrmse_latex = dfnrmse_latex.to_latex(index=True,escape=False)


In [13]:
dfmae_time = pd.DataFrame({'LR':mae_lr_list, 'SVR':mae_svr_list, 'RF':mae_rf_list,
                           'LGBM':mae_lgbm_list, 'ANN':mae_ann_list, '3DCNN':mae_3dcnn_list,'CNN-LSTM':mae_cnnlstm_list}, index = time_list_1step.time)
dfmae_latex = dfmae_time.apply(format_min,axis=1)
dfmae_latex = dfmae_latex.to_latex(index=True,escape=False)
print(dfmae_latex)

\begin{tabular}{llllllll}
\toprule
{} &      LR &             SVR &              RF &    LGBM &             ANN &           3DCNN &         CNN-LSTM \\
\midrule
07:00:00 &   32.84 &           21.31 &  \textbf{21.25} &   21.61 &           21.76 &           24.73 &            21.52 \\
07:30:00 &   37.39 &           32.41 &           32.37 &   32.83 &           32.97 &           33.50 &   \textbf{32.24} \\
08:00:00 &   46.55 &           40.69 &           41.28 &   41.26 &           41.19 &           40.94 &   \textbf{40.57} \\
08:30:00 &   56.75 &           52.35 &           53.77 &   53.47 &           52.37 &           53.23 &   \textbf{51.28} \\
09:00:00 &   69.11 &           65.79 &           67.39 &   67.23 &           66.39 &           66.90 &   \textbf{64.02} \\
09:30:00 &   85.24 &           80.56 &           82.44 &   82.85 &           81.09 &           78.97 &   \textbf{76.93} \\
10:00:00 &   94.19 &           88.31 &           89.95 &   90.37 &           88.84 &           83.58 

  dfmae_latex = dfmae_latex.to_latex(index=True,escape=False)


# all 8 steps analysis

In [11]:
model_name_list = ['lr','svr','rf','lgbm','ann','3dcnn', 'cnnlstm']
rmse_dict = {'LR':[], 'SVR':[], 'RF':[],'LGBM':[],'ANN':[],'3DCNN':[] ,'CNNLSTM':[]} 
mae_dict = {'LR':[], 'SVR':[], 'RF':[],'LGBM':[],'ANN':[],'3DCNN':[], 'CNNLSTM':[]} 
nrmse_dict = {'LR':[], 'SVR':[], 'RF':[],'LGBM':[],'ANN':[],'3DCNN':[] ,'CNNLSTM':[]} 
for fh_step in range(1,9):
    dfstep = pd.read_csv(f'results_data/df_test_all_model_{fh_step}step.csv',parse_dates=['Datetime'])
    for model in model_name_list:
        y_true = dfstep[f'I_lead_{fh_step}step'].to_numpy()
        y_pred = dfstep[f'I_pred_{model}'].to_numpy()
        
        rmse = (mean_squared_error(y_true, y_pred))**0.5
        mae = mean_absolute_error(y_true, y_pred)
        nrmse = cal_nrmse(y_true, y_pred)
        
        rmse_dict[model.upper()].append(rmse)
        mae_dict[model.upper()].append(mae)
        nrmse_dict[model.upper()].append(nrmse)

## 8 steps performance table

In [15]:
fh_list = [str(30*i) for i in range(1,9)]
dfnrmse_allstep = pd.DataFrame(nrmse_dict, index=fh_list)
dfnrmse_allstep

Unnamed: 0,LR,SVR,RF,LGBM,ANN,3DCNN,CNNLSTM
30,17.696062,16.417917,16.883835,16.87845,16.484449,16.211607,16.05517
60,21.250492,19.69037,20.143164,20.241628,19.751322,21.00395,19.453962
90,23.425696,21.835838,22.483213,22.431587,21.898637,21.541999,21.73517
120,25.27762,24.072387,24.409346,24.37266,23.695699,23.23981,23.195592
150,26.767444,25.598439,26.035428,26.003177,25.259906,24.988391,25.050306
180,27.894458,26.790442,27.385188,27.317636,26.587392,26.474301,26.156994
210,29.111142,28.073368,28.586892,28.596794,27.942754,28.93883,27.405177
240,30.161888,29.151846,29.647663,29.670544,29.054507,28.285505,28.430506


In [16]:
def format_min(row):
    min_val = row.min()
    return row.apply(lambda x: "\textbf{" + f'{x:.2f}' + "}" if x == min_val else f'{x:.2f}')
dfnrmse_allstep_latex = dfnrmse_allstep.apply(format_min,axis=1)
dfnrmse_allstep_latex = dfnrmse_allstep_latex.to_latex(index=True,escape=False)
print(dfnrmse_allstep_latex)

\begin{tabular}{llllllll}
\toprule
{} &     LR &    SVR &     RF &   LGBM &    ANN &           3DCNN &         CNNLSTM \\
\midrule
30  &  17.70 &  16.42 &  16.88 &  16.88 &  16.48 &           16.21 &  \textbf{16.06} \\
60  &  21.25 &  19.69 &  20.14 &  20.24 &  19.75 &           21.00 &  \textbf{19.45} \\
90  &  23.43 &  21.84 &  22.48 &  22.43 &  21.90 &  \textbf{21.54} &           21.74 \\
120 &  25.28 &  24.07 &  24.41 &  24.37 &  23.70 &           23.24 &  \textbf{23.20} \\
150 &  26.77 &  25.60 &  26.04 &  26.00 &  25.26 &  \textbf{24.99} &           25.05 \\
180 &  27.89 &  26.79 &  27.39 &  27.32 &  26.59 &           26.47 &  \textbf{26.16} \\
210 &  29.11 &  28.07 &  28.59 &  28.60 &  27.94 &           28.94 &  \textbf{27.41} \\
240 &  30.16 &  29.15 &  29.65 &  29.67 &  29.05 &  \textbf{28.29} &           28.43 \\
\bottomrule
\end{tabular}



  dfnrmse_allstep_latex = dfnrmse_allstep_latex.to_latex(index=True,escape=False)


In [17]:
dfmae_allstep = pd.DataFrame(mae_dict, index=fh_list)
dfmae_allstep
dfmae_allstep_latex = dfmae_allstep.apply(format_min,axis=1)
dfmae_allstep_latex = dfmae_allstep_latex.to_latex(index=True,escape=False)
print(dfmae_allstep_latex)

\begin{tabular}{llllllll}
\toprule
{} &      LR &     SVR &      RF &    LGBM &     ANN &            3DCNN &          CNNLSTM \\
\midrule
30  &   85.24 &   79.09 &   81.33 &   81.31 &   79.41 &            78.09 &   \textbf{77.34} \\
60  &  106.97 &   99.12 &  101.40 &  101.90 &   99.43 &           105.73 &   \textbf{97.93} \\
90  &  120.86 &  112.66 &  116.00 &  115.73 &  112.98 &  \textbf{111.14} &           112.14 \\
120 &  130.24 &  124.03 &  125.77 &  125.58 &  122.09 &           119.74 &  \textbf{119.52} \\
150 &  138.34 &  132.30 &  134.56 &  134.39 &  130.55 &  \textbf{129.15} &           129.47 \\
180 &  141.93 &  136.31 &  139.34 &  138.99 &  135.28 &           134.70 &  \textbf{133.09} \\
210 &  140.84 &  135.82 &  138.31 &  138.35 &  135.19 &           140.01 &  \textbf{132.59} \\
240 &  135.71 &  131.17 &  133.40 &  133.50 &  130.73 &  \textbf{127.27} &           127.92 \\
\bottomrule
\end{tabular}



  dfmae_allstep_latex = dfmae_allstep_latex.to_latex(index=True,escape=False)
