# Visualisation Notebook

This notebook presents a class that facilitates the analysis and the visualisation of the results.

In [4]:
import chart_studio.plotly as plotly
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
pio.renderers.default='notebook'
import warnings
warnings.filterwarnings("ignore") 
import sklearn
import datetime
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
from matplotlib import cm
from plotly.subplots import make_subplots

### Functions

In [9]:
# Load the forecasts when every model/horizon pair is stored in its own pickle file
def load_data(models = ["CNN_LSTM", "CNN", "GRU","KNN", "LGB", "LR", "LSTM", "RNN", "SVR", "XGB_1000", "RF", "FNN", "COP", "MOE"],
              horizon = [1, 4, 24, 48, 168], weather = 1, no_weather = 1):
    """Assemble the actual values and every per-model forecast into one frame.

    Reads ``y_actual.pkl`` and one ``<model>_<horizon>.pkl`` file per
    model/horizon pair from ``../Data/results/``.

    Parameters
    ----------
    models : list of str
        Model name prefixes used to build the pickle file names.
    horizon : list of int
        Horizons used to build the pickle file names.
    weather, no_weather :
        Accepted but not used by this function.

    Returns
    -------
    pandas.DataFrame
        An ``'Actual'`` column plus one ``'<model>_<horizon>'`` column per
        pickle file, indexed like ``y_actual.pkl``, with every row that
        contains a missing value removed.
    """
    results_dir = "../Data/results/"
    actual = pd.read_pickle(os.path.join(results_dir, "y_actual.pkl"))
    forecasts = pd.DataFrame()
    forecasts["Actual"] = actual['X(t)'].values
    forecasts.set_index(actual.index, inplace=True)

    for h in horizon:
        for model in models:
            # The column label and the file stem are the same string.
            column = model + '_' + str(h)
            file_name = os.path.join(results_dir, column + ".pkl")
            forecasts[column] = pd.read_pickle(file_name)

    forecasts.dropna(inplace=True)
    return forecasts

# Load data if the models are stored in one single pickle file
def load_data_2(path, horizon, clustered):
    """Load the forecasts of every model from one pickle file.

    Reads ``y_actual.pkl`` and ``<horizon>_<clustered>_prediction.pkl`` from
    the directory ``path``.

    Parameters
    ----------
    path : str
        Directory holding the pickle files. An empty or ``None`` value falls
        back to ``"../Data/results/"``, the directory that used to be
        hard-coded here.
    horizon :
        Horizon used as the first part of the prediction file name.
    clustered : str
        Second part of the prediction file name.

    Returns
    -------
    pandas.DataFrame
        The predictions re-indexed on the index of ``y_actual.pkl``, with an
        added ``'Actual'`` column, with every row that contains a missing
        value removed.
    """
    # The argument used to be overwritten unconditionally, which silently
    # ignored whatever directory the caller asked for.
    if not path:
        path = "../Data/results/"
    y = pd.read_pickle(os.path.join(path, "y_actual.pkl"))
    prediction_file = str(horizon) + "_" + clustered + "_prediction.pkl"
    df = pd.read_pickle(os.path.join(path, prediction_file))
    # Align the predictions on the actual values before attaching them.
    df = df.reindex(y.index)
    df["Actual"] = y['X(t)'].values
    df.dropna(inplace=True)
    return df

def normalized_root_mean_squared_error(y, pred):
    """Return the root of the mean squared relative error of ``pred``.

    Each residual ``y - pred`` is divided by the matching value of ``y``
    before squaring. Zeros in ``y`` are replaced by the float machine epsilon
    so the division is always defined.

    Parameters
    ----------
    y : array-like
        Reference values. The caller's object is never modified.
    pred : array-like
        Predicted values, same length as ``y``.

    Returns
    -------
    float
        ``sqrt(1/len(y) * sum(((y - pred) / y) ** 2))``.
    """
    # ``astype`` hands back a fresh float copy for both ndarray and Series:
    # the caller's data stays untouched, and integer input no longer
    # truncates the epsilon back to 0.
    y = y.astype(float) if hasattr(y, "astype") else np.array(y, dtype=float)
    y[y == 0] = np.finfo(float).eps
    relative_error = np.divide(np.subtract(y, pred), y)
    return np.sqrt(1 / len(y) * np.sum(np.square(relative_error)))

def root_mean_squared_error(y, pred) :
    """Return the root mean squared error between ``y`` and ``pred``.

    Parameters
    ----------
    y : array-like
        Reference values.
    pred : array-like
        Predicted values.

    Returns
    -------
    float
        Square root of scikit-learn's ``mean_squared_error(y, pred)``.
    """
    # ``squared=False`` was deprecated in scikit-learn 1.4 and removed in 1.6.
    # Taking the root explicitly works on every version and gives the same
    # value for the single-output series this notebook passes in.
    return np.sqrt(mean_squared_error(y, pred))

# Evaluate one error metric on every column of a frame
def error(ytest, df, error):
    """Apply the metric ``error`` to each column of ``df`` against ``ytest``.

    Parameters
    ----------
    ytest : array-like
        Reference values, passed as first argument to the metric.
    df : pandas.DataFrame
        Frame whose columns are scored one by one.
    error : callable
        Metric called as ``error(ytest, df[column])``.

    Returns
    -------
    list
        One metric value per column, in column order.
    """
    metric = error
    return [metric(ytest, df[column]) for column in df.columns]

# Store the MAE, the RMSE and the NRMSE of every model in three separate DataFrames
def mae_rmse_nrmse(df, m):
    """Score every column of ``df`` against its ``'Actual'`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``'Actual'`` column and one column per forecast.
    m : iterable of str
        Candidate name prefixes. Each column is labelled with the first entry
        of ``m`` it starts with, so more specific prefixes must come first.
        A column that matches no prefix is labelled with its own name
        instead of raising ``IndexError``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(mae, rmse, nrmse)``. Each frame is indexed by the columns of
        ``df`` without ``'Actual'`` and holds the metric column (``'MAE'``,
        ``'RMSE'`` or ``'NRMSE'``) followed by a ``'Name'`` column.
    """
    actual = df['Actual']

    names = []
    for model in df.columns:
        if model == 'Actual':
            names.append('Actual')
            continue
        # First matching prefix wins; fall back to the column name itself.
        names.append(next((prefix for prefix in m if model.startswith(prefix)), model))
    name = pd.DataFrame(names, index=df.columns, columns=["Name"])

    def _score_table(metric, label):
        # One metric for every column, joined with the labels, minus 'Actual'.
        scores = pd.DataFrame(error(actual, df, metric), index=df.columns, columns=[label])
        return pd.concat((scores, name), axis=1).drop('Actual')

    rmse = _score_table(root_mean_squared_error, "RMSE")
    mae = _score_table(mean_absolute_error, "MAE")
    nrmse = _score_table(normalized_root_mean_squared_error, "NRMSE")
    return (mae, rmse, nrmse)

def residual_error(df):
    """Return ``Actual - forecast`` for every non-``'Actual'`` column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``'Actual'`` column; it is left unchanged.

    Returns
    -------
    pandas.DataFrame
        Same index as ``df``, one residual column per forecast column, and no
        ``'Actual'`` column.
    """
    actual = df['Actual']
    # ``drop`` returns a new frame, so the caller's data is not modified.
    residuals = df.drop(['Actual'], axis=1)
    for column in residuals.columns:
        residuals[column] = actual - residuals[column]
    return residuals
    
def statistics(df):
    """Summarise every column of ``df`` with a few descriptive statistics.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are summarised.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``df`` with the standard deviation, the mean,
        the mean and count of the strictly negative and strictly positive
        values, the skewness and the kurtosis.
    """
    # Masked views: values failing the condition become NaN and are skipped
    # by ``mean`` and ``count``.
    negative = df[df < 0]
    positive = df[df > 0]
    summary = {
        "Standard_deviation": df.std(),
        "Mean": df.mean(),
        "Mean<0": negative.mean(),
        "Mean>0": positive.mean(),
        "Count<0": negative.count(),
        "Count>0": positive.count(),
        "Skewness": df.skew(),
        "Kurtosis": df.kurtosis(),
    }
    return pd.DataFrame(summary)

def weekly_error(df, func):
    """Compute the metric ``func`` of every forecast for each day of the week.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``'Actual'`` column whose index exposes ``.weekday``
        (0 = Monday).
    func : callable
        Metric called as ``func(actual, forecast)``.

    Returns
    -------
    pandas.DataFrame
        One row per day name (index named ``'Day'``), one column per
        forecast column of ``df``.
    """
    day_names = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
    weekday = df.index.weekday
    # ``DataFrame.append`` was removed in pandas 2.0: collect the rows first
    # and build the frame once.
    rows = [error(df['Actual'][weekday == day], df[weekday == day], func)
            for day in range(7)]
    result = pd.DataFrame(rows, columns=df.columns, index=pd.Index(day_names, name='Day'))
    return result.drop(['Actual'], axis=1)

def monthly_error(df, func):
    """Compute the metric ``func`` of every forecast for each month.

    Only February (2) to November (11) are evaluated.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``'Actual'`` column whose index exposes ``.month``.
    func : callable
        Metric called as ``func(actual, forecast)``.

    Returns
    -------
    pandas.DataFrame
        One row per month name (index named ``'Month'``), one column per
        forecast column of ``df``.
    """
    # NOTE: the labels are runtime index values and are kept unchanged,
    # including the "Novembre" spelling, so existing lookups keep working.
    month_names = ["February", "March", "April", "May", "June", "July", "August", "September",
                   "October", "Novembre"]
    month_of_row = df.index.month
    # ``DataFrame.append`` was removed in pandas 2.0: collect the rows first
    # and build the frame once.
    rows = [error(df['Actual'][month_of_row == month], df[month_of_row == month], func)
            for month in range(2, 12)]
    result = pd.DataFrame(rows, columns=df.columns, index=pd.Index(month_names, name='Month'))
    return result.drop(['Actual'], axis=1)

def hourly_error(df, func):
    """Compute the metric ``func`` of every forecast for each hour of the day.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``'Actual'`` column whose index exposes ``.hour``.
    func : callable
        Metric called as ``func(actual, forecast)``.

    Returns
    -------
    pandas.DataFrame
        One row per hour 0..23 (index named ``'Hour'``), one column per
        forecast column of ``df``.
    """
    hour_of_row = df.index.hour
    # ``DataFrame.append`` was removed in pandas 2.0: collect the rows first
    # and build the frame once.
    rows = [error(df['Actual'][hour_of_row == hour], df[hour_of_row == hour], func)
            for hour in range(0, 24)]
    result = pd.DataFrame(rows, columns=df.columns,
                          index=pd.Index(list(range(0, 24)), name='Hour'))
    return result.drop(['Actual'], axis=1)

def distribution_daily(df, model, rate, title):
    """Plot percentile bands of ``|df[model]|`` for each hour of the day.

    The column is pivoted to an hour-by-day table, days with a missing hour
    are dropped, and for every hour ``rate`` evenly spaced percentiles
    (0 to 100) are taken across the remaining days. Symmetric percentile
    pairs are drawn as shaded bands and the middle percentile as a black line.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index exposes ``.date`` and ``.hour``; it is not modified.
    model : str
        Column to plot; also used as the y-axis label.
    rate : int
        Number of percentiles computed between 0 and 100.
    title : str
        Title of the figure.

    Returns
    -------
    None
        The figure is created with matplotlib and left for the backend to
        display.
    """
    # Copy first: adding helper columns to the ``df[[model]]`` slice is
    # exactly the chained assignment that pandas warns about.
    res_error = df[[model]].copy()
    res_error['Day'] = res_error.index.date
    res_error['Hour'] = res_error.index.hour
    res_error = res_error.pivot(index = 'Hour', columns = 'Day')
    # Keep only the days that have a value for every hour.
    res_error = res_error.dropna(axis=1).abs()

    n = rate
    colormap = cm.Blues
    percentiles = np.linspace(0, 100, n)
    # percent[t, i] is the i-th requested percentile of hour t across days.
    percent = np.percentile(res_error.values, percentiles, axis=1).T

    # Index of the middle percentile (the median when ``rate`` is odd).
    half = int((n-1)/2)
    hours = np.arange(0, 24, 1)

    fig, (ax1) = plt.subplots(nrows=1, ncols=1, sharex=True, figsize=(8,4))
    ax1.plot(hours, percent[:,half], color='k')
    # Pair percentile i with its mirror image; inner bands get darker colours.
    for i in range(half):
        ax1.fill_between(hours, percent[:,i], percent[:,-(i+1)], color=colormap(i/half))

    ax1.set_title(title, fontsize=15)
    ax1.tick_params(labelsize=11.5)
    ax1.set_xlabel('Hour', fontsize=14)
    ax1.set_ylabel(model, fontsize=14)
    # Fixed reference lines kept from the original figure.
    ax1.axvline(x=7, color = "black", linestyle='--')
    ax1.axhline(y = 100, color = 'black', linestyle = '--')

    fig.tight_layout()
    
def distribution_weekly(df, model, rate, title):
    """Plot percentile bands of ``|df[model]|`` for each hour of the week.

    The column is pivoted to an hour-of-week (0..167, Monday 00:00 first) by
    ISO-week table, weeks with a missing hour are dropped, and for every hour
    of the week ``rate`` evenly spaced percentiles (0 to 100) are taken across
    the remaining weeks. Symmetric percentile pairs are drawn as shaded bands
    and the middle percentile as a black line.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``DatetimeIndex``; it is not modified.
    model : str
        Column to plot; also used as the y-axis label.
    rate : int
        Number of percentiles computed between 0 and 100.
    title : str
        Title of the figure.

    Returns
    -------
    None
        The figure is created with matplotlib and left for the backend to
        display.
    """
    # Copy first: adding helper columns to the ``df[[model]]`` slice is
    # exactly the chained assignment that pandas warns about.
    res_error = df[[model]].copy()
    timestamps = res_error.index
    # ``DatetimeIndex.week`` was removed in pandas 2.0; ``isocalendar`` is its
    # replacement and also gives the ISO year that belongs to the ISO week.
    iso = timestamps.isocalendar()
    # Numeric hour-of-week key: the former "weekday-hour" strings were sorted
    # lexicographically by the pivot ("0-10" before "0-2"), which scrambled
    # the time axis of the plot.
    res_error['Day&Hour'] = np.asarray(timestamps.weekday * 24 + timestamps.hour)
    res_error["Week&Year"] = [f"{week}-{year}" for week, year in zip(iso['week'], iso['year'])]
    res_error = res_error.pivot_table(index='Day&Hour', columns='Week&Year', values=model)
    # Keep only the weeks that have a value for every hour.
    res_error = res_error.dropna(axis=1).abs()

    n = rate
    colormap = cm.Reds
    percentiles = np.linspace(0, 100, n)
    # percent[t, i] is the i-th requested percentile of hour-of-week t.
    percent = np.percentile(res_error.values, percentiles, axis=1).T

    # Index of the middle percentile (the median when ``rate`` is odd).
    half = int((n-1)/2)
    hours = np.arange(0, 168, 1)

    fig, (ax1) = plt.subplots(nrows=1, ncols=1, sharex=True, figsize=(18,8))
    ax1.plot(hours, percent[:,half], color='k')
    # Pair percentile i with its mirror image; inner bands get darker colours.
    for i in range(half):
        ax1.fill_between(hours, percent[:,i], percent[:,-(i+1)], color=colormap(i/half))

    ax1.set_title(title, fontsize=15)
    ax1.tick_params(labelsize=11.5)
    ax1.set_xlabel('Time', fontsize=14)
    ax1.set_ylabel(model, fontsize=14)
    # Fixed reference lines kept from the original figure.
    for level in (20, 60, 100):
        ax1.axhline(y=level, color = "black")
    for hour in (6, 12, 18, 24, 30, 36):
        ax1.axvline(x=hour, color = "black")
    fig.tight_layout()
    

In [12]:
class Report:
    """Error tables and plots derived from one frame of forecasts.

    The frame must hold an ``'Actual'`` column; every other column is treated
    as a forecast. All tables are computed once, in the constructor:

    * ``residual_error`` / ``stats``: residuals and their statistics
    * ``{rmse,mae,nrmse}_{daily,monthly,hourly}``: metric per day of week,
      per month and per hour of day
    * ``mae`` / ``rmse`` / ``nrmse``: overall metric per forecast column
    """

    def __init__(self, df):
        """Compute every error table from ``df``."""
        self.models = df.columns
        self.data = df
        self.residual_error = residual_error(self.data)
        self.stats = statistics(self.residual_error)
        self.rmse_daily = weekly_error(self.data, root_mean_squared_error)
        self.mae_daily = weekly_error(self.data, mean_absolute_error)
        self.nrmse_daily = weekly_error(self.data, normalized_root_mean_squared_error)
        self.rmse_monthly = monthly_error(self.data, root_mean_squared_error)
        self.mae_monthly = monthly_error(self.data, mean_absolute_error)
        self.nrmse_monthly = monthly_error(self.data, normalized_root_mean_squared_error)
        self.rmse_hourly = hourly_error(self.data, root_mean_squared_error)
        self.mae_hourly = hourly_error(self.data, mean_absolute_error)
        self.nrmse_hourly = hourly_error(self.data, normalized_root_mean_squared_error)
        self.mae, self.rmse, self.nrmse = mae_rmse_nrmse(self.data, self.models)

    @staticmethod
    def _show_line(frame, title):
        """Show ``frame`` as a plotly line chart, one line per column."""
        fig = px.line(frame, x=frame.index, y=frame.columns, title = title)
        fig.show()

    def percentile_daily_distribution(self, model, rate):
        """Plot the hour-of-day percentile bands of the residuals of ``model``."""
        return(distribution_daily(self.residual_error, model, rate, "Daily Absolute Error Distribution"))

    def percentile_weekly_distribution(self, model, rate):
        """Plot the hour-of-week percentile bands of the residuals of ``model``."""
        return(distribution_weekly(self.residual_error, model, rate, "Weekly Absolute Error Distribution"))

    def prediction_daily_distribution(self, model, rate):
        """Plot the hour-of-day percentile bands of the column ``model`` itself."""
        return(distribution_daily(self.data, model, rate, "Daily Prediction Distribution"))

    def prediction_weekly_distribution(self, model, rate):
        """Plot the hour-of-week percentile bands of the column ``model`` itself."""
        return(distribution_weekly(self.data, model, rate, "Weekly Prediction Distribution"))

    def models_name(self):
        """Return the column names of the data as a list (``'Actual'`` included)."""
        return(list(self.data.columns))

    def plot(self):
        """Show every forecast against the actual values, weekends split out.

        ``'Actual'`` is replaced by two series, ``'Actual Weekend'`` and
        ``'Actual Weekday'``, each left empty (NaN) on the other kind of day.
        """
        data_ = self.data.copy()
        # weekday > 4 means Saturday or Sunday.
        data_['weekend_'] = data_.index.weekday > 4
        data_['Actual Weekend'] = data_['Actual'][data_['weekend_']==True]
        data_['Actual Weekday'] = data_['Actual'][data_['weekend_']==False]
        data_.drop(['weekend_', 'Actual'], axis=1, inplace = True)
        fig = px.line(data_, x=data_.index, y=data_.columns,
              title='General comparison')
        fig.update_xaxes(rangeslider_visible=True)
        fig.show()

    def error_distribution(self,x,y):
        """Show one residual histogram per forecast on an ``x`` by ``y`` grid."""
        fig_hist = make_subplots(rows=x, cols=y, subplot_titles=self.residual_error.columns)
        for count, model in enumerate(self.residual_error.columns):
            # Fill the grid row by row; plotly rows and columns start at 1.
            i, j = divmod(count, y)
            fig_hist.add_trace(go.Histogram(x=self.residual_error[model]), row=i+1, col=j+1)
        fig_hist.show()

    def plot_mae(self):
        """Show the overall MAE of every forecast as a bar chart."""
        fig = px.bar(self.mae, x=self.mae.index, y=self.mae['MAE'], color=self.mae.index)
        fig.show()

    def plot_rmse(self):
        """Show the overall RMSE of every forecast as a bar chart."""
        fig = px.bar(self.rmse, x=self.rmse.index, y=self.rmse['RMSE'], color=self.rmse['Name'])
        fig.show()

    def plot_nrmse(self):
        """Show the overall NRMSE of every forecast as a bar chart."""
        fig = px.bar(self.nrmse, x=self.nrmse.index, y=self.nrmse['NRMSE'], color=self.nrmse['Name'])
        fig.show()

    def plot_rmse_hourly(self):
        """Show the RMSE per hour of the day."""
        self._show_line(self.rmse_hourly, "Hourly RMSE")

    def plot_rmse_daily(self):
        """Show the RMSE per day of the week."""
        self._show_line(self.rmse_daily, "Daily RMSE")

    def plot_rmse_monthly(self):
        """Show the RMSE per month."""
        self._show_line(self.rmse_monthly, "Monthly RMSE")

    def plot_nrmse_hourly(self):
        """Show the NRMSE per hour of the day."""
        self._show_line(self.nrmse_hourly, "Hourly NRMSE")

    def plot_nrmse_daily(self):
        """Show the NRMSE per day of the week."""
        self._show_line(self.nrmse_daily, "Daily NRMSE")

    def plot_nrmse_monthly(self):
        """Show the NRMSE per month.

        This method used to be a second definition of ``plot_rmse_monthly``,
        which replaced the real one and drew the NRMSE table under an RMSE
        title.
        """
        self._show_line(self.nrmse_monthly, "Monthly NRMSE")

    def plot_mae_hourly(self):
        """Show the MAE per hour of the day."""
        self._show_line(self.mae_hourly, "Hourly MAE")

    def plot_mae_daily(self):
        """Show the MAE per day of the week."""
        self._show_line(self.mae_daily, "Daily MAE")

    def plot_mae_monthly(self):
        """Show the MAE per month."""
        self._show_line(self.mae_monthly, "Monthly MAE")