# Libs

In [88]:
import pandas as pd
import numpy as np
# Seed NumPy's legacy global RNG so NumPy-based randomness in this notebook is repeatable.
# NOTE(review): this does not seed other libraries' generators (e.g. torch) — confirm whether they need seeding too.
np.random.seed(1)
from functools import partial
import itertools
import optuna

import plotly.graph_objects as go
import plotly.express as px
import time

from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS, NHITS
import sklearn.metrics as metrics

# Load the two station files and combine them on 'date'.
# Forward slashes are used so the relative paths resolve on Windows and POSIX alike.
# NOTE: pd.read_pickle unpickles arbitrary objects — only point it at trusted files.
# The 'o3' column of the Ponte dos Remedios frame is dropped; 'o3' is taken from the
# Guarulhos frame instead.
ponte = pd.read_pickle('Data/Data_Ponte_dos_Remedios.pkl').drop(columns='o3')
guarulhos = pd.read_pickle('Data/Data_Guarulhos.pkl')[['date','o3']]

# Outer join keeps dates present in either file. reset_index returns a new frame,
# so its result must be assigned (the bare call was a no-op).
data = ponte.merge(guarulhos, on='date', how='outer').reset_index(drop=True)

import shutil
import joblib
import pickle
from IPython.display import clear_output
import os
# Environment flag for the Nixtla libraries.
# NOTE(review): presumably makes them return `unique_id` as a regular column rather than the index — confirm against the neuralforecast docs.
os.environ['NIXTLA_ID_AS_COL'] = '1'

from pytorch_lightning import Trainer
# Lightning trainer capped at 4 steps, with logging, progress bar and model summary turned off.
# NOTE(review): `trainer` is not referenced in the visible part of this notebook — confirm it is still needed.
trainer = Trainer(
    max_steps=4,
    logger=False,
    enable_progress_bar=False,
    enable_model_summary=False  # Disable model summary
)

import warnings
# Hide UserWarning messages originating from optuna modules.
warnings.filterwarnings("ignore", category=UserWarning, module="optuna")

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


# Module

In [2]:
# USED WITH PLOTLY TO ELABORATE THE DESIGN ===========================================================
def main_layout(fig:go.Figure, width=700, height=600, x=None, y=None, title=None,
               x_range=None, y_range=None, paper_color='white',
               customdata=None, hover_customdata='Info',
               hover_x='x',hover_y='y', **kwargs) -> go.Figure:
    """Apply the notebook's standard single-plot styling to a Plotly figure.

    The figure's layout is REPLACED (not merged) by a fresh ``go.Layout`` built
    from the arguments, then the hover template of every trace is set.

    Args:
        fig: figure to style; it is modified in place and also returned.
        width, height: figure size passed to the layout.
        x, y: axis titles.
        title: figure title text.
        x_range, y_range: axis ranges (``None`` leaves them unset).
        paper_color: used for both the plot and the paper background.
        customdata: controls the hover set-up:
            * the string ``'no'`` leaves the traces untouched;
            * ``None`` sets a hover template showing only x and y;
            * anything else is attached to every trace as ``customdata`` and
              shown as a third hover line.
        hover_customdata: label of the custom-data hover line.
        hover_x, hover_y: labels of the x and y hover lines.
        **kwargs: extra keyword arguments forwarded to ``go.Layout``.

    Returns:
        The same ``fig`` object.
    """
    grid_color = '#cccccc'
    fig.layout = go.Layout(
        width=width,
        height=height,
        plot_bgcolor=paper_color,
        paper_bgcolor=paper_color,
        xaxis={'gridcolor':grid_color, 'linecolor':'black','title':x, 'range':x_range},
        yaxis={'gridcolor':grid_color, 'linecolor':'black','title':y, 'range':y_range},
        title={'text':title},
        **kwargs
    )

    # Only a *string* can be the 'no' sentinel. Comparing an array-like with
    # == yields an element-wise result whose truth value is ambiguous and
    # raises ValueError for NumPy arrays / pandas Series.
    if isinstance(customdata, str) and customdata == 'no':
        return fig

    hover_template = hover_x + ': %{x}<br>' + hover_y + ': %{y}'
    if customdata is not None:
        hover_template += '<br>' + hover_customdata + ': %{customdata}<br>'

    fig.update_traces(patch={
        'customdata':customdata,
        'hovertemplate': hover_template
    })
    return fig
# ====================================================================================================
def main_subplot_layout(fig:go.Figure, width=1400, height=500, title=None, paper_color='white',
                        x=None, y=None, rows=1, cols=2, x_range=None, y_range=None,
                        customdata=None, hover_customdata='Info',
                        hover_x='x',hover_y='y', **kwargs) -> go.Figure:
    """Apply the notebook's standard styling to a figure that has several axes.

    Unlike replacing the layout, this updates the existing layout in place and
    then styles every x and y axis returned by ``fig.select_xaxes()`` /
    ``fig.select_yaxes()`` identically.

    Args:
        fig: figure to style; it is modified in place and also returned.
        width, height: figure size.
        title: figure title.
        paper_color: used for both the plot and the paper background.
        x, y: title applied to every x / y axis.
        rows, cols: accepted for interface compatibility; not used here.
        x_range, y_range: range applied to every x / y axis.
        customdata: controls the hover set-up:
            * the string ``'no'`` leaves the traces untouched;
            * ``None`` sets a hover template showing only x and y;
            * anything else is attached to every trace as ``customdata`` and
              shown as a third hover line.
        hover_customdata: label of the custom-data hover line.
        hover_x, hover_y: labels of the x and y hover lines.
        **kwargs: extra layout properties merged into the layout update.

    Returns:
        The same ``fig`` object.
    """
    fig.update_layout({
        'width':width,
        'height':height,
        'plot_bgcolor':paper_color,
        'paper_bgcolor':paper_color,
        'title':title,
        **kwargs
    })

    axis_style = {'showgrid':True, 'gridcolor':'#CCCCCC', 'linecolor':'black'}
    for xaxis in fig.select_xaxes():
        xaxis.update(title=x, range=x_range, **axis_style)
    for yaxis in fig.select_yaxes():
        yaxis.update(title=y, range=y_range, **axis_style)

    # Only a *string* can be the 'no' sentinel. Comparing an array-like with
    # == yields an element-wise result whose truth value is ambiguous and
    # raises ValueError for NumPy arrays / pandas Series.
    if isinstance(customdata, str) and customdata == 'no':
        return fig

    hover_template = hover_x + ': %{x}<br>' + hover_y + ': %{y}'
    if customdata is not None:
        hover_template += '<br>' + hover_customdata + ': %{customdata}<br>'

    fig.update_traces(patch={
        'customdata':customdata,
        'hovertemplate': hover_template
    })
    return fig
# ====================================================================================================

In [3]:
class TimeObject:
    """One column of a dated DataFrame prepared as a daily series for forecasting.

    Construction runs the whole preparation pipeline:

    1. keep only ``'date'`` and ``column`` from ``df``;
    2. reindex the column onto a gap-free daily index (``to_serie_``);
    3. interpolate missing values (``NAN_treatment_``);
    4. build a long-format frame with columns ``ds``, ``y`` and ``unique_id``
       (``NIXTLA_treatment_``), optionally mean-aggregated to ``agg_freq``;
    5. hold out the last 7 timestamps as the test set (``NIXTLA_train_test``).

    Attributes:
        df: the ``['date', column]`` slice of the input frame.
        column: name of the value column.
        time_serie: daily, interpolated ``pd.Series`` indexed by date.
        nixtla_df: long-format frame (``ds``, ``y``, ``unique_id``).
        split: number of trailing timestamps used as the test set.
        Y_train, Y_test: train / test partitions of ``nixtla_df``.
        metrics: dict of error metrics, set by ``metrics_``.
    """

    def __init__(self, df:pd.DataFrame, column:str,
                 NAN_treatment_args:dict=None,
                 agg_freq:str=None) -> None:
        """Build the series, fill gaps, and create the train/test split.

        Args:
            df: frame containing a ``'date'`` column and ``column``.
            column: name of the value column to model.
            NAN_treatment_args: keyword arguments forwarded to
                ``pd.Series.interpolate``. ``None`` (the default) means
                ``{'method': 'from_derivatives'}``.
            agg_freq: optional pandas frequency string; when given, ``y`` is
                averaged per ``agg_freq`` bucket of ``ds``.
        """
        # Resolve the default here rather than in the signature so no dict
        # instance is shared between calls.
        if NAN_treatment_args is None:
            NAN_treatment_args = {'method':'from_derivatives'}

        self.df = df[['date',column]]
        self.column = column
        self.time_serie = self.to_serie_()

        self.NAN_treatment_(**NAN_treatment_args)
        self.NIXTLA_treatment_()
        if agg_freq is not None:
            self.nixtla_df = (
                self.nixtla_df
                .groupby(pd.Grouper(key='ds', freq=agg_freq))
                .agg({'y': 'mean'})
                .reset_index()
            )
            # The aggregation keeps only ds/y, so the series id is re-added.
            self.nixtla_df['unique_id'] = 1.0
        self.NIXTLA_train_test(split=7)

    def to_serie_(self) -> pd.Series:
        """Return the value column as a Series on a complete daily date index.

        Dates between the first and last observation that are absent from the
        input appear as NaN in the result.
        """
        time_serie = self.df[self.column].fillna(np.nan)
        time_serie.index = pd.to_datetime(self.df['date'])

        full_index = pd.date_range(start=time_serie.index.min(), end=time_serie.index.max(), freq='D')
        return time_serie.reindex(full_index)

    def NAN_treatment_(self, **kwargs) -> None:
        """Interpolate ``time_serie`` in place; ``kwargs`` go to ``Series.interpolate``."""
        self.time_serie = self.time_serie.interpolate(**kwargs)

    def NIXTLA_treatment_(self) -> None:
        """Build ``nixtla_df`` (columns ``ds``, ``y``, ``unique_id``) from ``time_serie``."""
        # Built in one step instead of enlarging an empty frame through .loc.
        self.nixtla_df = pd.DataFrame({
            'ds': pd.to_datetime(self.time_serie.index),
            'y': self.time_serie.values,
            'unique_id': 1.0,
        })

    def plot(self) -> go.Figure:
        """Return a figure with the full interpolated daily series (no layout styling)."""
        fig = go.Figure()
        fig.add_trace(trace=go.Scatter(
            x=self.time_serie.index, y=self.time_serie,
            marker=dict(color='#222222')
        ))
        return fig

    def NIXTLA_train_test(self, split:int=12):
        """Split ``nixtla_df`` so that the last ``split`` timestamps form ``Y_test``.

        ``Y_train`` keeps its original index; ``Y_test`` is re-indexed from 0.
        """
        self.split = split
        cutoff = self.nixtla_df['ds'].values[-split]
        ds = self.nixtla_df['ds']
        self.Y_train = self.nixtla_df[ds < cutoff]
        self.Y_test = self.nixtla_df[ds >= cutoff].reset_index(drop=True)

    @staticmethod
    def _smape(y_true, y_pred) -> float:
        """Symmetric MAPE in percent: ``200/n * sum(|a-f| / (|a|+|f|))``.

        Inputs are compared by position. Terms where both the actual and the
        predicted value are zero contribute 0 instead of NaN. Returns NaN for
        empty input.
        """
        actual = np.asarray(y_true, dtype=float)
        predicted = np.asarray(y_pred, dtype=float)
        if actual.size == 0:
            return float('nan')
        abs_error = np.abs(actual - predicted)
        scale = np.abs(actual) + np.abs(predicted)
        terms = np.divide(abs_error, scale, out=np.zeros_like(abs_error), where=scale != 0)
        # Mean over n terms (the previous implementation divided by n + 1).
        return float(200.0 * terms.mean())

    def metrics_(self, forecast_df:pd.DataFrame, method:str='NHITS'):
        """Compute error metrics of ``forecast_df[method]`` against ``Y_test['y']``.

        Stores a dict with keys ``mae``, ``mape`` (percent), ``mse``, ``max``
        and ``smape`` (percent), each rounded to 5 decimals, in ``self.metrics``.
        Values are paired by position, not by index label.
        """
        y_true = self.Y_test['y'].to_numpy()
        y_pred = forecast_df[method].to_numpy()

        self.metrics = {
            'mae': np.round(metrics.mean_absolute_error(y_true=y_true, y_pred=y_pred),5),
            'mape': np.round(100*metrics.mean_absolute_percentage_error(y_true=y_true, y_pred=y_pred),5),
            'mse': np.round(metrics.mean_squared_error(y_true=y_true, y_pred=y_pred),5),
            'max': np.round(metrics.max_error(y_true=y_true, y_pred=y_pred),5),
            'smape': np.round(self._smape(y_true=y_true, y_pred=y_pred),5),
        }
        return

    def plot_time_series(self):
        """Return a styled line plot of the training part of the series."""
        fig = go.Figure()
        fig.add_trace(trace=go.Scatter(
            x=self.Y_train['ds'], y=self.Y_train['y'],
            mode='lines', marker=go.scatter.Marker(
                color='black'
            ), name='Time Series'
        ))
        main_layout(fig=fig, width=1100, height=450, title='Time Series', x='time', y='AQI')
        return fig

    def plot_forecast(self, forecast_df:pd.DataFrame, confidence:int=90, method='NHITS', show:bool=True, show_metrics:bool=True):
        """Plot train, test and forecast lines, plus the interval band when available.

        Args:
            forecast_df: frame with ``'ds'`` and a ``method`` column; the band
                is drawn only if both ``f'{method}-lo-{confidence}'`` and
                ``f'{method}-hi-{confidence}'`` columns exist.
            confidence: level used to look up the band columns.
            method: name of the forecast column.
            show: call ``fig.show()`` when true.
            show_metrics: compute metrics via ``metrics_`` and print them when true.

        Returns:
            The figure.
        """
        fig = go.Figure()

        fig.add_trace(trace=go.Scatter(
            x=self.Y_train['ds'], y=self.Y_train['y'],
            mode='lines', marker=go.scatter.Marker(
                color='black'
            ), name='train'
        ))

        fig.add_trace(trace=go.Scatter(
            x=self.Y_test['ds'], y=self.Y_test['y'],
            mode='lines', marker=go.scatter.Marker(
                color='skyblue'
            ), name='test'
        ))

        fig.add_trace(trace=go.Scatter(
            x=forecast_df['ds'], y=forecast_df[method],
            mode='lines', marker=go.scatter.Marker(
                color='orange'
            ), name=method
        ))

        # The band is optional: skip it when the interval columns are absent,
        # instead of hiding every possible error behind a bare `except`.
        lo_col = f'{method}-lo-{confidence}'
        hi_col = f'{method}-hi-{confidence}'
        if lo_col in forecast_df.columns and hi_col in forecast_df.columns:
            # Invisible lower bound; the upper bound then fills down to it.
            fig.add_trace(go.Scatter(
                x=forecast_df['ds'], y=forecast_df[lo_col],
                mode='lines', line=dict(width=0), fill='tonexty',
                fillcolor='rgba(255, 165, 0, 0)',
                showlegend=False
            ))

            fig.add_trace(go.Scatter(
                x=forecast_df['ds'], y=forecast_df[hi_col],
                mode='lines', line=dict(width=0), fill='tonexty',
                fillcolor='rgba(255, 165, 0, 0.2)',
                name=f'confidence: {confidence}%'
            ))

        main_layout(fig=fig, width=1100, height=450, title='Forecast', x='time', y='AQI')

        if show:
            fig.show()
        if show_metrics:
            self.metrics_(forecast_df, method=method)
            for key, metric in self.metrics.items():
                print(f'{key}: {metric}')

        return fig

# **Analysis**

## **Metrics**

In [204]:
# For every pollutant and each h in (12, 52), load the stored per-method error
# table and show it ranked by sMAPE, then MAE.
for pollutant in ['co','pm10','pm25','o3','no2']:
    for h in [12, 52]:
        # Forward slashes resolve on Windows and POSIX alike (the backslash
        # form only worked on Windows).
        # NOTE: joblib.load unpickles arbitrary objects — only load trusted files.
        df = joblib.load(f"Results/Stats/{pollutant}/{h}W_Df.pkl")
        display(df.sort_values(['smape','mae'])[['method','smape','mae','max','mape','mse']])

Unnamed: 0,method,smape,mae,max,mape,mse
0,AutoETS-ZNA,24.89825,1.08606,2.19688,30.34291,1.6211
0,AutoTheta-Multi,24.92643,1.08936,2.43284,27.70377,1.73129
0,AutoETS-ZNM,25.58223,1.10997,2.01239,31.4586,1.66209
0,AutoCES-S,25.99763,1.13074,2.21889,33.01316,1.67015
0,AutoTheta-Add,27.0829,1.18473,2.46277,28.35081,1.95795
0,AutoCES-Z,27.3386,1.19544,2.52749,28.25192,2.0173
0,AutoCES-P,27.3386,1.19544,2.52749,28.25192,2.0173
0,AutoCES-N,29.6303,1.30465,2.93478,42.50113,2.30758
0,AutoETS-ZNZ,30.22121,1.34102,3.04686,44.72782,2.50598
0,AutoETS-ZNN,30.22121,1.34102,3.04686,44.72782,2.50598


Unnamed: 0,method,smape,mae,max,mape,mse
0,AutoTheta-Multi,33.37884,1.32545,4.62213,33.19242,2.8607
0,AutoTheta-Add,34.28817,1.29758,3.72869,35.1567,2.60405
0,AutoCES-Z,36.93523,1.37189,3.95345,35.90394,2.96532
0,AutoCES-P,36.93523,1.37189,3.95345,35.90394,2.96532
0,AutoETS-ZNA,40.58155,1.67728,5.30735,62.90774,4.08942
0,AutoETS-ZNM,41.01251,1.6883,5.26967,63.23621,4.02158
0,AutoCES-S,41.69359,1.73764,5.68852,65.0512,4.32632
0,AutoETS-ZZM,41.73753,1.59399,4.98591,40.7394,3.96101
0,AutoETS-ZAM,41.73753,1.59399,4.98591,40.7394,3.96101
0,AutoETS-ZZZ,42.06347,1.64568,6.20138,36.07783,5.03326


Unnamed: 0,method,smape,mae,max,mape,mse
0,AutoETS-ZAA,12.08282,2.3309,7.85979,15.51141,11.96371
0,AutoCES-Z,12.44258,2.40267,8.44559,16.26149,13.10412
0,AutoCES-P,12.44258,2.40267,8.44559,16.26149,13.10412
0,AutoETS-ZAM,12.68651,2.44994,7.79319,16.27909,13.19058
0,AutoTheta-Multi,12.89419,2.52445,7.73717,16.01092,12.90712
0,AutoETS-ZZM,14.06534,2.76747,8.80028,18.73892,15.80509
0,AutoETS-ZNM,14.06534,2.76747,8.80028,18.73892,15.80509
0,AutoETS-ZZA,14.40857,2.84798,9.22936,19.28716,16.05698
0,AutoETS-ZNA,14.40857,2.84798,9.22936,19.28716,16.05698
0,AutoTheta-Add,14.58514,2.88534,7.76309,16.82531,13.11535


Unnamed: 0,method,smape,mae,max,mape,mse
0,AutoETS-ZAA,18.13249,4.85589,16.09169,20.02276,42.25891
0,AutoTheta-Multi,18.21126,4.87571,16.83621,19.84534,44.90825
0,AutoCES-P,18.39206,4.90429,14.77497,20.40625,41.91913
0,AutoCES-Z,18.54263,4.93791,14.7263,20.8548,41.84812
0,AutoTheta-Add,18.77686,5.00194,17.38565,20.12429,45.66619
0,AutoETS-ZAM,18.9799,5.05286,15.05179,21.05321,43.20567
0,AutoETS-ZZA,19.47881,5.18246,17.65907,22.71603,44.19734
0,AutoETS-ZNA,19.47881,5.18246,17.65907,22.71603,44.19734
0,AutoCES-S,19.86167,5.22929,14.30606,23.15747,42.62577
0,AutoETS-ZZM,20.0177,5.32369,14.92899,23.44348,46.11952


Unnamed: 0,method,smape,mae,max,mape,mse
0,AutoETS-ZAA,10.11535,4.76016,14.42661,12.2634,42.57126
0,AutoETS-ZZA,10.3778,4.8727,15.94328,13.0416,49.10307
0,AutoETS-ZNA,10.3778,4.8727,15.94328,13.0416,49.10307
0,AutoETS-ZZM,10.61776,5.00365,16.04866,13.26546,51.2438
0,AutoETS-ZNM,10.61776,5.00365,16.04866,13.26546,51.2438
0,AutoETS-ZAM,10.90024,5.17125,14.55874,13.13209,46.67707
0,AutoCES-S,11.33413,5.39295,17.726,14.44027,60.80736
0,AutoTheta-Multi,11.41406,5.47995,13.38595,13.26542,46.66477
0,AutoTheta-Add,12.88096,6.27463,10.98842,14.19399,50.13133
0,AutoCES-N,13.56578,6.63845,24.78097,18.28864,98.98733


Unnamed: 0,method,smape,mae,max,mape,mse
0,AutoETS-ZAA,15.09911,9.42521,33.65948,16.11659,160.64256
0,AutoETS-ZZA,15.11434,9.48997,36.52129,16.7673,165.59933
0,AutoETS-ZNA,15.11434,9.48997,36.52129,16.7673,165.59933
0,AutoETS-ZAM,15.26206,9.42826,30.67827,16.18626,159.76828
0,AutoETS-ZZM,15.55795,9.68507,32.11376,17.16788,167.7378
0,AutoETS-ZNM,15.55795,9.68507,32.11376,17.16788,167.7378
0,AutoTheta-Multi,15.55902,9.67022,36.15954,16.42138,170.05883
0,AutoCES-S,15.58476,9.59944,25.33848,17.14909,155.87159
0,AutoTheta-Add,16.26146,10.05148,37.27603,16.98788,174.79502
0,AutoCES-Z,17.08354,10.30621,34.95526,17.05219,172.07074


Unnamed: 0,method,smape,mae,max,mape,mse
0,AutoETS-ZAM,23.63952,6.60557,12.4865,30.46827,57.89326
0,AutoTheta-Multi,24.05676,6.67765,13.43097,26.64872,58.00644
0,AutoTheta-Add,24.10671,6.70269,14.41162,25.79861,60.36447
0,AutoETS-ZZM,24.21108,6.80787,13.61418,32.14811,64.97988
0,AutoETS-ZNM,24.21108,6.80787,13.61418,32.14811,64.97988
0,AutoCES-Z,24.55952,6.84736,15.57986,24.88485,66.80967
0,AutoCES-P,24.55952,6.84736,15.57986,24.88485,66.80967
0,AutoETS-ZAA,24.73739,6.97433,12.95824,31.90357,62.05319
0,AutoETS-ZZA,25.04787,7.09358,13.70535,33.0506,67.00217
0,AutoETS-ZNA,25.04787,7.09358,13.70535,33.0506,67.00217


Unnamed: 0,method,smape,mae,max,mape,mse
0,AutoTheta-Multi,22.35501,5.63858,21.74843,26.64931,53.78683
0,AutoTheta-Add,22.38963,5.63857,21.68314,25.87532,54.18467
0,AutoCES-Z,22.71863,5.65106,19.87966,25.12109,51.99078
0,AutoCES-P,22.71863,5.65106,19.87966,25.12109,51.99078
0,AutoETS-ZZM,24.49414,6.29526,21.2256,30.89119,63.71468
0,AutoETS-ZNM,24.49414,6.29526,21.2256,30.89119,63.71468
0,AutoCES-S,24.55848,6.29281,20.2677,30.83833,63.94614
0,AutoETS-ZZA,24.62147,6.32487,22.42444,30.94088,64.39331
0,AutoETS-ZNA,24.62147,6.32487,22.42444,30.94088,64.39331
0,AutoCES-N,24.76737,6.08375,16.33794,29.54554,54.23059


Unnamed: 0,method,smape,mae,max,mape,mse
0,AutoTheta-Add,20.6173,3.71682,7.61952,24.08457,20.14518
0,AutoTheta-Multi,20.91795,3.73501,7.79041,26.53022,20.21888
0,AutoCES-Z,20.99396,3.78193,8.24851,23.52069,20.93008
0,AutoCES-P,20.99396,3.78193,8.24851,23.52069,20.93008
0,AutoETS-ZAA,23.32937,4.3342,10.72149,32.01612,28.42982
0,AutoETS-ZAM,25.36642,4.81609,9.92098,35.44764,33.10183
0,AutoCES-N,26.43192,4.90963,13.12811,40.06296,37.76772
0,AutoETS-ZZA,27.379,5.313,12.46172,39.66012,40.91443
0,AutoETS-ZNA,27.379,5.313,12.46172,39.66012,40.91443
0,AutoCES-S,27.9457,5.41147,12.56811,41.61006,44.31287


Unnamed: 0,method,smape,mae,max,mape,mse
0,AutoETS-ZAA,20.93117,4.52018,17.53883,20.27477,33.67513
0,AutoETS-ZAM,21.05063,4.59769,16.86638,20.47409,34.27567
0,AutoCES-Z,21.08129,4.54697,17.60069,20.70771,34.03772
0,AutoCES-P,21.08129,4.54697,17.60069,20.70771,34.03772
0,AutoETS-ZZA,21.95212,4.91348,14.39983,26.452,36.45116
0,AutoETS-ZNA,21.95212,4.91348,14.39983,26.452,36.45116
0,AutoCES-S,21.96164,4.8554,13.10249,26.52861,33.84463
0,AutoETS-ZZM,22.38096,4.9886,14.09895,26.58487,35.3012
0,AutoETS-ZNM,22.38096,4.9886,14.09895,26.58487,35.3012
0,AutoETS-ZAN,24.44304,5.47442,15.98521,28.87449,48.33409


## **Params DataFrame**

### NHITS

In [198]:
# Collect, for every (h, pollutant) pair, the parameters of the first trial of the
# stored NHITS study after sorting by values_0 then values_1.
nhits_best_rows = []
for h in [12, 52]:
    for pollutant in ['co','pm10','pm25','o3','no2']:
        # Forward slashes resolve on Windows and POSIX alike.
        # NOTE: joblib.load unpickles arbitrary objects — only load trusted files.
        nhits = joblib.load(f"Results/NHITS/{pollutant}/{h}W_Study.pkl")
        nhits = nhits.trials_dataframe().sort_values(by=['values_0','values_1']).reset_index(drop=True)
        nhits['pollutant'] = pollutant
        nhits['h'] = h
        nhits = nhits.loc[[0],['pollutant','h','params_input_size','params_max_steps','params_n_stacks','params_n_blocks','params_n_freq_downsample','params_n_pool_kernel_size','params_local_scalar_type']]
        nhits.columns = nhits.columns.str.replace('params_', '', regex=False)
        nhits_best_rows.append(nhits)
# Concatenate once instead of growing a frame inside the loop from an empty DataFrame.
df_nhits_params = pd.concat(nhits_best_rows)
display(df_nhits_params)

Unnamed: 0,pollutant,h,input_size,max_steps,n_stacks,n_blocks,n_freq_downsample,n_pool_kernel_size,local_scalar_type
0,co,12,109,70,3,3,"[52, 12, 52]","[3, 1, 1]",standard
0,pm10,12,134,456,3,1,"[12, 12, 4]","[1, 3, 1]",boxcox
0,pm25,12,141,166,5,5,"[12, 1, 1]","[2, 2, 2]",boxcox
0,o3,12,6,167,7,4,"[12, 52, 12]","[1, 1, 1]",standard
0,no2,12,117,85,7,5,"[12, 4, 52]","[1, 2, 3]",minmax
0,co,52,41,393,3,7,"[12, 52, 4]","[3, 1, 1]",
0,pm10,52,89,397,4,5,"[1, 12, 12]","[2, 1, 1]",standard
0,pm25,52,110,156,6,6,"[4, 12, 52]","[1, 3, 1]",standard
0,o3,52,28,73,4,1,"[52, 1, 12]","[2, 2, 2]",boxcox
0,no2,52,81,336,7,7,"[52, 4, 1]","[2, 1, 2]",


### NBEATS

In [203]:
# Collect, for every (h, pollutant) pair, the parameters of the first trial of the
# stored NBEATS study after sorting by values_0 then values_1.
nbeats_best_rows = []
for h in [12, 52]:
    for pollutant in ['co','pm10','pm25','o3','no2']:
        # Forward slashes resolve on Windows and POSIX alike.
        # NOTE: joblib.load unpickles arbitrary objects — only load trusted files.
        nbeats = joblib.load(f"Results/NBEATS/{pollutant}/{h}W_Study.pkl")
        nbeats = nbeats.trials_dataframe().sort_values(by=['values_0','values_1']).reset_index(drop=True)
        nbeats['pollutant'] = pollutant
        nbeats['h'] = h
        nbeats = nbeats.loc[[0],['pollutant','h','params_input_size','params_max_steps','params_n_stacks','params_n_blocks','params_interpretability','params_local_scalar_type']]
        nbeats.columns = nbeats.columns.str.replace('params_', '', regex=False)
        nbeats_best_rows.append(nbeats)
# Concatenate once instead of growing a frame inside the loop from an empty DataFrame.
df_nbeats_params = pd.concat(nbeats_best_rows)
display(df_nbeats_params)

Unnamed: 0,pollutant,h,input_size,max_steps,n_stacks,n_blocks,interpretability,local_scalar_type
0,co,12,66,587,5,3,"[identity, trend]",boxcox
0,pm10,12,40,88,5,5,"[seasonality, seasonality]",boxcox
0,pm25,12,81,614,2,1,"[seasonality, trend]",boxcox
0,o3,12,12,586,7,4,"[identity, identity]",
0,no2,12,144,290,4,2,"[trend, seasonality]",minmax
0,co,52,135,605,2,5,"[identity, seasonality]",boxcox
0,pm10,52,127,134,5,4,"[identity, seasonality]",standard
0,pm25,52,116,398,6,4,"[identity, identity]",
0,o3,52,24,38,2,2,"[trend, identity]",minmax
0,no2,52,85,255,3,2,"[trend, trend]",boxcox
