## Libs

In [1]:
import pandas as pd
import numpy as np
# Seed NumPy's legacy global RNG so NumPy-based randomness in this notebook is repeatable.
# NOTE(review): this does not seed other libraries' generators (e.g. torch) — confirm whether that is needed.
np.random.seed(1)
import optuna

import plotly.graph_objects as go
import plotly.express as px
import plotly.subplots
import time

from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS, NHITS
import sklearn.metrics as metrics

# Build the combined pollutant table: every column from the Ponte dos Remedios
# file except its 'o3' column, plus the 'o3' column from the Guarulhos file,
# outer-joined on 'date' so dates present in only one source are kept.
ponte = pd.read_pickle(r'Data\Data_Ponte_dos_Remedios.pkl').drop(columns='o3')
guarulhos = pd.read_pickle(r'Data\Data_Guarulhos.pkl')[['date','o3']]

# reset_index returns a new frame; the original call discarded that result, so
# it had no effect. Assign it so `data` really carries a fresh 0..n-1 index.
data = ponte.merge(guarulhos, on='date', how='outer').reset_index(drop=True)
# NOTE(review): `data` is rebound further down this cell by
# `from TimeObjectModule import ..., data` — confirm which of the two is intended.

import joblib
import pickle
from IPython.display import clear_output
import os
# Opt-in flag read by the Nixtla libraries; presumably makes forecast frames carry the
# series id as a regular column instead of the index — TODO confirm for the installed version.
os.environ['NIXTLA_ID_AS_COL'] = '1'

from pytorch_lightning import Trainer
# Quiet, short-running Lightning trainer: at most 4 optimisation steps, with the
# logger, progress bar and model summary all switched off.
_trainer_options = dict(
    max_steps=4,
    logger=False,
    enable_progress_bar=False,
    enable_model_summary=False,
)
trainer = Trainer(**_trainer_options)

import warnings
# Hide UserWarning (and subclasses) raised from modules whose name starts with "optuna".
warnings.filterwarnings("ignore", category=UserWarning, module="optuna")

from TimeObjectModule import TimeObject, main_layout, main_subplot_layout, data

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


## **Metrics DataFrame**

In [5]:
# Disabled preview of the per-pollutant / per-horizon stats tables.
# Every statement in the body is commented out, so the loop currently only
# evaluates the `...` placeholder and produces no output. The commented display
# call would show each stats frame sorted by smape, then mae.
for pollutant in ['co','pm10','pm25','o3','no2']:
    for h in [12, 52]:
        # print(pollutant,h)
        # display(pd.read_pickle(rf'C:\Users\gustavo.filho\Documents\Python\GitHub\TCC\Results\Stats\{pollutant}\{h}W_Df.pkl').sort_values(by=['smape','mae']).reset_index(drop=True).loc[:,['method','smape','mae','max','mape','mse']])
        # print()
        # print()
        # print()
        ...

## **Params DataFrame**

### NHITS

In [6]:
# For every (horizon, pollutant) pair: take the first Optuna trial after sorting
# by (values_0, values_1), keep its N-HiTS hyperparameters, and append the rank
# (1 = best) that the 'NHITS' method reaches in the stats table for each metric.
nhits_rows = []  # one single-row frame per pair; concatenated once at the end
for h in [12, 26, 52, 78]:
    for pollutant in ['co','pm10','pm25','o3','no2']:
        nhits_study = joblib.load(fr"Results\NHITS\{pollutant}\{h}W_Study.pkl")
        nhits_trials = nhits_study.trials_dataframe().sort_values(by=['values_0','values_1']).reset_index(drop=True)
        nhits_trials['pollutant'] = pollutant
        nhits_trials['h'] = h
        # .copy() so the column edits below act on an independent one-row frame.
        nhits = nhits_trials.loc[[0],['pollutant','h','params_input_size','params_max_steps','params_n_stacks','params_n_blocks','params_n_freq_downsample','params_n_pool_kernel_size','params_local_scalar_type']].copy()
        nhits.columns = nhits.columns.str.replace('params_', '', regex=False)

        # Read the stats table once per pair (it does not depend on the metric).
        # The path is relative, like the study path above, instead of a
        # user-specific absolute path; this assumes the notebook runs from the
        # repository root.
        # The name `stats_df` is used (not `metrics`) so the
        # `sklearn.metrics as metrics` import is not overwritten.
        stats_df = pd.read_pickle(rf'Results\Stats\{pollutant}\{h}W_Df.pkl').reset_index(drop=True)
        for metric in ['smape','mae','max','mape','mse']:
            ranked = stats_df.sort_values(by=metric).reset_index(drop=True)
            # Position of NHITS in the ascending ordering, converted to a 1-based rank.
            nhits[metric] = ranked[ranked['method'] == 'NHITS'].index[0] + 1

        nhits_rows.append(nhits)

df_nhits_params = pd.concat(nhits_rows)

display(df_nhits_params)

Unnamed: 0,pollutant,h,input_size,max_steps,n_stacks,n_blocks,n_freq_downsample,n_pool_kernel_size,local_scalar_type,smape,mae,max,mape,mse
0,co,12,109,70,3,3,"[52, 12, 52]","[3, 1, 1]",standard,7,6,8,10,6
0,pm10,12,134,456,3,1,"[12, 12, 4]","[1, 3, 1]",boxcox,11,11,13,11,12
0,pm25,12,141,166,5,5,"[12, 1, 1]","[2, 2, 2]",boxcox,11,11,12,13,13
0,o3,12,6,167,7,4,"[12, 52, 12]","[1, 1, 1]",standard,2,2,21,2,2
0,no2,12,117,85,7,5,"[12, 4, 52]","[1, 2, 3]",minmax,6,6,5,5,5
0,co,26,83,99,3,3,"[12, 1, 1]","[1, 3, 1]",,4,4,4,6,4
0,pm10,26,45,140,4,2,"[4, 1, 52]","[2, 3, 1]",boxcox,10,10,13,3,8
0,pm25,26,109,92,3,6,"[1, 12, 12]","[1, 2, 3]",,1,1,12,1,1
0,o3,26,94,310,3,6,"[12, 12, 12]","[1, 1, 2]",boxcox,1,1,1,1,1
0,no2,26,107,486,3,6,"[52, 12, 52]","[3, 2, 3]",standard,14,14,14,14,14


### NBEATS

In [7]:
# For every (horizon, pollutant) pair: take the first Optuna trial after sorting
# by (values_0, values_1), keep its N-BEATS hyperparameters, and append the rank
# (1 = best) that the 'NBEATS' method reaches in the stats table for each metric.
nbeats_rows = []  # one single-row frame per pair; concatenated once at the end
for h in [12, 26, 52, 78]:
    for pollutant in ['co','pm10','pm25','o3','no2']:
        nbeats_study = joblib.load(fr"Results\NBEATS\{pollutant}\{h}W_Study.pkl")
        nbeats_trials = nbeats_study.trials_dataframe().sort_values(by=['values_0','values_1']).reset_index(drop=True)
        nbeats_trials['pollutant'] = pollutant
        nbeats_trials['h'] = h
        # .copy() so the column edits below act on an independent one-row frame.
        nbeats = nbeats_trials.loc[[0],['pollutant','h','params_input_size','params_max_steps','params_n_stacks','params_n_blocks','params_interpretability','params_local_scalar_type']].copy()
        nbeats.columns = nbeats.columns.str.replace('params_', '', regex=False)

        # Read the stats table once per pair (it does not depend on the metric).
        # The path is relative, like the study path above, instead of a
        # user-specific absolute path; this assumes the notebook runs from the
        # repository root.
        # The name `stats_df` is used (not `metrics`) so the
        # `sklearn.metrics as metrics` import is not overwritten.
        stats_df = pd.read_pickle(rf'Results\Stats\{pollutant}\{h}W_Df.pkl').reset_index(drop=True)
        for metric in ['smape','mae','max','mape','mse']:
            ranked = stats_df.sort_values(by=metric).reset_index(drop=True)
            # Position of NBEATS in the ascending ordering, converted to a 1-based rank.
            nbeats[metric] = ranked[ranked['method'] == 'NBEATS'].index[0] + 1

        nbeats_rows.append(nbeats)

df_nbeats_params = pd.concat(nbeats_rows)

display(df_nbeats_params)

Unnamed: 0,pollutant,h,input_size,max_steps,n_stacks,n_blocks,interpretability,local_scalar_type,smape,mae,max,mape,mse
0,co,12,66,587,5,3,"[identity, trend]",boxcox,5,4,21,1,13
0,pm10,12,40,88,5,5,"[seasonality, seasonality]",boxcox,14,14,12,13,13
0,pm25,12,81,614,2,1,"[seasonality, trend]",boxcox,21,21,19,21,21
0,o3,12,12,586,7,4,"[identity, identity]",,1,1,1,1,1
0,no2,12,144,290,4,2,"[trend, seasonality]",minmax,1,1,21,6,8
0,co,26,42,198,7,3,"[trend, identity]",boxcox,14,12,19,7,15
0,pm10,26,50,456,2,2,"[seasonality, trend]",boxcox,13,13,15,13,14
0,pm25,26,62,155,2,1,"[trend, identity]",,2,2,14,2,3
0,o3,26,32,97,2,3,"[trend, trend]",,3,3,3,9,3
0,no2,26,114,39,3,5,"[identity, identity]",minmax,1,1,2,1,1


In [8]:
# metrics = pd.read_pickle(rf'C:\Users\gustavo.filho\Documents\Python\GitHub\TCC\Results\Stats\co\12W_Df.pkl').reset_index(drop=True)
# metrics = metrics.sort_values(by='mse').reset_index(drop=True)
# metrics

## **Visualizations**

In [None]:
# from statsforecast import StatsForecast
# from statsforecast.models import AutoARIMA, AutoCES, AutoETS, AutoTheta

# results_stats = []
# # for pollutant in ['co','pm10','pm25','o3','no2']:
# for pollutant in ['pm25']:
#     for h in [12, 26, 52, 78]:

#         obj = TimeObject(data, pollutant, agg_freq='W')
#         obj.NIXTLA_train_test(split=h)

#         # ======================================================================================================

#         nbeats = joblib.load(fr"Results\NBEATS\{pollutant}\{h}W_Study.pkl")
#         model = NBEATS(
#             h=h,
#             input_size=nbeats.best_trials[0].params.get('input_size'),
#             stack_types=nbeats.best_trials[0].params.get('interpretability')+(nbeats.best_trials[0].params.get('n_stacks')-len(nbeats.best_trials[0].params.get('interpretability')))*['identity'],
#             n_blocks=nbeats.best_trials[0].params.get('n_stacks') * [nbeats.best_trials[0].params.get('n_blocks')],
#             max_steps=nbeats.best_trials[0].params.get('max_steps'),
#             learning_rate=1e-3,
#             val_check_steps=10,
#         )
#         fcst = NeuralForecast(
#             models=[model],
#             freq='W',
#             local_scaler_type=nbeats.best_trials[0].params.get('local_scalar_type')
#         )
#         fcst.fit(df=obj.Y_train, verbose=False)
#         predicted = fcst.predict(df=obj.Y_train, verbose=False)
#         obj.plot_forecast(predicted, method='NBEATS')

#         # ======================================================================================================
        
#         nhits = joblib.load(fr"Results\NHITS\{pollutant}\{h}W_Study.pkl")
#         model = NHITS(
#             h=h,
#             input_size=nhits.best_trials[0].params.get('input_size'),
#             stack_types=nhits.best_trials[0].params.get('n_stacks')*['identity'],
#             n_freq_downsample=nhits.best_trials[0].params.get('n_freq_downsample')+(nhits.best_trials[0].params.get('n_stacks')-len(nhits.best_trials[0].params.get('n_freq_downsample')))*[1],
#             n_blocks=nhits.best_trials[0].params.get('n_stacks')*[nhits.best_trials[0].params.get('n_blocks')],
#             n_pool_kernel_size=(nhits.best_trials[0].params.get('n_stacks')-len(nhits.best_trials[0].params.get('n_pool_kernel_size')))*[1]+nhits.best_trials[0].params.get('n_pool_kernel_size'),
#             pooling_mode="MaxPool1d",
#             activation="ReLU",
#             interpolation_mode='linear',
#             max_steps=nhits.best_trials[0].params.get('max_steps'),
#             val_check_steps=10,
#         )
#         fcst = NeuralForecast(
#             models=[model],
#             freq='W',
#             local_scaler_type=nhits.best_trials[0].params.get('local_scalar_type')
#         )
#         fcst.fit(df=obj.Y_train, verbose=False)
#         predicted = fcst.predict(df=obj.Y_train, verbose=False)
#         obj.metrics_(predicted, method='NHITS')
#         obj.plot_forecast(predicted)

In [156]:
def _mean_importance_for_group(params_importance, metric, group):
    """Average each hyperparameter's importance over the entries of one group.

    An entry of ``params_importance`` contributes when ``group in key`` is true
    for its key and only through the sub-dictionary stored under ``metric``.
    Returns ``{hyperparameter: mean importance}`` (empty if nothing matched).
    """
    totals = {}
    counts = {}
    for pollutant_h, per_metric in params_importance.items():
        if group not in pollutant_h:
            continue
        for metric_, hyperparams in per_metric.items():
            if metric_ != metric:
                continue
            for key, value in hyperparams.items():
                totals[key] = totals.get(key, 0) + value
                counts[key] = counts.get(key, 0) + 1
    return {key: totals[key] / counts[key] for key in totals}


def plot_hyperparams_importance(metric='smape', nhits=True, type='poll'):
    """Plot mean hyperparameter importance as grouped horizontal bars.

    Parameters
    ----------
    metric : str
        Metric key whose importances are averaged (e.g. 'smape').
    nhits : bool
        True loads the N-HiTS importance file, False the N-BEATS one.
    type : str
        'poll' groups the bars by pollutant ('o3', 'no2', 'co', 'pm10', 'pm25');
        any other value groups them by horizon ('12', '26', '52', '78').

    Returns
    -------
    The plotly figure, after ``main_layout`` has been applied to it.

    Notes
    -----
    The loaded object is iterated as ``{key: {metric: {hyperparameter: value}}}``
    and a key belongs to a group when ``group in key``.
    Each group's mean is computed from that group's entries only. Earlier
    versions shared one accumulator across groups, so every group after the
    first reported a running mean over all previous groups as well.
    """

    if nhits:
        model = 'N-HiTS'
        params_importance = joblib.load(r'Results\Params Importance\Params_Importance_NHITS')
    else:
        model = 'N-BEATS'
        params_importance = joblib.load(r'Results\Params Importance\Params_Importance_NBEATS')

    if type == 'poll':
        groups = ['o3','no2','co','pm10','pm25']
        group_label = 'Pollutant'
    else:
        groups = ['12','26','52','78']
        group_label = 'Horizon'

    # One single-row frame per group (row label = group), concatenated once.
    group_frames = [
        pd.DataFrame(_mean_importance_for_group(params_importance, metric, group), index=[group])
        for group in groups
    ]
    hyper_results = pd.concat(group_frames)

    hyper_results = hyper_results.rename(columns={
        'input_size':'input',
        'n_freq_downsample':'downsample',
        'n_pool_kernel_size':'kernel',
        'local_scalar_type':'scalar',
        'n_blocks':'blocks',
        'n_stacks':'stacks',
        'max_steps':'epochs',
    })

    fig = go.Figure()
    colors = [
        "#000000",  # Black
        "#404040",  # Dark Gray
        "#808080",  # Medium Gray
        "#BFBFBF",  # Light Gray
        "#D9D9D9",  # Light Silver
    ]

    # After the transpose: rows are hyperparameters, columns are groups.
    # Order the rows by their mean across groups, using a temporary helper column.
    sorted_hyper_results = hyper_results.T
    sorted_hyper_results["mean"] = sorted_hyper_results.mean(axis=1)
    sorted_hyper_results = sorted_hyper_results.sort_values(by="mean", ascending=True).drop(columns=["mean"])

    # One bar trace per group; colors cycle if there are more groups than colors.
    for idx, group in enumerate(sorted_hyper_results.columns):
        fig.add_trace(
            go.Bar(
                y=sorted_hyper_results.index,  # Hyperparameters on the y-axis
                x=sorted_hyper_results[group],  # Mean importance on the x-axis
                name=group,  # Legend entry = pollutant or horizon
                marker=dict(color=colors[idx % len(colors)]),
                orientation="h",  # Horizontal bars
            )
        )

    # Labels follow the actual orientation: hyperparameters on the y-axis,
    # one legend entry per group.
    fig.update_layout(
        title=f"Hyperparameter Importance by {group_label}",
        xaxis_title="Importance",
        yaxis_title="Hyperparameter",
        barmode="group",  # Bars of different groups are drawn side by side
        legend_title=group_label,
    )

    main_layout(fig, title=f'{model} | {metric.upper()}', x='Params Importance', width=600, height=500)
    return fig

# SMAPE-based importance for N-BEATS (nhits=False), grouped by horizon (type != 'poll').
plot_hyperparams_importance(metric='smape', nhits=False, type='h')