In [109]:
import pandas as pd
import numpy as np

import plotly.graph_objects as go
import plotly.express as px

from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS, NHITS, LSTM, RNN

import os

# Environment flag read by the Nixtla libraries.
# NOTE(review): presumably makes `unique_id` come back as a regular column
# instead of the index in forecast frames — confirm against the Nixtla docs.
os.environ['NIXTLA_ID_AS_COL'] = '1'

# Load the raw data and order it chronologically by its 'date' column.
# NOTE(review): absolute, machine-specific path — the notebook will not run
# elsewhere as-is. Only unpickle files that come from a trusted source.
df = pd.read_pickle(r'C:\Users\gustavo.filho\Documents\Python\TCC\Data_SP.pkl').sort_values(by='date')

In [None]:
# Marg.Tietê-Ponte dos Remédios, São Paulo, Brazil --> CO (NO2, PM10, PM2.5)
# Guarulhos, São Paulo, Brazil --> O3 (PM10)

In [None]:
# Priority
# Marg.Tietê-Ponte dos Remédios, São Paulo, Brazil --> 14\23

# Not very good
# Congonhas, São Paulo, Brazil --> 14\23
# Osasco, São Paulo, Brazil --> 14\21
# Parelheiros, São Paulo, Brazil --> 14\23 (few pollutants)
# Guarulhos, São Paulo, Brazil --> 14\23 O3

## GUMODEX

In [2]:
# USED WITH PLOTLY TO ELABORATE THE DESIGN ===========================================================
def main_layout(fig:'go.Figure', width=700, height=600, x=None, y=None, title=None,
               x_range=None, y_range=None, paper_color='white',
               customdata=None, hover_customdata='Info',
               hover_x='x',hover_y='y', **kwargs) -> 'go.Figure':
    """Apply the notebook's standard single-plot styling to ``fig`` and return it.

    The figure's layout is *replaced* by a freshly built ``go.Layout`` (any
    layout settings already on ``fig`` are discarded), then the hover template
    of every trace is rewritten unless ``customdata == 'no'``.

    Parameters
    ----------
    fig : go.Figure
        Figure to style; modified in place.
    width, height : int
        Figure size passed to the layout.
    x, y : str or None
        Axis titles.
    title : str or None
        Figure title text.
    x_range, y_range : sequence or None
        Axis ranges.
    paper_color : str
        Used for both the plot and the paper background.
    customdata : 'no', None, or array-like
        * ``'no'``  – leave the traces untouched.
        * ``None``  – set a hover template showing only x and y.
        * otherwise – attach the value as ``customdata`` on every trace and
          add a third hover line labelled ``hover_customdata``.
    hover_customdata, hover_x, hover_y : str
        Labels used inside the hover template.
    **kwargs
        Extra keyword arguments forwarded to ``go.Layout``.

    Returns
    -------
    go.Figure
        The same ``fig`` object.
    """
    fig.layout = go.Layout(
        width=width,
        height=height,
        plot_bgcolor=paper_color,
        paper_bgcolor=paper_color,
        xaxis={'gridcolor':'#cccccc', 'linecolor':'black','title':x, 'range':x_range},
        yaxis={'gridcolor':'#cccccc', 'linecolor':'black','title':y, 'range':y_range},
        title={'text':title},
        **kwargs
    )

    # Only a *string* can be the 'no' sentinel. Comparing an array-like
    # customdata with == would yield an element-wise result whose truth value
    # is ambiguous and makes `if` raise, so check the type first.
    if isinstance(customdata, str) and customdata == 'no':
        return fig

    hovertemplate = hover_x + ': %{x}<br>' + hover_y + ': %{y}'
    if customdata is not None:
        hovertemplate += '<br>' + hover_customdata + ': %{customdata}<br>'

    fig.update_traces(patch={
        'customdata': customdata,
        'hovertemplate': hovertemplate,
    })
    return fig
# ====================================================================================================
def main_subplot_layout(fig:'go.Figure', width=1400, height=500, title=None, paper_color='white',
                        x=None, y=None, rows=1, cols=2, x_range=None, y_range=None,
                        customdata=None, hover_customdata='Info',
                        hover_x='x',hover_y='y', **kwargs) -> 'go.Figure':
    """Apply the notebook's standard styling to a figure with subplots.

    Unlike a full layout replacement, this *updates* the existing layout (so
    the subplot grid is preserved), restyles every x and y axis, and rewrites
    the hover template of every trace unless ``customdata == 'no'``.

    Parameters
    ----------
    fig : go.Figure
        Figure to style; modified in place.
    width, height : int
        Figure size.
    title : str or None
        Figure title.
    paper_color : str
        Used for both the plot and the paper background.
    x, y : str or None
        Title applied to *every* x axis / y axis.
    rows, cols : int
        Accepted for interface compatibility; not used by this function.
    x_range, y_range : sequence or None
        Range applied to every x axis / y axis.
    customdata : 'no', None, or array-like
        * ``'no'``  – leave the traces untouched.
        * ``None``  – set a hover template showing only x and y.
        * otherwise – attach the value as ``customdata`` on every trace and
          add a third hover line labelled ``hover_customdata``.
    hover_customdata, hover_x, hover_y : str
        Labels used inside the hover template.
    **kwargs
        Extra layout properties merged into the ``update_layout`` call.

    Returns
    -------
    go.Figure
        The same ``fig`` object.
    """
    fig.update_layout({
        'width':width,
        'height':height,
        'plot_bgcolor':paper_color,
        'paper_bgcolor':paper_color,
        'title':title,
        **kwargs
    })

    # Styling shared by every axis of every subplot.
    axis_style = dict(showgrid=True, gridcolor='#CCCCCC', linecolor='black')
    for xaxis in fig.select_xaxes():
        xaxis.update(title=x, range=x_range, **axis_style)
    for yaxis in fig.select_yaxes():
        yaxis.update(title=y, range=y_range, **axis_style)

    # Only a *string* can be the 'no' sentinel. Comparing an array-like
    # customdata with == would yield an element-wise result whose truth value
    # is ambiguous and makes `if` raise, so check the type first.
    if isinstance(customdata, str) and customdata == 'no':
        return fig

    hovertemplate = hover_x + ': %{x}<br>' + hover_y + ': %{y}'
    if customdata is not None:
        hovertemplate += '<br>' + hover_customdata + ': %{customdata}<br>'

    fig.update_traces(patch={
        'customdata': customdata,
        'hovertemplate': hovertemplate,
    })
    return fig
# ====================================================================================================

# **Time Object**

In [None]:
class TimeObject:
    """One column of a dated DataFrame, turned into a gap-free daily series.

    On construction the selected column is indexed by the ``'date'`` column,
    re-indexed onto a complete daily calendar (missing days become NaN),
    interpolated, and finally stored in the long format used by the Nixtla
    libraries (columns ``ds``, ``y``, ``unique_id``).

    Attributes set by ``__init__``:
        df          -- the ``['date', column]`` slice of the input frame
        column      -- name of the value column
        time_serie  -- interpolated daily ``pd.Series`` indexed by date
        nixtla_df   -- ``ds`` / ``y`` / ``unique_id`` frame built from ``time_serie``

    Attributes set by ``NIXTLA_train_test``:
        Y_train, Y_test -- chronological split of ``nixtla_df``
    """

    def __init__(self, df:pd.DataFrame, column:str,
                 NAN_treatment_args:'dict | None'=None) -> None:
        """Build the daily series and its Nixtla-format frame.

        Parameters
        ----------
        df : pd.DataFrame
            Must contain a ``'date'`` column and ``column``.
        column : str
            Name of the value column to model.
        NAN_treatment_args : dict, optional
            Keyword arguments forwarded to ``pd.Series.interpolate``.
            Defaults to ``{'method': 'from_derivatives'}`` (a SciPy-backed
            method in pandas).
        """
        # None instead of a dict literal in the signature: avoids sharing one
        # mutable default object across all instances.
        if NAN_treatment_args is None:
            NAN_treatment_args = {'method': 'from_derivatives'}

        self.df = df[['date',column]]
        self.column = column
        self.time_serie = self.to_serie_()

        self.NAN_treatment_(**NAN_treatment_args)
        self.NIXTLA_treatment_()

    def to_serie_(self) -> pd.Series:
        """Return the value column indexed by date on a complete daily calendar.

        Days absent from the input appear as NaN.

        Raises
        ------
        ValueError
            If the ``'date'`` column contains duplicate timestamps (the series
            cannot be re-indexed unambiguously in that case).
        """
        # Work on a copy so assigning the index never touches the caller's frame.
        time_serie = self.df[self.column].copy()
        time_serie.index = pd.to_datetime(self.df['date'])

        if time_serie.index.has_duplicates:
            raise ValueError(
                "'date' contains duplicate timestamps; aggregate or filter the "
                "data to one row per day before building a TimeObject"
            )

        full_index = pd.date_range(start=time_serie.index.min(), end=time_serie.index.max(), freq='D')
        return time_serie.reindex(full_index)

    def NAN_treatment_(self, **kwargs) -> None:
        """Fill NaNs in ``time_serie`` via ``pd.Series.interpolate(**kwargs)``."""
        self.time_serie = self.time_serie.interpolate(**kwargs)

    def NIXTLA_treatment_(self) -> None:
        """Build ``nixtla_df`` with columns ``ds``, ``y`` and ``unique_id``.

        The frame is created in a single constructor call (rather than by
        enlarging an empty DataFrame through ``.loc``) so every column gets a
        proper dtype and a plain 0..n-1 index.
        """
        self.nixtla_df = pd.DataFrame({
            'ds': pd.to_datetime(self.time_serie.index),
            'y': self.time_serie.to_numpy(),
            'unique_id': 1.0,  # single series -> one constant identifier
        })

    def plot(self) -> 'go.Figure':
        """Return a new figure with the interpolated daily series as one trace."""
        fig = go.Figure()
        fig.add_trace(trace=go.Scatter(
            x=self.time_serie.index, y=self.time_serie,
            marker=dict(color='#222222')
        ))
        return fig

    def NIXTLA_train_test(self, split:int=12):
        """Split ``nixtla_df`` chronologically into ``Y_train`` and ``Y_test``.

        The last ``split`` rows become ``Y_test`` (index reset to 0..split-1);
        everything before them becomes ``Y_train``.

        Raises
        ------
        ValueError
            If ``split`` is not between 1 and ``len(nixtla_df)``. (Previously
            ``split=0`` silently put *every* row in the test set and an
            oversized ``split`` failed with an opaque IndexError.)
        """
        n_rows = len(self.nixtla_df)
        if not 1 <= split <= n_rows:
            raise ValueError(
                f"split must be between 1 and {n_rows} (number of rows), got {split}"
            )

        # `ds` is a complete, strictly increasing daily range, so a positional
        # cut is equivalent to comparing against the date at position -split.
        cutoff = n_rows - split
        self.Y_train = self.nixtla_df.iloc[:cutoff]
        self.Y_test = self.nixtla_df.iloc[cutoff:].reset_index(drop=True)

    def plot_forecast(self, forecast_df:pd.DataFrame):
        """Placeholder: not implemented yet (currently does nothing and returns None)."""
        ...

In [120]:
# Build the daily, interpolated series for the 'co' column of `df`.
obj = TimeObject(df=df, column='co',)
# Hold out the last 100 rows as the test set.
# NOTE(review): the NHITS cell below calls NIXTLA_train_test again with
# split=365, which overwrites this split.
obj.NIXTLA_train_test(split=100)

In [127]:
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS, NHITS
from neuralforecast.losses.pytorch import DistributionLoss

# Hold out the last 365 rows as the test set; the forecast horizon below
# (h=365) matches that hold-out length.
obj.NIXTLA_train_test(split=365)

# Probabilistic NHITS: Student-t output distribution with 80% and 90% intervals.
# NOTE(review): n_freq_downsample has nine entries while the stack
# configuration is left at its default — confirm the list length matches the
# number of stacks actually built.
model = NHITS(h=365,
              input_size=365,
              loss=DistributionLoss(distribution='StudentT', level=[80, 90], return_params=True),
              n_freq_downsample=[180, 90, 60, 30, 15, 5, 1, 1, 1],
              max_steps=200,
              # Was 365, i.e. larger than max_steps; the library then warns and
              # clamps it to max_steps. Setting it explicitly gives the same
              # training run without the warning.
              val_check_steps=200,
              learning_rate=1e-3)

fcst = NeuralForecast(
    models=[model],
    freq='D'
)
# The last 365 training rows are used for validation.
fcst.fit(df=obj.Y_train, val_size=365, verbose=False)
prediction = fcst.predict(futr_df=obj.Y_test, verbose=False)

Seed set to 1

val_check_steps is greater than max_steps, setting val_check_steps to max_steps.

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type             | Params | Mode 
----------------------------------------------------------
0 | loss         | DistributionLoss | 5      | train
1 | padder_train | ConstantPad1d    | 0      | train
2 | scaler       | TemporalNorm     | 0      | train
3 | blocks       | ModuleList       | 3.3 M  | train
----------------------------------------------------------
3.3 M     Trainable params
5         Non-trainable params
3.3 M     Total params
13.280    Total estimated model params size (MB)
34        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=200` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

In [128]:
# Overlay the training data, the held-out data and the NHITS forecast, then
# shade the 90% prediction interval around the forecast.
fig = go.Figure()

# (x, y, colour, legend name) for each plain line, in drawing order.
line_specs = [
    (obj.Y_train['ds'], obj.Y_train['y'], 'black', 'train'),
    (obj.Y_test['ds'], obj.Y_test['y'], 'skyblue', 'test'),
    (prediction['ds'], prediction['NHITS'], 'orange', 'prediction'),
]
for xs, ys, colour, label in line_specs:
    fig.add_trace(trace=go.Scatter(
        x=xs, y=ys,
        mode='lines', marker=go.scatter.Marker(color=colour),
        name=label
    ))

# Interval band: an invisible lower-bound line, then an invisible upper-bound
# line that fills down to the previous trace ('tonexty').
band_style = dict(mode='lines', line=dict(width=0), showlegend=False)
fig.add_trace(go.Scatter(
    x=prediction['ds'], y=prediction['NHITS-lo-90'],
    fill=None, **band_style
))
fig.add_trace(go.Scatter(
    x=prediction['ds'], y=prediction['NHITS-hi-90'],
    fill='tonexty', fillcolor='rgba(255, 165, 0, 0.2)',
    **band_style
))

main_layout(fig=fig, width=1100, height=450, title='Forecast', x='time', y='[ ]')

fig.show()

In [8]:
from neuralforecast.tsdataset import TimeSeriesDataset

# Split the *training* frame (obj.Y_train) once more at a fixed calendar date
# and wrap the earlier part in a TimeSeriesDataset for the Auto* models.
# NOTE(review): these slices were annotated as "132 train" / "12 test" rows —
# verify those counts against the actual data.
Y_train_df = obj.Y_train.loc[obj.Y_train['ds'] <= '2024-06-11']
Y_test_df = obj.Y_train.loc[obj.Y_train['ds'] > '2024-06-11']
# from_df returns several values; only the first one (the dataset) is kept.
dataset, *_ = TimeSeriesDataset.from_df(Y_train_df)

In [107]:
# Rebuild the series object, this time from column 'y', and hold out the last
# 365 rows.
# NOTE(review): earlier cells use column 'co'; confirm `df` really has a 'y'
# column at this point (execution counts suggest out-of-order runs).
obj = TimeObject(df=df, column='y',)
obj.NIXTLA_train_test(split=365)

from neuralforecast.auto import AutoNHITS

# Deliberately tiny search configuration (2 training steps, 8-unit MLPs).
config = dict(max_steps=2, val_check_steps=1, input_size=365,
              mlp_units=3*[[8, 8]])
model = AutoNHITS(h=365, config=config, num_samples=10, cpus=1)

# Fit and predict directly on `dataset`.
# NOTE(review): `dataset` is not created in this cell; it must already exist
# in the kernel (presumably from the TimeSeriesDataset.from_df cell) — confirm
# it was built from the same data as the current `obj`.
model.fit(dataset=dataset)
y_hat = model.predict(dataset=dataset)

# Rebind `model` to a new, unfitted AutoNHITS that uses the Optuna backend
# with its default search space. The model fitted above is discarded; any
# later cell that uses `model` gets this one.
model = AutoNHITS(h=365, config=None, backend='optuna')

2024-12-03 15:26:24,194	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to 'C:/Users/gustavo.filho/ray_results/_train_tune_2024-12-03_15-26-05' in 0.0705s.
Seed set to 1
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.

  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | eval 
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 22.1 K | train
-------------------------------------------------------
22.1 K    Trainable params
0         Non-trainable params
22.1 K    Total params
0.088     Total estimated model params size (MB)
33        Modules in train mode
1         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=2` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1)` was configured so validation will run after every batch.


Predicting: |          | 0/? [00:00<?, ?it/s]

In [None]:
# Wrap the current `model` in a daily-frequency NeuralForecast, train it on
# the training split (last 365 rows used for validation) and forecast over
# the held-out period.
fcst = NeuralForecast(models=[model], freq='D')
fcst.fit(df=obj.Y_train, val_size=365, verbose=False)
prediction = fcst.predict(futr_df=obj.Y_test, verbose=False)

In [None]:
# Overlay the training data, the held-out data and the AutoNHITS forecast
# (rounded to whole numbers).
fig = go.Figure()

# (x, y, colour, legend name) for each line, in drawing order.
line_specs = [
    (obj.Y_train['ds'], obj.Y_train['y'], 'black', 'train'),
    (obj.Y_test['ds'], obj.Y_test['y'], 'skyblue', 'test'),
    (prediction['ds'], np.round(prediction['AutoNHITS'],0), 'orange', 'prediction'),
]
for xs, ys, colour, label in line_specs:
    fig.add_trace(trace=go.Scatter(
        x=xs, y=ys,
        mode='lines', marker=go.scatter.Marker(color=colour),
        name=label
    ))

main_layout(fig=fig, width=1100, height=450, title='Forecast', x='time', y='[ ]')

fig.show()

In [78]:
# Mean absolute percentage error (as a fraction) of the rounded forecast
# against the held-out values.
# NOTE(review): the subtraction aligns the two Series on their index, so this
# assumes obj.Y_test and prediction share the same 0..n-1 index; any zero in
# the held-out values would make the ratio infinite.
(obj.Y_test['y'] - prediction['AutoNHITS'].round(0)).div(obj.Y_test['y']).abs().mean()

np.float64(0.7974625729587785)

In [80]:
# Let the fitted NeuralForecast object generate the future frame (the id/time
# combinations it expects) from the training data.
futr_df = fcst.make_future_dataframe(df=obj.Y_train)  # no horizon argument is passed; only the training frame is given

# Forecast using that generated frame instead of obj.Y_test.
prediction = fcst.predict(futr_df=futr_df, verbose=False)


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

In [103]:
# Rebuild the series object from column 'y' and hold out the last 100 rows.
# NOTE(review): earlier cells use column 'co'; confirm `df` has a 'y' column.
obj = TimeObject(df=df, column='y',)
obj.NIXTLA_train_test(split=100)

In [None]:
from neuralforecast.auto import AutoNHITS

# Deliberately tiny search configuration (2 training steps, 8-unit MLPs) for
# a 100-step horizon.
config = dict(max_steps=2, val_check_steps=1, input_size=100,
              mlp_units=3*[[8, 8]])
model = AutoNHITS(h=100, config=config, num_samples=10, cpus=1)

# Fit and predict directly on `dataset`.
# NOTE(review): `dataset` is not created in this cell; it must already exist
# in the kernel (presumably from the TimeSeriesDataset.from_df cell) — confirm
# it matches the current `obj`.
model.fit(dataset=dataset)
y_hat = model.predict(dataset=dataset)

# Rebind `model` to a new, unfitted AutoNHITS that uses the Optuna backend
# with its default search space; the model fitted above is discarded and this
# one is what gets trained below.
model = AutoNHITS(h=100, config=None, backend='optuna')

# Train on the training split (last 100 rows used for validation) and
# forecast over the held-out period.
fcst = NeuralForecast(
    models=[model],
    freq='D'
)
fcst.fit(df=obj.Y_train, val_size=100, verbose=False)
prediction = fcst.predict(futr_df=obj.Y_test, verbose=False)

In [106]:
# Overlay the training data, the held-out data and the AutoNHITS forecast.
fig = go.Figure()

# (x, y, colour, legend name) for each line, in drawing order.
line_specs = [
    (obj.Y_train['ds'], obj.Y_train['y'], 'black', 'train'),
    (obj.Y_test['ds'], obj.Y_test['y'], 'skyblue', 'test'),
    (prediction['ds'], prediction['AutoNHITS'], 'orange', 'prediction'),
]
for xs, ys, colour, label in line_specs:
    fig.add_trace(trace=go.Scatter(
        x=xs, y=ys,
        mode='lines', marker=go.scatter.Marker(color=colour),
        name=label
    ))

main_layout(fig=fig, width=1100, height=450, title='Forecast', x='time', y='[ ]')

fig.show()