In [1]:
import os
os.environ['NIXTLA_ID_AS_COL'] = '1'

import optuna
import itertools
import shutil
import time
import functools

import pandas as pd
import numpy as np
np.random.seed(1)

import plotly.graph_objects as go
import plotly.express as px
import plotly.subplots
import plotly.io as pio
from graphmodex import plotlymodex
pio.renderers.default = 'notebook'

import joblib
import pickle
from IPython.display import clear_output

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import neuralforecast
import mlforecast
import statsforecast
import utilsforecast
import coreforecast

from pytorch_lightning import Trainer
trainer = Trainer(
    max_steps=4,
    logger=False,
    enable_progress_bar=False,
    enable_model_summary=False
)

import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="optuna")

2026-02-04 15:13:29,207	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2026-02-04 15:13:29,862	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
ðŸ’¡ Tip: For seamless cloud logging and experiment tracking, try installing [litlogger](https://pypi.org/project/litlogger/) to enable LitLogger, which logs metrics and artifacts automatically to the Lightning Experiments platform.


In [3]:
df = pd.read_parquet(r'..\Data\CAMS\processed\eac4_era5_2010_2024_brasil_enhanced.parquet')

In [4]:
pm10 = (
    df
    .rename(columns={
        'pm10': 'y',
        'valid_time': 'ds'        
    })
    .query("unique_id.isin([10, 11, 12, 13])")
    [['unique_id', 'ds', 'y']]
)

pm2p5 = (
    df
    .rename(columns={
        'pm2p5': 'y',
        'valid_time': 'ds'        
    })
    .query("unique_id.isin([10, 11, 12, 13])")
    [['unique_id', 'ds', 'y']]
)

go3 = (
    df
    .rename(columns={
        'go3': 'y',
        'valid_time': 'ds'        
    })
    .query("unique_id.isin([10, 11, 12, 13])")
    [['unique_id', 'ds', 'y']]
)

no2 = (
    df
    .rename(columns={
        'no2': 'y',
        'valid_time': 'ds'        
    })
    .query("unique_id.isin([0, 1, 2, 3])")
    [['unique_id', 'ds', 'y']]
)

In [5]:
from statsmodels.tsa.stattools import adfuller

def Augmented_Dickey_Fuller_Test_func(series , column_name):
    print (f'Dickey-Fuller test results for columns: {column_name}')
    dftest = adfuller(series, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','No Lags Used','Number of observations used'])
    for key,value in dftest[4].items():
       dfoutput['Critical Value (%s)'%key] = value
    print (dfoutput)
    if dftest[1] <= 0.05:
        print("Conclusion:====>")
        print("Reject the null hypothesis")
        print("The data is stationary")
    else:
        print("Conclusion:====>")
        print("The null hypothesis cannot be rejected")
        print("The data is not stationary")

Augmented_Dickey_Fuller_Test_func(no2["y"],'Sales')

Dickey-Fuller test results for columns: Sales
Test Statistic                    -36.566309
p-value                             0.000000
No Lags Used                       78.000000
Number of observations used    175249.000000
Critical Value (1%)                -3.430387
Critical Value (5%)                -2.861556
Critical Value (10%)               -2.566779
dtype: float64
Conclusion:====>
Reject the null hypothesis
The data is stationary


In [12]:
import matplotlib.pyplot as plt
import statsmodels.tsa.api as smt
import statsmodels.api as sm

def tsplot(y, lags=None, figsize=(12, 7), style='bmh'): # [3]
    if not isinstance(y, pd.Series):
        y = pd.Series(y)

    with plt.style.context(style):
        fig = plt.figure(figsize=figsize)
        layout = (2, 2)
        ts_ax = plt.subplot2grid(layout, (0, 0), colspan=2)
        acf_ax = plt.subplot2grid(layout, (1, 0))
        pacf_ax = plt.subplot2grid(layout, (1, 1))

        y.plot(ax=ts_ax)
        p_value = sm.tsa.stattools.adfuller(y)[1]
        ts_ax.set_title('Time Series Analysis plot\n Dickey-Fuller: p={0:.5f}'.format(p_value))
        smt.graphics.plot_acf(y, lags=lags, ax=acf_ax)
        smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax)
        plt.tight_layout()

tsplot(pm10["y"].diff().dropna(), lags=50)