In [None]:
import pandas as pd
import numpy as np
from neuralforecast import NeuralForecast
from neuralforecast.models import NHITS
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt

# Load the PM2.5 readings and reshape them into the long layout used by
# NeuralForecast: one row per timestamp with columns unique_id / ds / y.
Y_df = pd.read_csv(
    '../dados_tratados/combinado/Piratininga/Piratininga_tratado_combinado.csv',
    usecols=['PM2.5', 'Data e Hora'],
    low_memory=False,
)

# Rename to the ds / y convention and parse the timestamps once.
Y_df = Y_df.rename(columns={'Data e Hora': 'ds', 'PM2.5': 'y'})
Y_df['ds'] = pd.to_datetime(Y_df['ds'])

# Only one series is modelled, so every row carries the same identifier.
Y_df['unique_id'] = '1'
Y_df = Y_df[['unique_id', 'ds', 'y']]

# Keep rows from 2019-01-01 00:00 up to and including 2022-01-01 00:00,
# in chronological order with a clean positional index.
Y_df = Y_df[(Y_df['ds'] >= '2019-01-01') & (Y_df['ds'] <= '2022-01-01')]
Y_df = Y_df.sort_values(by='ds').reset_index(drop=True)

# Fill gaps in the target column only. Calling interpolate() on the whole
# frame would also touch the string `unique_id` column, which recent pandas
# versions deprecate and future versions reject.
# NOTE(review): linear interpolation does not fill NaNs that precede the first
# valid reading — confirm the series starts with a valid value.
Y_df['y'] = Y_df['y'].interpolate(method='linear')

# Chronological 70 % / 15 % / remainder split into train, validation and test.
total_rows = len(Y_df)
train_size = int(0.7 * total_rows)
val_size = int(0.15 * total_rows)
test_size = total_rows - (train_size + val_size)

# Positional slices: the frame is already sorted by time, so no shuffling.
Y_train, Y_val, Y_test = (
    Y_df.iloc[:train_size],
    Y_df.iloc[train_size:train_size + val_size],
    Y_df.iloc[train_size + val_size:],
)

# Model configuration: a single NHITS forecaster wrapped by NeuralForecast.
nhits_kwargs = dict(
    h=24,  # forecast horizon: 24 steps (24 hours at the 60-minute frequency)
    input_size=168,  # look-back window: one week of hourly inputs
    max_steps=10,
    n_freq_downsample=[2, 1, 1],
    early_stop_patience_steps=5,
    enable_progress_bar=False,
)
models = [NHITS(**nhits_kwargs)]

# The series is declared as sampled every 60 minutes.
nf = NeuralForecast(models=models, freq="60min")

# Walk-forward evaluation over the test period: refit on all data seen so far,
# forecast the next window, then reveal that window's actual values and repeat.
Y_train_val = pd.concat([Y_train, Y_val])

horizon = 24  # must match the `h` the models were configured with

predictions = []
temp_train = Y_train_val.copy()

while len(temp_train) < len(Y_df):
    nf.fit(df=temp_train, val_size=len(Y_val))

    # The last window may be shorter than the horizon.
    remaining_points = len(Y_df) - len(temp_train)
    forecast_size = min(horizon, remaining_points)

    # Forecast from the history the model was just fitted on. Passing the
    # *next* rows as `df` would treat them as history, so the forecast would
    # be conditioned on the very values being predicted and would be
    # timestamped one window too late.
    # NOTE(review): forecast timestamps are generated from `freq`; this assumes
    # Y_df sits on a regular 60-minute grid — confirm.
    forecast = nf.predict()

    # The model always emits `horizon` steps; keep only those that have actuals.
    predictions.append(forecast.iloc[:forecast_size])

    # Reveal the actual values of the window that was just forecast.
    next_points = Y_df.iloc[len(temp_train):len(temp_train) + forecast_size]
    temp_train = pd.concat([temp_train, next_points])

# Stack the per-window forecasts into one frame.
all_predictions = pd.concat(predictions)

In [None]:
from sklearn.metrics import r2_score

# Evaluation inputs: the held-out rows and the forecasts whose timestamps
# fall inside that held-out period.
test_start = train_size + val_size
Y_test_actual = Y_df.iloc[test_start:]
in_test_window = all_predictions['ds'].isin(Y_test_actual['ds'])
Y_test_pred = all_predictions[in_test_window]


In [None]:
# Display the held-out actual rows as a visual sanity check.
Y_test_actual

In [None]:
# Display the forecasts that were matched to the held-out timestamps.
Y_test_pred

In [None]:
# Evaluation inputs: the held-out rows and the forecasts whose timestamps
# fall inside that held-out period.
Y_test_actual = Y_df.iloc[train_size+val_size:]
Y_test_pred = all_predictions[all_predictions['ds'].isin(Y_test_actual['ds'])]

# Error metrics per model.
for model in models:
    model_name = model.__class__.__name__

    # Pair each forecast with its actual value by timestamp. The two frames
    # carry unrelated indexes, so subtracting their Series directly would
    # align on index labels and yield NaN instead of element-wise errors.
    aligned = Y_test_actual[['ds', 'y']].merge(
        Y_test_pred[['ds', model_name]], on='ds', how='inner'
    )
    y_true = aligned['y'].to_numpy()
    y_pred = aligned[model_name].to_numpy()

    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)

    # MAPE is undefined where the actual value is zero; skip those points so
    # a single zero reading does not turn the metric into inf.
    nonzero = y_true != 0
    if nonzero.any():
        mape = np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100
    else:
        mape = np.nan

    r2 = r2_score(y_true, y_pred)

    print(f"{model_name} - MSE: {mse:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}, MAPE: {mape:.4f}%, R2: {r2:.4f}")

# Time-series view: observed test values with each model's forecast overlaid.
fig, ax = plt.subplots(figsize=(15, 8))
ax.plot(Y_test_actual['ds'], Y_test_actual['y'], label='Actual', alpha=0.5)
for fitted_model in models:
    forecast_col = type(fitted_model).__name__
    ax.plot(Y_test_pred['ds'], Y_test_pred[forecast_col], label=f'{forecast_col} Forecast')
ax.legend()
ax.set(
    title='Previsão de PM2.5 para o Conjunto de Teste',
    xlabel='Data',
    ylabel='PM2.5',
)
plt.show()

# Predicted-vs-actual scatter; the dashed red diagonal marks perfect agreement.
fig, ax = plt.subplots(figsize=(10, 10))
actual_min = Y_test_actual['y'].min()
actual_max = Y_test_actual['y'].max()
for fitted_model in models:
    forecast_col = type(fitted_model).__name__
    ax.scatter(Y_test_actual['y'], Y_test_pred[forecast_col], alpha=0.5)
    ax.plot([actual_min, actual_max], [actual_min, actual_max], 'r--', lw=2)
ax.set(
    xlabel='Valores Reais',
    ylabel='Valores Previstos',
    title='Gráfico de Dispersão: Valores Reais vs. Previstos',
)
plt.show()

# Residual plot: actual minus forecast over time, with a zero reference line.
plt.figure(figsize=(15, 8))
for model in models:
    model_name = model.__class__.__name__

    # Pair actuals and forecasts by timestamp before subtracting. The two
    # frames have unrelated indexes, so a direct Series subtraction would
    # align on index labels and produce NaN residuals of the wrong length.
    aligned = Y_test_actual[['ds', 'y']].merge(
        Y_test_pred[['ds', model_name]], on='ds', how='inner'
    )
    residuals = aligned['y'].to_numpy() - aligned[model_name].to_numpy()
    plt.scatter(aligned['ds'], residuals, alpha=0.5)

# One zero line is enough regardless of how many models are plotted.
plt.axhline(y=0, color='r', linestyle='--')
plt.xlabel('Data')
plt.ylabel('Resíduos')
plt.title('Gráfico de Resíduos')
plt.show()