In [7]:
import pandas as pd
import numpy as np
from neuralprophet import NeuralProphet
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Funções para calcular MAPE e SMAPE
def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def symmetric_mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return 100 * np.mean(2 * np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)))

# Carregando os dados (assumindo que você tenha um arquivo CSV chamado 'energy_data.csv')
df = pd.read_csv("../EDA/dataframe_model/stretchedSociety_block_109_MAC005547.csv")

# Imprimindo as colunas do DataFrame
print("Colunas no DataFrame:")
print(df.columns)

# Convertendo a coluna 'time' para datetime
df['time'] = pd.to_datetime(df['time'])

# Selecionando apenas as colunas necessárias
columns_needed = ['time', 'holiday', 'month', 'dayofweek_num', 'hour', 'Energy_kwh']
df = df[columns_needed]

# Renomeando colunas para o formato exigido pelo NeuralProphet
df = df.rename(columns={'time': 'ds', 'Energy_kwh': 'y'})

# Preparando as covariáveis
covariates = ['holiday', 'month', 'dayofweek_num', 'hour']

# Dividindo os dados em treino e teste
train_df, test_df = train_test_split(df, test_size=0.2, shuffle=False)

# Criando e configurando o modelo NeuralProphet
model = NeuralProphet(
    n_forecasts=24,
    n_lags=24,
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=True,
)

# Adicionando as covariáveis ao modelo
for covariate in covariates:
    model.add_future_regressor(covariate)

# Treinando o modelo
metrics = model.fit(train_df, freq='H')

# Preparando os dados futuros para previsão
future = pd.concat([train_df.tail(model.n_lags), test_df])
future = future.reset_index(drop=True)

# Fazendo previsões
forecast = model.predict(future)

# Avaliando o modelo
actual = test_df['y']
predicted = forecast['yhat1'].tail(len(test_df))

mae = mean_absolute_error(actual, predicted)
rmse = np.sqrt(mean_squared_error(actual, predicted))
mape = mean_absolute_percentage_error(actual, predicted)
smape = symmetric_mean_absolute_percentage_error(actual, predicted)

print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"MAPE: {mape:.2f}%")
print(f"SMAPE: {smape:.2f}%")

# Plotando os resultados
fig1 = model.plot(forecast)
plt.title('Previsão vs. Valores Reais')
plt.show()

fig2 = model.plot_components(forecast)
plt.show()

# Plotando a importância das covariáveis
fig3, ax = plt.subplots()
model.plot_parameters(ax=ax)
plt.title('Importância das Covariáveis')
plt.show()

# Visualização de Erros
plt.figure(figsize=(12, 6))
plt.plot(test_df['ds'], actual - predicted, label='Erro')
plt.axhline(y=0, color='r', linestyle='--')
plt.title('Erro de Previsão ao Longo do Tempo')
plt.xlabel('Data')
plt.ylabel('Erro (kWh)')
plt.legend()
plt.show()

# Análise de Resíduos
residuals = actual - predicted
plt.figure(figsize=(10, 6))
plt.hist(residuals, bins=50)
plt.title('Histograma dos Resíduos')
plt.xlabel('Erro (kWh)')
plt.ylabel('Frequência')
plt.show()



  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)

INFO - (NP.df_utils._infer_frequency) - Major frequency h corresponds to 99.988% of the data.
  aux_ts = pd.DataFrame(pd.date_range("1994-01-01", periods=100, freq=freq_str))

  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)

  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)

INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H
  df_grouped = df.groupby("ID").apply(lambda x: x.set_index("ds").resample(freq).asfreq()).drop(columns=["ID"])

INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.


Colunas no DataFrame:
Index(['time', 'Energy_kwh', 'house_hold', 'temperature', 'windSpeed',
       'precipType', 'icon', 'summary', 'holiday',
       'bool_weather_missing_values', 'year', 'month', 'day', 'hour',
       'dayofweek_num'],
      dtype='object')


INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 64
INFO - (NP.config.set_auto_batch_epoch) - Auto-set epochs to 60


Training: |          | 0/? [00:00<?, ?it/s]



Finding best initial lr:   0%|          | 0/247 [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)

INFO - (NP.df_utils._infer_frequency) - Major frequency h corresponds to 99.951% of the data.
  aux_ts = pd.DataFrame(pd.date_range("1994-01-01", periods=100, freq=freq_str))

  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)

  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)

INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H
  future_dates = pd.date_range(start=last_date, periods=periods + 1, freq=freq)  # An extra in case we include start

  df_i = pd.concat([df_i, future_df])

  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)

INFO - (NP.df_utils._infer_frequency) - Major frequency h corresponds to 99.952% of the data.
  aux_ts = pd.DataFrame(pd.date_range("1994-01-01", periods=100, freq=freq_str))

  converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)

  converted_ds = pd.to_datetime(ds_col, u

Predicting: |          | 0/? [00:00<?, ?it/s]

INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column


ValueError: Input contains NaN.