In [None]:
# VENDAS - PREVISÃO COM MODELOS SELECIONADOS + RF/XGB (36 meses)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from statsforecast import StatsForecast
from statsforecast.models import AutoETS, AutoARIMA, Naive

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor


In [None]:
# 1. Load data
# Read the raw sales extract and rename its "data"/"vendas" columns to the
# "ds"/"y" names used by the rest of the notebook.
df = pd.read_csv(r"resumo_vendas_regiao 1.csv").rename(columns={"data": "ds", "vendas": "y"})
df['ds'] = pd.to_datetime(df['ds'])

# Monthly totals: bucket every row by calendar month and sum "y", convert the
# period labels back to timestamps, then add the constant series identifier
# and put the columns in the order StatsForecast expects.
df_grouped = (
    df.groupby(df['ds'].dt.to_period('M'))['y']
    .sum()
    .reset_index()
    .assign(ds=lambda monthly: monthly['ds'].dt.to_timestamp(), unique_id='serie1')
    .loc[:, ['unique_id', 'ds', 'y']]
)

print(df_grouped.head())


In [None]:
# 2. Define the StatsForecast models
# `freq` must describe the timestamps actually present in df_grouped. Those
# come from Period.to_timestamp(), i.e. the FIRST day of each month, so the
# matching pandas alias is 'MS' (month start). With 'ME' (month end) the
# future dates are generated on month ends, which stamps the first forecast
# step with the last day of the final observed month instead of the first
# day of the following month.
sf = StatsForecast(
    models=[
        AutoETS(season_length=12),    # 12 = yearly seasonality on monthly data
        AutoARIMA(season_length=12),
        Naive()                       # baseline model
    ],
    freq='MS',
    n_jobs=-1
)


In [None]:
# 3. Cross-validation (backtest): 3 windows, each forecasting 12 months,
# with the forecast origin moved 12 months between windows.
cv_settings = {"h": 12, "step_size": 12, "n_windows": 3}
cv_results = sf.cross_validation(df=df_grouped, **cv_settings)

print(cv_results.head())


In [None]:
# 4. Metrics function
def calc_metrics(df, model):
    """Compute error metrics for one prediction column of a results frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the actual values in column ``'y'`` and the predictions
        in the column named by ``model``.
    model : str
        Name of the prediction column to evaluate.

    Returns
    -------
    dict
        ``{"RMSE": ..., "MAE": ..., "MAPE": ..., "R2": ...}``. MAPE is
        expressed in percent and is NaN when every actual value is zero.
    """
    # Rows where either the actual or the prediction is missing are ignored.
    df_eval = df.dropna(subset=['y', model])
    y_true = df_eval['y'].to_numpy(dtype=float)
    y_pred = df_eval[model].to_numpy(dtype=float)

    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    # The percentage error is undefined when the actual value is 0 (division
    # by zero would turn the whole average into inf/nan), so those points are
    # left out of the MAPE average only.
    nonzero = y_true != 0
    if nonzero.any():
        pct_err = np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])
        mape = np.mean(pct_err) * 100
    else:
        mape = np.nan

    return {"RMSE": rmse, "MAE": mae, "MAPE": mape, "R2": r2}


In [None]:
# 5. Evaluate the StatsForecast models on the backtest results
models_to_eval = ['AutoETS', 'AutoARIMA', 'Naive']
# One metrics dict per model, keyed by the model's column name in cv_results.
metrics = dict(
    (model_name, calc_metrics(cv_results, model_name))
    for model_name in models_to_eval
)


In [None]:
# 6. Random Forest and XGBoost (last 12 months held out)

df_ml = df_grouped.copy()
df_ml['year'] = df_ml['ds'].dt.year
df_ml['month'] = df_ml['ds'].dt.month

# Lag features: sales of the previous 1, 2 and 3 months.
df_ml['lag1'] = df_ml['y'].shift(1)
df_ml['lag2'] = df_ml['y'].shift(2)
df_ml['lag3'] = df_ml['y'].shift(3)
# The first three rows have no complete lag history and are dropped.
df_ml = df_ml.dropna()

X = df_ml[['year', 'month', 'lag1', 'lag2', 'lag3']]
y = df_ml['y']

# Last 12 months as the test set
X_train, X_test = X.iloc[:-12], X.iloc[-12:]
y_train, y_test = y.iloc[:-12], y.iloc[-12:]


def _recursive_forecast(model, X_future):
    """Forecast len(X_future) steps ahead, feeding predictions back as lags.

    Only the calendar columns of ``X_future`` and the lag values of its FIRST
    row are used: those lags are the last three observations before the
    forecast origin. For every later step the lags are rebuilt from the
    model's own previous predictions, so no actual value from the forecast
    period is ever seen by the model.

    Parameters
    ----------
    model : fitted regressor exposing ``predict``
    X_future : pandas.DataFrame
        Rows for the periods to forecast, in chronological order, with the
        columns ``year``, ``month``, ``lag1``, ``lag2``, ``lag3``.

    Returns
    -------
    numpy.ndarray
        One prediction per row of ``X_future``.
    """
    if len(X_future) == 0:
        return np.array([], dtype=float)

    # Most recent value first: [lag1, lag2, lag3].
    lags = [float(X_future.iloc[0][col]) for col in ('lag1', 'lag2', 'lag3')]
    preds = []
    for i in range(len(X_future)):
        row = X_future.iloc[[i]].assign(lag1=lags[0], lag2=lags[1], lag3=lags[2])
        y_hat = float(model.predict(row)[0])
        preds.append(y_hat)
        # Shift the window: the new prediction becomes lag1 of the next step.
        lags = [y_hat, lags[0], lags[1]]
    return np.array(preds)


# The hold-out is forecast recursively (12 steps ahead from a single origin).
# Predicting X_test directly would use lag features built from the realised
# test-period sales, i.e. a one-step-ahead forecast, which is not comparable
# with the 12-step-ahead StatsForecast backtest and flatters the ML models.
# NOTE(review): the ML models are still scored on a single 12-month window,
# whereas the StatsForecast models are scored on three windows.

# Random Forest
rf = RandomForestRegressor(n_estimators=300, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = _recursive_forecast(rf, X_test)

# XGBoost
xgb = XGBRegressor(n_estimators=500, learning_rate=0.05, max_depth=5, random_state=42)
xgb.fit(X_train, y_train)
y_pred_xgb = _recursive_forecast(xgb, X_test)

def calc_metrics_array(y_true, y_pred):
    """Compute error metrics from two equally long sequences of values.

    Parameters
    ----------
    y_true : array-like
        Actual values.
    y_pred : array-like
        Predicted values, in the same order as ``y_true``.

    Returns
    -------
    dict
        ``{"RMSE": ..., "MAE": ..., "MAPE": ..., "R2": ...}``. MAPE is
        expressed in percent and is NaN when every actual value is zero.
    """
    # Work on plain float arrays so the two inputs are always compared by
    # position (a pandas Series would otherwise align on its index labels).
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    # The percentage error is undefined when the actual value is 0 (division
    # by zero would turn the whole average into inf/nan), so those points are
    # left out of the MAPE average only.
    nonzero = y_true != 0
    if nonzero.any():
        pct_err = np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])
        mape = np.mean(pct_err) * 100
    else:
        mape = np.nan

    return {"RMSE": rmse, "MAE": mae, "MAPE": mape, "R2": r2}

# Add the two ML models to the metrics collected so far, then build a table
# with one row per model and one column per metric.
metrics.update(
    (label, calc_metrics_array(y_test, predictions))
    for label, predictions in (('RandomForest', y_pred_rf), ('XGBoost', y_pred_xgb))
)

metrics_df = pd.DataFrame(metrics).transpose()
print(metrics_df)


In [None]:
# 7. Pick the best model by RMSE (lowest value wins)
rmse_by_model = metrics_df['RMSE']
best_model = rmse_by_model.idxmin()
print("📌 Melhor modelo:", best_model)
# Full metric row of the selected model.
print(metrics_df.loc[best_model])


In [None]:
# 8. Final 36-month forecast (StatsForecast)
# Refit every StatsForecast model on the full monthly history, then project
# 36 periods past the last observation.
forecast_36 = sf.fit(df=df_grouped).predict(h=36)
print(forecast_36.head())


In [None]:
# 9. Charts

# Backtest of the StatsForecast models: actuals in black, one line per model.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(cv_results['ds'], cv_results['y'], label='Real', color='black')
for model in models_to_eval:
    ax.plot(cv_results['ds'], cv_results[model], label=model, alpha=0.7)
ax.legend()
ax.set_title("Backtest - Modelos StatsForecast (12 meses)")
plt.show()

# ML comparison over the 12 held-out months.
test_dates = df_ml['ds'].iloc[-12:]
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(test_dates, y_test, label="Real", color="black", linewidth=2)
ax.plot(test_dates, y_pred_rf, label="Random Forest", marker="o")
ax.plot(test_dates, y_pred_xgb, label="XGBoost", marker="x")
ax.legend()
ax.set_title("Comparação ML - Últimos 12 Meses")
plt.show()

# Final 36-month forecast: one figure per StatsForecast model, drawn on top
# of the full monthly history.
for model in models_to_eval:
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(df_grouped['ds'], df_grouped['y'], label="Histórico")
    ax.plot(forecast_36['ds'], forecast_36[model], label=f"Previsão {model}", color="red")
    ax.legend()
    ax.set_title(f"Previsão Final (36 meses) - {model}")
    plt.show()
