# ARIMA Forecasting Model

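This notebook fits an ARIMA(5,1,0) model to the daily sales series (`venda`) from the `mock_kaggle.csv` dataset, generates demand forecasts for a held-out test window at the end of the series, and scores them with MAE and RMSE.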

In [None]:
# --- Imports ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error

# --- Load data ---
RAW_URL = "https://raw.githubusercontent.com/MiltosTsir/supply-chain-analysis-portfolio/main/portfolio/01-demand-forecasting/data/raw/mock_kaggle.csv"

# Read the CSV (parsing the "data" column as dates), order the rows
# chronologically and renumber the index.
df = (
    pd.read_csv(RAW_URL, parse_dates=["data"])
    .sort_values("data")
    .reset_index(drop=True)
)

# Date-indexed "venda" series on a daily calendar: asfreq("D") inserts any
# missing days, and every NaN (inserted or already present) is set to 0.
ts = (
    df.set_index("data")["venda"]
    .asfreq("D")
    .fillna(0)
)
ts.head()

In [None]:
# --- Train/Test split ---
# Hold out the last `horizon` observations as the test set: 60 when the
# series has more than 120 points, otherwise one fifth of it (at least 1).
if len(ts) > 120:
    horizon = 60
else:
    horizon = max(1, len(ts) // 5)

train = ts.iloc[:-horizon]
test = ts.iloc[-horizon:]
print("Train size:", len(train), " Test size:", len(test))

In [None]:
# --- Fit ARIMA model ---
# (p, d, q): 5 autoregressive lags, first-order differencing, no moving-average terms.
ARIMA_ORDER = (5, 1, 0)

model = ARIMA(endog=train, order=ARIMA_ORDER)
fit = model.fit()
print(fit.summary())

In [None]:
# --- Forecast ---
# Predict the `horizon` steps that follow the end of the training data.
forecast = fit.forecast(steps=horizon)

fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(train.index, train, label="Train")
ax.plot(test.index, test, label="Test")
# Drawn against the test dates so the forecast overlays the held-out actuals.
ax.plot(test.index, forecast, label="ARIMA Forecast")
ax.set_title("ARIMA Forecast vs Actual")
ax.set_xlabel("Date")
ax.set_ylabel("Units Sold")
ax.legend()
plt.show()

# --- Evaluation ---
# Error of the forecast against the held-out test values.
mae = mean_absolute_error(test, forecast)
mse = mean_squared_error(test, forecast)
rmse = np.sqrt(mse)
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")