# ARIMA Baseline Model (Endogenous Only)

**Objective**: Build a simple ARIMA model using only historical log returns (no exogenous variables)

**Model**: ARIMA(p, 0, q) where:
- p, q are determined by `pmdarima.auto_arima()`
- d=0 because we model stationary log returns
- No seasonal component (baseline)
- No exogenous variables (pure time series)

**Purpose**: Establish performance benchmark for comparison with SARIMAX and GARCH models

## 1. Import Libraries

In [None]:
# Data manipulation
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# ARIMA modeling
from statsmodels.tsa.arima.model import ARIMA
import pmdarima as pm
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import acf

# Metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Warnings
import warnings
# Suppress every warning for the rest of the session.
# NOTE: this is a blanket filter, so library warnings raised while fitting
# models later in the notebook are hidden as well.
warnings.filterwarnings(action='ignore')

# Global plotting defaults: dark-grid style, 'husl' colour cycle, wide figures.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')
plt.rcParams.update({'figure.figsize': (14, 6)})

print("✓ Libraries imported successfully")

## 2. Load and Prepare Data

In [None]:
# Load the raw price history (the file must provide DATE and GOLD_PRICE columns).
df = pd.read_csv('../data/gold_silver.csv')

# Index the frame chronologically by date and align it to a business-day
# calendar. asfreq('B') inserts an all-NaN row for every business day that is
# missing from the file (e.g. market holidays).
df['DATE'] = pd.to_datetime(df['DATE'])
df = df.sort_values('DATE')
df.set_index('DATE', inplace=True)
df = df.asfreq('B')  # Business Day frequency

# Calculate log returns between consecutive *observed* prices.
# Differencing the reindexed column directly would give NaN on each inserted
# row AND on the first real trading day after it, silently discarding one
# valid return per gap. Dropping the NaN prices before differencing keeps the
# return that spans the gap; the assignment realigns on the index, so the
# inserted rows still receive NaN and are removed by the dropna below.
observed_prices = df['GOLD_PRICE'].dropna()
df['GOLD_LOG_RETURN'] = np.log(observed_prices).diff()
df = df.dropna(subset=['GOLD_LOG_RETURN'])

print(f"Dataset: {len(df)} observations")
print(f"Date range: {df.index.min()} to {df.index.max()}")
print("\nLog Returns Summary:")
print(df['GOLD_LOG_RETURN'].describe())

## 3. Train-Test Split (Chronological)

**Critical**: Use temporal split, NO shuffle

In [None]:
# Chronological 80/20 partition: the earliest 80% of rows form the training
# set, the remaining rows are held out. No shuffling is performed.
train_size = int(0.8 * len(df))
_fit_rows, _holdout_rows = slice(None, train_size), slice(train_size, None)

# Log-return series used for modelling
train = df['GOLD_LOG_RETURN'].iloc[_fit_rows]
test = df['GOLD_LOG_RETURN'].iloc[_holdout_rows]

# Matching price levels, kept so forecasts can be mapped back to prices later
train_prices = df['GOLD_PRICE'].iloc[_fit_rows]
test_prices = df['GOLD_PRICE'].iloc[_holdout_rows]

print(f"Train set: {len(train)} observations ({train.index.min()} to {train.index.max()})")
print(f"Test set:  {len(test)} observations ({test.index.min()} to {test.index.max()})")
print(f"\nTest set represents {len(test)/len(df)*100:.1f}% of data")

## 4. Model Selection with auto_arima

In [None]:
# Stepwise search over ARMA orders (p, q) on the training returns.
print("Running auto_arima... (this may take a few minutes)\n")

search_settings = dict(
    # Model family: no differencing (returns are modelled directly), no seasonality
    d=0,
    seasonal=False,
    # Order grid: p and q each searched from 0 up to 5
    start_p=0, max_p=5,
    start_q=0, max_q=5,
    # Search strategy: stepwise, candidates ranked by AIC
    stepwise=True,
    information_criterion='aic',
    # Reporting / failure handling: print each candidate, skip fits that error out
    trace=True,
    suppress_warnings=True,
    error_action='ignore',
)
auto_model = pm.auto_arima(train, **search_settings)

banner = "=" * 70
print("\n" + banner)
print("OPTIMAL MODEL FOUND")
print(banner)
print(auto_model.summary())

In [None]:
# Pull the selected order and its information criteria out of the search result.
best_order = auto_model.order
selected_aic, selected_bic = auto_model.aic(), auto_model.bic()

print(f"\nBest ARIMA order: {best_order}")
print(f"AIC: {selected_aic:.2f}")
print(f"BIC: {selected_bic:.2f}")

## 5. Fit Final ARIMA Model

In [None]:
# Re-estimate the selected specification with statsmodels on the training returns.
model = ARIMA(endog=train, order=best_order)
arima_fit = model.fit()

print("✓ ARIMA model fitted successfully")
print(f"\nModel: ARIMA{best_order}")
# Report the in-sample fit statistics, one per line.
for stat_name, stat_value in (
    ("Log-Likelihood", arima_fit.llf),
    ("AIC", arima_fit.aic),
    ("BIC", arima_fit.bic),
):
    print(f"{stat_name}: {stat_value:.2f}")

## 6. Residual Diagnostics

In [None]:
# In-sample residuals of the fitted model
residuals = arima_fit.resid

# 2x2 diagnostic grid: time series, histogram, ACF, ACF of squares
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(ax_series, ax_hist), (ax_acf, ax_acf_sq) = axes
panel_title = dict(fontsize=12, fontweight='bold')

# Top-left: residuals through time with a zero reference line
ax_series.plot(residuals, linewidth=0.5)
ax_series.axhline(y=0, color='red', linestyle='--')
ax_series.set_title('Residuals Over Time', **panel_title)
ax_series.set_ylabel('Residual')

# Top-right: empirical distribution of the residuals
ax_hist.hist(residuals, bins=50, edgecolor='black', alpha=0.7)
ax_hist.set_title('Residuals Distribution', **panel_title)
ax_hist.set_xlabel('Residual')

# Bottom-left: autocorrelation left in the residuals (first 40 lags)
plot_acf(residuals, lags=40, ax=ax_acf)
ax_acf.set_title('ACF of Residuals', **panel_title)

# Bottom-right: autocorrelation of squared residuals, a visual check for
# volatility clustering (ARCH effects)
plot_acf(residuals**2, lags=40, ax=ax_acf_sq)
ax_acf_sq.set_title('ACF of Squared Residuals (ARCH Effects)', **panel_title)

plt.tight_layout()
plt.show()

print(f"Residuals Mean: {residuals.mean():.6f} (should be ~0)")
print(f"Residuals Std: {residuals.std():.6f}")
print("\n⚠ If ACF of squared residuals shows significant lags → ARCH effects present → GARCH needed")

## 7. Walk-Forward Validation (5-Day Ahead Forecasts)

**Strategy**: Expanding window (the model is refit on all observations available at each forecast origin) with non-overlapping 5-step-ahead forecasts

In [None]:
# Walk-forward validation setup
forecast_horizon = 5  # 5 days ahead (one trading week)
predictions_log = []    # forecast log returns, in test-set order
actuals_log = []        # realised log returns for the same dates
predictions_price = []  # forecast price levels reconstructed from the returns
actuals_price = []      # realised price levels for the same dates

print("Running walk-forward validation...")
print(f"Forecast horizon: {forecast_horizon} days")
print(f"Test set size: {len(test)}")

# Expanding window (not rolling): at every forecast origin the model is refit
# on the full training set plus all test observations before the origin.
# Origins advance by forecast_horizon, so the forecast windows do not overlap.
# The stop value is len(test) - forecast_horizon + 1 so that the final full
# window (the one ending on the last test observation) is also evaluated;
# without the +1 it is skipped whenever len(test) is an exact multiple of the
# horizon. A trailing partial window is still left out.
for i in range(0, len(test) - forecast_horizon + 1, forecast_horizon):
    # Train data: all past observations
    train_data = pd.concat([train, test.iloc[:i]])

    # Fit model with the order chosen on the original training set
    model_temp = ARIMA(train_data, order=best_order)
    fit_temp = model_temp.fit()

    # Forecast forecast_horizon steps ahead and collect the matching actuals
    forecast_log = fit_temp.forecast(steps=forecast_horizon)
    actual_log = test.iloc[i:i+forecast_horizon]

    predictions_log.extend(forecast_log.values)
    actuals_log.extend(actual_log.values)

    # Convert log returns to prices: start from the last price observed before
    # the window and compound the forecast returns, i.e.
    # price_k = last_price * exp(r_1 + ... + r_k).
    last_price = df['GOLD_PRICE'].iloc[train_size + i - 1]
    window_start = train_size + i
    predictions_price.extend(last_price * np.exp(np.cumsum(forecast_log.values)))
    actuals_price.extend(
        df['GOLD_PRICE'].iloc[window_start:window_start + forecast_horizon].values
    )

print(f"✓ Generated {len(predictions_price)} forecasts")

## 8. Evaluate Model Performance

In [None]:
# Calculate metrics on PRICES (not log returns)
rmse = np.sqrt(mean_squared_error(actuals_price, predictions_price))
mae = mean_absolute_error(actuals_price, predictions_price)

# Naive benchmark (random walk): every step of a forecast window is predicted
# with the last price observed BEFORE that window, which is exactly the
# information the ARIMA forecast for the same window had.
# (Shifting test_prices by the horizon and dropping the leading NaNs simply
# returns test_prices from its first element, i.e. the actuals themselves,
# which makes the benchmark error identically zero.)
n_forecasts = len(actuals_price)
# price_before[m] is the price one observation before test position m;
# position 0 falls back to the final training price.
price_before = np.concatenate(([train_prices.iloc[-1]], test_prices.values))
# Start position (within the test set) of the window each forecast belongs to
window_starts = (np.arange(n_forecasts) // forecast_horizon) * forecast_horizon
naive_predictions = price_before[window_starts]
naive_actuals = actuals_price[:len(naive_predictions)]
rmse_naive = np.sqrt(mean_squared_error(naive_actuals, naive_predictions))
mae_naive = mean_absolute_error(naive_actuals, naive_predictions)

# Relative improvement in percent; undefined (NaN) if the benchmark error is zero
rmse_improvement = (1 - rmse/rmse_naive)*100 if rmse_naive > 0 else float('nan')
mae_improvement = (1 - mae/mae_naive)*100 if mae_naive > 0 else float('nan')

print("="*70)
print("MODEL EVALUATION - 5-DAY AHEAD FORECASTS")
print("="*70)
print(f"\nARIMA{best_order} Model:")
print(f"  RMSE: ${rmse:.2f}")
print(f"  MAE:  ${mae:.2f}")
print("\nNaive Benchmark (Random Walk):")
print(f"  RMSE: ${rmse_naive:.2f}")
print(f"  MAE:  ${mae_naive:.2f}")
print("\nImprovement over Naive:")
print(f"  RMSE: {rmse_improvement:+.2f}%")
print(f"  MAE:  {mae_improvement:+.2f}%")
print("="*70)

## 9. Visualize Forecasts

In [None]:
# Two stacked panels: forecast vs. realised prices on top, forecast error below.
fig, axes = plt.subplots(2, 1, figsize=(14, 10))
ax_price, ax_error = axes

# Dates of the forecasts: the first len(predictions_price) test dates
forecast_dates = test.index[:len(predictions_price)]

# --- Top panel: price levels ---
ax_price.plot(forecast_dates, actuals_price,
              label='Actual Price', color='black', linewidth=1.5)
ax_price.plot(forecast_dates, predictions_price,
              label='ARIMA Forecast', color='blue', linewidth=1.5, alpha=0.7)
ax_price.set_title(f'ARIMA{best_order} - Gold Price Forecasts (5-Day Ahead)',
                   fontsize=14, fontweight='bold')
ax_price.set_ylabel('Price (USD)', fontsize=11)
ax_price.legend()
ax_price.grid(True, alpha=0.3)

# --- Bottom panel: actual minus forecast, shaded towards zero ---
errors = np.subtract(actuals_price, predictions_price)
ax_error.plot(forecast_dates, errors, color='red', linewidth=1)
ax_error.axhline(y=0, color='black', linestyle='--', linewidth=1)
ax_error.fill_between(forecast_dates, errors, 0, alpha=0.3, color='red')
ax_error.set_title('Forecast Errors', fontsize=14, fontweight='bold')
ax_error.set_ylabel('Error (USD)', fontsize=11)
ax_error.set_xlabel('Date', fontsize=11)
ax_error.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 10. Save Model Results

In [None]:
# Collect the headline metrics in a single record so other notebooks can
# compare against this baseline.
results = dict(
    model=f'ARIMA{best_order}',
    rmse=rmse,
    mae=mae,
    rmse_naive=rmse_naive,
    mae_naive=mae_naive,
    n_predictions=len(predictions_price),
)

# Persist the metrics as a one-row CSV
results_df = pd.DataFrame([results])
results_df.to_csv('../models/arima_baseline_results.csv', index=False)

# Persist the model fitted on the training set
arima_fit.save('../models/arima_baseline_model.pkl')

print("✓ Model and results saved to 'models/' directory")
print(
    "\nFiles created:",
    "  - arima_baseline_results.csv",
    "  - arima_baseline_model.pkl",
    sep="\n",
)

## 11. Key Findings

**Summary**:
1. ARIMA baseline establishes benchmark performance
2. Check ACF of squared residuals for ARCH effects
3. If present → GARCH extension needed (Notebook 04)
4. Next: Add exogenous variables (SARIMAX, Notebook 03)