# Day 16: ARIMA Parameter Selection

## Systematic Exploration of ARIMA Parameters

Master grid search, information criteria (AIC/BIC), and automated parameter selection with `auto_arima`.

## 1. Import Libraries and Load Data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.subplots as sp
import warnings
warnings.filterwarnings('ignore')

# Time Series
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
from pmdarima import auto_arima
from sklearn.metrics import mean_squared_error, mean_absolute_error
import time

# Notebook-wide display configuration:
#   - pandas: no cap on the number of columns shown when a frame is displayed
#   - matplotlib: use the 'seaborn-v0_8-darkgrid' style sheet for all figures
pd.options.display.max_columns = None
plt.style.use('seaborn-v0_8-darkgrid')

print("✓ Libraries imported successfully")

In [None]:
# Load gold prices.
# Catch `Exception` rather than using a bare `except:` so that kernel
# interrupts (KeyboardInterrupt) still propagate, and report *why* loading
# failed instead of hiding the cause.
# NOTE(review): no alternative data source is implemented in this cell; on
# failure `df` is simply set to None and the split below is skipped, so later
# cells that use `train_data` / `test_data` will not run.
try:
    df = pd.read_csv('../data/gold_prices.csv', parse_dates=['Date'])
except Exception as load_error:
    print(f"⚠ Using fallback data loading ({type(load_error).__name__}: {load_error})")
    df = None

if df is not None:
    # Clean data: make sure the price column is called 'Price', then keep one
    # row per date in chronological order with a fresh 0..n-1 index.
    if 'Price' not in df.columns:
        df = df.rename(columns={'Adj Close': 'Price'})
    df = df.drop_duplicates(subset=['Date']).sort_values('Date').reset_index(drop=True)

    print(f"✓ Data loaded: {len(df)} observations")
    print(f"Date range: {df['Date'].min().date()} to {df['Date'].max().date()}")
    print(f"Price range: ${df['Price'].min():.2f} to ${df['Price'].max():.2f}")
    print(f"\nPrice Statistics:")
    print(df['Price'].describe())

    # Train-test split: the first 80% of rows (chronologically) are used for
    # training, the remaining rows for testing. `.iloc` makes the positional
    # slicing explicit.
    train_size = int(len(df) * 0.8)
    train_data = df.iloc[:train_size].copy()
    test_data = df.iloc[train_size:].copy()

    print(f"\nTrain/Test Split:")
    print(f"  Train: {len(train_data)} ({len(train_data)/len(df)*100:.1f}%)")
    print(f"  Test: {len(test_data)} ({len(test_data)/len(df)*100:.1f}%)")

## 2. Stationarity Verification

In [None]:
# ADF test on original and differenced
def adf_test(series, name):
    """Run an Augmented Dickey-Fuller test and print a short report.

    Parameters
    ----------
    series : array-like
        Values handed directly to ``adfuller`` (with ``autolag='AIC'``).
    name : str
        Label printed as the heading of the report.

    Returns
    -------
    bool-like
        Truthy when the test's p-value is at or below 0.05, i.e. when the
        series is reported as stationary.
    """
    # The first two entries of the adfuller result are the test statistic
    # and its p-value; the remaining entries are not used here.
    adf_stat, p_value = adfuller(series, autolag='AIC')[:2]
    stationary = p_value <= 0.05
    verdict = '✓ STATIONARY' if stationary else '✗ NON-STATIONARY'

    print(f"\n{name}:")
    print(f"  ADF Stat: {adf_stat:.6f}")
    print(f"  P-value: {p_value:.6f}")
    print(f"  Status: {verdict}")
    return stationary

# Test the raw training prices and their first difference for stationarity,
# then derive the differencing recommendation from the two test outcomes
# (previously "d=1" was printed no matter what the tests reported).
prices = train_data['Price'].values
original_stationary = adf_test(prices, "Original Series")

diff1 = np.diff(prices, n=1)
diff1_stationary = adf_test(diff1, "First Difference (d=1)")

print("\n" + "="*50)
if original_stationary:
    # The undifferenced series already passes the test.
    print("✓ Recommendation: d=0")
elif diff1_stationary:
    print("✓ Recommendation: d=1")
else:
    # Neither the levels nor the first difference passed at the 5% level.
    print("⚠ Recommendation: d≥2 (first difference is still non-stationary)")

## 3. Grid Search for Optimal Parameters

In [None]:
# Define parameter ranges
p_range = range(0, 6)
d_range = range(0, 3)
q_range = range(0, 6)

print("Parameter Space:")
print(f"  p (AR): {list(p_range)}")
print(f"  d (differencing): {list(d_range)}")
print(f"  q (MA): {list(q_range)}")
print(f"  Total combinations: {len(p_range) * len(d_range) * len(q_range)}")

# Grid search: fit every (p, d, q) on the training prices, forecast the whole
# test horizon, and record AIC / BIC / test RMSE plus the fitted model.
# NOTE(review): information criteria of models with different d are computed
# on differently differenced data, so comparing AIC/BIC across d values should
# be interpreted with care.
grid_results = []
grid_failures = []  # (order label, error description) for every fit that raised
start_time = time.time()
successful = 0
failed = 0

print("\nFitting ARIMA models...")

for p in p_range:
    for d in d_range:
        for q in q_range:
            order_label = f"({p},{d},{q})"
            try:
                model = ARIMA(train_data['Price'], order=(p, d, q))
                fitted = model.fit()

                # Forecast as many steps as there are test observations
                forecast = fitted.get_forecast(steps=len(test_data))
                forecast_values = forecast.predicted_mean.values

                # Metrics: in-sample information criteria and out-of-sample RMSE
                aic = fitted.aic
                bic = fitted.bic
                rmse = np.sqrt(mean_squared_error(test_data['Price'], forecast_values))

                grid_results.append({
                    'p': p,
                    'd': d,
                    'q': q,
                    'order': order_label,
                    'AIC': aic,
                    'BIC': bic,
                    'RMSE': rmse,
                    'Model': fitted
                })
                successful += 1
            except Exception as fit_error:
                # `except Exception` (not a bare `except:`) keeps the loop
                # best-effort for model failures while still letting a kernel
                # interrupt stop this long-running cell.
                failed += 1
                grid_failures.append((order_label, f"{type(fit_error).__name__}: {fit_error}"))

elapsed = time.time() - start_time
# Passing the column list keeps the expected columns present even when no
# model could be fitted.
results_df = pd.DataFrame(
    grid_results,
    columns=['p', 'd', 'q', 'order', 'AIC', 'BIC', 'RMSE', 'Model']
)
print(f"✓ Completed in {elapsed:.1f}s")
print(f"✓ Successful: {successful}")
print(f"✗ Failed: {failed}")
for failed_order, reason in grid_failures:
    print(f"    ARIMA{failed_order}: {reason}")
if results_df.empty:
    print("⚠ No ARIMA model could be fitted; the following cells need at least one result")

## 4. Best Models by Criterion

In [None]:
def _report_best(metric, heading):
    """Print the grid-search row that minimises `metric`; return (index label, row)."""
    winner_idx = results_df[metric].idxmin()
    winner = results_df.loc[winner_idx]

    print(heading)
    print(f"  Model: ARIMA{winner['order']}")
    print(f"  AIC: {winner['AIC']:.2f}")
    print(f"  BIC: {winner['BIC']:.2f}")
    print(f"  RMSE: {winner['RMSE']:.2f}")
    return winner_idx, winner


# One winner per selection criterion; each call prints its own summary block.
best_aic_idx, best_aic = _report_best('AIC', "Best by AIC (fit quality):")
best_bic_idx, best_bic = _report_best('BIC', "\nBest by BIC (parsimony):")
best_rmse_idx, best_rmse = _report_best('RMSE', "\nBest by Test RMSE (forecast accuracy):")

## 5. Information Criteria Explanation

In [None]:
# Size of the training set and the per-parameter BIC penalty ln(n) derived
# from it; the AIC penalty per parameter is the constant 2.
n_train = len(train_data)
bic_penalty = np.log(n_train)

print("AIC vs BIC Information Criteria:")
print("\nAIC (Akaike Information Criterion):")
print("  Formula: AIC = -2·ln(L) + 2k")
print("  L = likelihood, k = parameters")
print("  Penalty: 2k (constant per parameter)")
print("  Favors: Better fit (may overfit)")
print("  Best for: Prediction accuracy")

print("\nBIC (Bayesian Information Criterion):")
print("  Formula: BIC = -2·ln(L) + k·ln(n)")
print("  L = likelihood, k = parameters, n = sample size")
print("  Penalty: k·ln(n) (grows with sample size)")
print("  Favors: Simpler models (avoid overfitting)")
print("  Best for: Model selection, interpretability")

print(f"\nWith n={n_train} observations:")
print("  AIC penalty per parameter: 2")
print(f"  BIC penalty per parameter: ln({n_train}) = {bic_penalty:.2f}")
print(f"  BIC is {bic_penalty/2:.1f}x more conservative than AIC")

## 6. Top Models by BIC

In [None]:
# Ten lowest-BIC models, reduced to the columns worth reading side by side.
report_columns = ['order', 'AIC', 'BIC', 'RMSE']
top_10 = results_df.nsmallest(10, 'BIC')[report_columns]
print("Top 10 Models by BIC:")
print(top_10.to_string(index=False))

# Analysis: do the two information criteria pick the same order?
print("\nObservations:")
aic_top = results_df.nsmallest(1, 'AIC')['order'].iloc[0]
bic_top = results_df.nsmallest(1, 'BIC')['order'].iloc[0]
if aic_top != bic_top:
    print(f"  ✗ AIC prefers {aic_top}, BIC prefers {bic_top}")
    print("  → Disagreement suggests complex trade-off")
else:
    print(f"  ✓ AIC and BIC agree on {aic_top}")

## 7. Auto ARIMA Automated Selection

In [None]:
print("Running auto_arima (stepwise optimization)...\n")

# Stepwise search over non-seasonal orders, ranked by BIC.
# `start_d=0` was removed: auto_arima has no `start_d` argument. The
# differencing order is left to auto_arima (d is not fixed here) and is capped
# by `max_d`; an unknown keyword would only be forwarded as an extra fit
# argument.
start_time = time.time()
auto_model = auto_arima(
    train_data['Price'],
    start_p=0, max_p=5,
    max_d=2,
    start_q=0, max_q=5,
    seasonal=False,
    stepwise=True,
    information_criterion='bic',
    trace=False,
    error_action='ignore',
    maxiter=50
)
auto_elapsed = time.time() - start_time

auto_order = auto_model.order
auto_aic = auto_model.aic()
auto_bic = auto_model.bic()

print(f"✓ Completed in {auto_elapsed:.1f}s")
print(f"\nAuto ARIMA Result:")
print(f"  Model: ARIMA{auto_order}")
print(f"  AIC: {auto_aic:.2f}")
print(f"  BIC: {auto_bic:.2f}")

# Forecast the full test horizon and score it against the held-out prices
auto_forecast = auto_model.predict(n_periods=len(test_data))
auto_rmse = np.sqrt(mean_squared_error(test_data['Price'], auto_forecast))
auto_mae = mean_absolute_error(test_data['Price'], auto_forecast)

print(f"  Test RMSE: {auto_rmse:.2f}")
print(f"  Test MAE: {auto_mae:.2f}")

## 8. Grid Search vs Auto ARIMA Comparison

In [None]:
# Create comparison table: one row per grid-search winner plus the auto_arima
# result; every metric is pre-formatted to two decimals as text.
grid_picks = [
    ('Grid (AIC)', best_aic),
    ('Grid (BIC)', best_bic),
    ('Grid (RMSE)', best_rmse),
]

comparison_data = {
    'Method': [label for label, _ in grid_picks] + ['Auto ARIMA'],
    'Model': [f"ARIMA{row['order']}" for _, row in grid_picks] + [f"ARIMA{auto_order}"],
    'AIC': [f"{row['AIC']:.2f}" for _, row in grid_picks] + [f"{auto_aic:.2f}"],
    'BIC': [f"{row['BIC']:.2f}" for _, row in grid_picks] + [f"{auto_bic:.2f}"],
    'RMSE': [f"{row['RMSE']:.2f}" for _, row in grid_picks] + [f"{auto_rmse:.2f}"],
}

comparison_df = pd.DataFrame(comparison_data)
print("Grid Search vs Auto ARIMA Comparison:")
print(comparison_df.to_string(index=False))

print("\nKey Observations:")
print(f"  • Grid search tested all {len(results_df)} combinations")
print(f"  • Auto ARIMA found solution in {auto_elapsed:.1f}s (stepwise algorithm)")
print("  • Different criteria select different models")
print("  • Best RMSE model may differ from AIC/BIC selections")

## 9. Visualization: AIC vs BIC Scatter

In [None]:
fig = go.Figure()

# All models: one point per fitted order, coloured by its test RMSE
all_models_marker = dict(
    size=7,
    color=results_df['RMSE'],
    colorscale='Plasma',
    showscale=True,
    colorbar=dict(title="Test RMSE")
)
fig.add_trace(go.Scatter(
    x=results_df['AIC'],
    y=results_df['BIC'],
    mode='markers',
    marker=all_models_marker,
    text=results_df['order'],
    hovertemplate='<b>%{text}</b><br>AIC: %{x:.2f}<br>BIC: %{y:.2f}<extra></extra>',
    name='Models'
))

# Highlighted points, added in this order: best by AIC, best by BIC, Auto ARIMA.
# Each entry is (label, AIC value, BIC value, marker colour, marker symbol).
highlights = [
    ('Best AIC', best_aic['AIC'], best_aic['BIC'], 'red', 'star'),
    ('Best BIC', best_bic['AIC'], best_bic['BIC'], 'green', 'star'),
    ('Auto ARIMA', auto_aic, auto_bic, 'blue', 'diamond'),
]
for label, aic_value, bic_value, marker_color, marker_symbol in highlights:
    fig.add_trace(go.Scatter(
        x=[aic_value],
        y=[bic_value],
        mode='markers+text',
        marker=dict(size=15, color=marker_color, symbol=marker_symbol),
        text=[label],
        textposition='top center',
        name=label
    ))

fig.update_layout(
    title="AIC vs BIC: Information Criteria Trade-off",
    xaxis_title="AIC (lower = better fit)",
    yaxis_title="BIC (lower = simpler)",
    height=600,
    hovermode='closest'
)
fig.show()

## 10. Key Insights

In [None]:
# Closing summary, emitted with a single print. Entries that start with "\n"
# produce the blank line that separates the numbered sections.
banner = "=" * 60
insight_lines = [
    banner,
    "KEY INSIGHTS: PARAMETER SELECTION METHODOLOGY",
    banner,

    "\n1. Information Criteria:",
    "   • AIC: Penalizes complexity moderately (factor 2k)",
    "   • BIC: Penalizes complexity strongly (factor k·ln(n))",
    "   • AIC favors fit, BIC favors parsimony",
    "   • With large n: BIC much more conservative",

    "\n2. When Criteria Agree:",
    "   ✓ High confidence in selected model",
    "   ✓ Model likely robust and optimal",
    "   ✓ Good choice for production use",

    "\n3. When Criteria Disagree:",
    "   • Complex parameter space with trade-offs",
    "   • Both models may be valid",
    "   • Choose based on use case:",
    "     - Prioritize accuracy → Use AIC model",
    "     - Prioritize simplicity → Use BIC model",

    "\n4. Grid Search Benefits:",
    "   ✓ Exhaustive: Tests all combinations",
    "   ✓ Transparent: See all models and metrics",
    "   ✓ Educational: Understand parameter landscape",
    "   ✗ Computationally expensive for large spaces",

    "\n5. Auto ARIMA Benefits:",
    "   ✓ Fast: Stepwise algorithm much quicker",
    "   ✓ Automatic: No manual parameter tuning",
    "   ✓ Practical: Good for production systems",
    "   ✗ May get stuck in local optimum",

    "\n6. Recommended Workflow:",
    "   1. Start with Auto ARIMA (fast overview)",
    "   2. Verify with Grid Search (confidence)",
    "   3. Compare AIC and BIC (understand trade-off)",
    "   4. Validate on holdout set (generalization)",
    "   5. Use simpler model if similar performance",

    "\n" + banner,
    "✓ Day 16 Complete: Parameter Selection Mastery!",
    banner,
]
print("\n".join(insight_lines))