[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/EMQA/blob/main/EN/quantlets/EMQA_model_selection/EMQA_model_selection.ipynb)

# EMQA_model_selection

**AIC / BIC Model Selection Grid**

Fit a grid of ARIMA(p,d,q) specifications to Brent oil log returns, report the best model under both AIC and BIC, and display the AIC values as heatmaps (one panel per differencing order d) to identify the best model.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Global matplotlib defaults for every figure in this notebook.

# Transparent backgrounds, both on screen and in saved files.
plt.rcParams['figure.facecolor'] = 'none'
plt.rcParams['axes.facecolor'] = 'none'
plt.rcParams['savefig.facecolor'] = 'none'
plt.rcParams['savefig.transparent'] = True

# Minimal axes: no grid, no top/right spines.
plt.rcParams['axes.grid'] = False
plt.rcParams['axes.spines.top'] = False
plt.rcParams['axes.spines.right'] = False

# Default text size and figure dimensions (inches).
plt.rcParams['font.size'] = 11
plt.rcParams['figure.figsize'] = (12, 6)

# Named hex colours available to the plotting cells.
COLORS = dict(
    blue='#1A3A6E',
    red='#CD0000',
    green='#2E7D32',
    orange='#E67E22',
    purple='#8E44AD',
    gray='#808080',
    cyan='#00BCD4',
    amber='#B5853F',
)

def save_fig(fig, name):
    """Save *fig* under *name* and print a confirmation line.

    The figure is written via ``fig.savefig`` with a tight bounding box,
    a transparent background and 300 dpi.
    """
    export_options = {'bbox_inches': 'tight', 'transparent': True, 'dpi': 300}
    fig.savefig(name, **export_options)
    print(f"Saved: {name}")


In [None]:
import yfinance as yf

def fetch(ticker, start='2020-01-01', end='2025-12-31'):
    """Download the ``'Close'`` data for *ticker* via ``yf.download``.

    Parameters
    ----------
    ticker
        Symbol forwarded to ``yf.download``.
    start, end
        Date bounds forwarded to ``yf.download``.

    Returns
    -------
    The ``'Close'`` data with missing values dropped. When the download
    has MultiIndex columns and a single ``'Close'`` column, that column is
    returned as a Series.
    """
    d = yf.download(ticker, start=start, end=end, progress=False)
    close = d['Close']
    if isinstance(d.columns, pd.MultiIndex):
        # Squeeze the column axis only: a bare squeeze() would also collapse
        # a one-row result to a scalar, which has no .dropna().
        close = close.squeeze(axis='columns')
    return close.dropna()


In [None]:
from statsmodels.tsa.arima.model import ARIMA
import itertools

# Brent prices -> log returns between consecutive observations
# (the first value is NaN after the shift and is dropped).
brent = fetch('BZ=F')
log_ret = np.log(brent / brent.shift(1)).dropna()

# Candidate orders: p and q in 0..3, d in 0..1.
p_range = range(4)
d_range = range(2)
q_range = range(4)

# Fit every (p, d, q) combination; a specification whose fit raises is
# kept in the grid with missing information criteria.
results = []
for p, d, q in itertools.product(p_range, d_range, q_range):
    try:
        fit = ARIMA(log_ret, order=(p, d, q)).fit()
        aic, bic = fit.aic, fit.bic
    except Exception:
        aic = bic = np.nan
    results.append({'p': p, 'd': d, 'q': q, 'AIC': aic, 'BIC': bic})

df_res = pd.DataFrame(results)
print(f"Tested {len(df_res)} model specifications")
print()

# Rows holding the minimum AIC and the minimum BIC across the whole grid.
best_aic = df_res.loc[df_res['AIC'].idxmin()]
best_bic = df_res.loc[df_res['BIC'].idxmin()]
print(f"Best AIC: ARIMA({int(best_aic.p)},{int(best_aic.d)},{int(best_aic.q)}) = {best_aic.AIC:.2f}")
print(f"Best BIC: ARIMA({int(best_bic.p)},{int(best_bic.d)},{int(best_bic.q)}) = {best_bic.BIC:.2f}")


In [None]:
# Heatmaps for d=0 and d=1: one panel per differencing order, rows = p,
# columns = q, cell colour and label = AIC of ARIMA(p, d, q).
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

for idx, d_val in enumerate([0, 1]):
    ax = axes[idx]
    subset = df_res[df_res['d'] == d_val].copy()
    pivot = subset.pivot_table(index='p', columns='q', values='AIC')
    grid = pivot.values

    im = ax.imshow(grid, cmap='RdYlGn_r', aspect='auto')
    ax.set_xticks(range(len(pivot.columns)))
    ax.set_xticklabels(pivot.columns.astype(int))
    ax.set_yticks(range(len(pivot.index)))
    ax.set_yticklabels(pivot.index.astype(int))
    ax.set_xlabel('q (MA order)')
    ax.set_ylabel('p (AR order)')
    ax.set_title(f'AIC Heatmap — d = {d_val}', fontweight='bold')

    # Label colour threshold: mean of the non-missing AIC values in this
    # panel. It is the same for every cell, so compute it once per panel.
    finite = grid[~np.isnan(grid)]
    mean_aic = finite.mean() if finite.size else np.nan

    # Annotate cells (missing AIC values are left blank)
    for (r, c), val in np.ndenumerate(grid):
        if np.isnan(val):
            continue
        ax.text(c, r, f'{val:.0f}', ha='center', va='center', fontsize=8,
                color='white' if val > mean_aic else 'black')

    # Highlight best in this panel: red outline around the minimum-AIC cell.
    best_in_panel = subset.loc[subset['AIC'].idxmin()]
    bp = int(best_in_panel.p)
    bq = int(best_in_panel.q)
    r_idx = list(pivot.index).index(bp)
    c_idx = list(pivot.columns).index(bq)
    # imshow centres cell (r, c) at (x=c, y=r), so the cell spans +/- 0.5.
    rect = plt.Rectangle((c_idx - 0.5, r_idx - 0.5), 1, 1,
                          linewidth=3, edgecolor=COLORS['red'], facecolor='none')
    ax.add_patch(rect)

    plt.colorbar(im, ax=ax, shrink=0.8)

fig.suptitle('ARIMA Model Selection — AIC Grid (Brent Oil Returns)',
             fontsize=14, fontweight='bold', y=1.02)
fig.tight_layout()
save_fig(fig, 'model_selection_aic_bic.pdf')
plt.show()


In [None]:
# Summary table: the ten specifications with the lowest AIC, ranked from 1.
top10 = (
    df_res.nsmallest(10, 'AIC')
    .loc[:, ['p', 'd', 'q', 'AIC', 'BIC']]
    .reset_index(drop=True)
)
# Replace the 0-based row labels with 1-based ranks.
top10.index = top10.index + 1
top10.index.name = 'Rank'
print("Top 10 Models by AIC:")
print(top10.to_string())
