[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/EMQA/blob/main/EN/quantlets/EMQA_ml_compare/EMQA_ml_compare.ipynb)

# EMQA_ml_compare
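Compares three regressors — Linear Regression, Random Forest, and Gradient Boosting — on next-day Brent crude log-return prediction, charting test-set MAE and $R^2$ (a directional Sharpe proxy is also printed).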
**Output:** `ml_compare.pdf`

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
# Suppress every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# Matplotlib defaults: transparent backgrounds, no grid, no top/right spines.
plt.rcParams.update({
    **dict.fromkeys(('figure.facecolor', 'axes.facecolor', 'savefig.facecolor'), 'none'),
    'savefig.transparent': True,
    **dict.fromkeys(('axes.grid', 'axes.spines.top', 'axes.spines.right'), False),
    'font.size': 11,
    'figure.figsize': (12, 6),
})

# Named hex colours shared by the plotting cells.
COLORS = dict(
    blue='#1A3A6E',
    red='#CD0000',
    green='#2E7D32',
    orange='#E67E22',
    purple='#8E44AD',
    gray='#808080',
    cyan='#00BCD4',
    amber='#B5853F',
)

def save_fig(fig, name):
    """Save ``fig`` to the path ``name`` and print a confirmation line.

    The figure is written with a tight bounding box, a transparent
    background and 300 dpi. Nothing is returned.
    """
    export_options = {'bbox_inches': 'tight', 'transparent': True, 'dpi': 300}
    fig.savefig(name, **export_options)
    print(f"Saved: {name}")


In [None]:
import yfinance as yf

def fetch(ticker, start='2020-01-01', end='2025-12-31'):
    """Download closing prices for ``ticker`` via ``yf.download``.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``yf.download`` (e.g. ``'BZ=F'``).
    start, end : str
        Date bounds forwarded unchanged to ``yf.download``.

    Returns
    -------
    pandas.Series
        The ``'Close'`` data with missing values dropped. If the download
        yields several ``'Close'`` columns (e.g. multiple tickers), the
        result stays a DataFrame, as before.
    """
    d = yf.download(ticker, start=start, end=end, progress=False)
    close = d['Close']
    if isinstance(close, pd.DataFrame):
        # With MultiIndex columns, 'Close' selects a one-column DataFrame.
        # Squeeze along the column axis only: a bare ``squeeze()`` would also
        # collapse a single-row result to a scalar, which has no ``dropna``.
        close = close.squeeze(axis='columns')
    return close.dropna()


In [None]:
# Brent prices -> supervised-learning frame (same as ml_rf)
brent = fetch('BZ=F', start='2018-01-01')
df = pd.DataFrame({'price': brent})
df['return'] = np.log(df['price'].div(df['price'].shift(1)))

# Lagged daily log returns
df = df.assign(**{f'ret_lag_{lag}': df['return'].shift(lag) for lag in (1, 2, 3, 7, 14)})

# Rolling moments of the return series over 5- and 20-row windows
df = df.assign(
    roll_mean_5=df['return'].rolling(5).mean(),
    roll_std_5=df['return'].rolling(5).std(),
    roll_mean_20=df['return'].rolling(20).mean(),
    roll_std_20=df['return'].rolling(20).std(),
    roll_skew_20=df['return'].rolling(20).skew(),
)

# Target is the next row's return; rows left incomplete by shifting or
# rolling warm-up are dropped.
df = df.assign(target=df['return'].shift(-1)).dropna()

# Everything except the raw price, the current return and the target is a feature.
non_features = ('price', 'return', 'target')
feature_cols = [col for col in df.columns if col not in non_features]
X = df[feature_cols].values
y = df['target'].values

# Chronological split: first 80% of rows for training, remainder for testing.
split = int(0.8 * len(X))
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, r2_score

# Candidate regressors, all fitted on the same training split.
models = {
    'Linear Regression': LinearRegression(),
    'Random Forest': RandomForestRegressor(
        n_estimators=200, max_depth=10, random_state=42, n_jobs=-1,
    ),
    'Gradient Boosting': GradientBoostingRegressor(
        n_estimators=200, max_depth=5, random_state=42,
    ),
}

results = {}
for name_m, estimator in models.items():
    estimator.fit(X_train, y_train)
    forecast = estimator.predict(X_test)

    mae = mean_absolute_error(y_test, forecast)
    r2 = r2_score(y_test, forecast)

    # Simple directional Sharpe proxy: take the sign of the forecast as the
    # position, then annualise mean/std of the resulting returns with sqrt(252).
    position_ret = np.sign(forecast) * y_test
    if position_ret.std() > 0:
        sharpe = position_ret.mean() / position_ret.std() * np.sqrt(252)
    else:
        sharpe = 0

    results[name_m] = {'MAE': mae, 'R2': r2, 'Sharpe': sharpe}
    print(f"{name_m}: MAE={mae:.6f}, R2={r2:.4f}, Sharpe={sharpe:.2f}")

# One row per model, one column per metric.
res_df = pd.DataFrame(results).T

In [None]:
# Side-by-side bar charts of MAE and R-squared, one bar per model.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
ax_mae, ax_r2 = axes

bar_colors = [COLORS[key] for key in ('blue', 'green', 'orange')]
bar_kw = {'color': bar_colors, 'width': 0.5, 'edgecolor': 'white'}
label_kw = {'ha': 'center', 'fontsize': 10}

# Panel 1 - MAE: value label sits slightly above each bar
mae_bars = ax_mae.bar(res_df.index, res_df['MAE'], **bar_kw)
for rect, val in zip(mae_bars, res_df['MAE']):
    x_mid = rect.get_x() + rect.get_width()/2
    ax_mae.text(x_mid, rect.get_height() + 0.00005, f'{val:.5f}', va='bottom', **label_kw)
ax_mae.set_ylabel('MAE')
ax_mae.set_title('Mean Absolute Error')
ax_mae.tick_params(axis='x', rotation=15)

# Panel 2 - R-squared: label goes above positive bars and below negative ones
r2_bars = ax_r2.bar(res_df.index, res_df['R2'], **bar_kw)
for rect, val in zip(r2_bars, res_df['R2']):
    if val >= 0:
        pad, anchor = 0.002, 'bottom'
    else:
        pad, anchor = -0.002, 'top'
    x_mid = rect.get_x() + rect.get_width()/2
    ax_r2.text(x_mid, rect.get_height() + pad, f'{val:.4f}', va=anchor, **label_kw)
ax_r2.set_ylabel('$R^2$')
ax_r2.set_title('R-squared')
ax_r2.tick_params(axis='x', rotation=15)
# Dotted zero line as a visual reference for the sign of R-squared.
ax_r2.axhline(0, color=COLORS['gray'], linestyle=':', linewidth=0.8)

plt.tight_layout()
save_fig(fig, 'ml_compare.pdf')
plt.show()