[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/EMQA/blob/main/EN/quantlets/EMQA_feature_importance/EMQA_feature_importance.ipynb)

# EMQA_feature_importance

Feature importance from a Random Forest model trained on Romanian electricity prices.

**Output:** `ml_feature_importance.pdf`


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Global matplotlib style for every figure in this notebook: transparent
# figure/axes/savefig backgrounds, no grid, no top/right spines, 11 pt font
# and a 12x6 inch default canvas.
_TRANSPARENT = 'none'
_PLOT_STYLE = {
    'figure.facecolor': _TRANSPARENT,
    'axes.facecolor': _TRANSPARENT,
    'savefig.facecolor': _TRANSPARENT,
    'savefig.transparent': True,
    'axes.grid': False,
    'axes.spines.top': False,
    'axes.spines.right': False,
    'font.size': 11,
    'figure.figsize': (12, 6),
}
plt.rcParams.update(_PLOT_STYLE)

# Named hex colours used for the notebook's charts.
COLORS = dict(
    blue='#1A3A6E',
    red='#CD0000',
    green='#2E7D32',
    orange='#E67E22',
    purple='#8E44AD',
    gray='#808080',
    cyan='#00BCD4',
    amber='#B5853F',
)

def save_fig(fig, name):
    """Save ``fig`` under ``name`` and print a confirmation line.

    The figure is written through ``fig.savefig`` with a tight bounding box,
    a transparent background and 300 dpi.

    Parameters
    ----------
    fig : object exposing ``savefig`` (e.g. a matplotlib figure)
        The figure to export.
    name : str
        Output path handed to ``fig.savefig`` unchanged.
    """
    export_options = {
        'bbox_inches': 'tight',
        'transparent': True,
        'dpi': 300,
    }
    fig.savefig(name, **export_options)
    print(f"Saved: {name}")


In [None]:
import os

def load_ro_de(paths=None):
    """Load the Romanian/German price table from the first CSV that exists.

    Parameters
    ----------
    paths : str, os.PathLike or iterable of those, optional
        Candidate locations of ``ro_de_prices_full.csv``, tried in order.
        When omitted, the notebook's built-in default locations are used.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with the ``date`` column parsed as dates and used
        as the index.

    Raises
    ------
    FileNotFoundError
        If none of the candidates is an existing file. The message lists
        every location that was tried.
    """
    if paths is None:
        paths = [
            '../../charts/ro_de_prices_full.csv',
            # NOTE(review): machine-specific absolute path, kept only as a
            # last-resort fallback; pass `paths=` to point at the data
            # on any other machine.
            '/Users/danielpele/Documents/Energy MBA/charts/ro_de_prices_full.csv',
        ]
    elif isinstance(paths, (str, os.PathLike)):
        # A single path would otherwise be iterated character by character.
        paths = [paths]

    candidates = [os.fspath(p) for p in paths]
    for candidate in candidates:
        # isfile (not exists): a directory with that name is not a usable CSV.
        if os.path.isfile(candidate):
            return pd.read_csv(candidate, parse_dates=['date'], index_col='date')
    raise FileNotFoundError(
        "ro_de_prices_full.csv not found; searched: " + ", ".join(candidates)
    )

# Read the price CSV into `df`, indexed by its parsed `date` column.
df = load_ro_de()


In [None]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, r2_score

def build_features(prices):
    """Build the modelling table (target + predictors) from the price frame.

    Parameters
    ----------
    prices : pandas.DataFrame
        Frame with ``ro_price`` and ``de_price`` columns and a datetime-like
        index (``dayofweek`` / ``month`` are read from it).

    Returns
    -------
    pandas.DataFrame
        Rows with complete features. Columns: the two price columns,
        ``target`` (a copy of ``ro_price``), lagged prices, rolling mean/std
        of the previous rows' ``ro_price``, and calendar indicators.
    """
    # shift()/rolling() work on row positions, so the rows must be in
    # chronological order; a stable sort keeps already-sorted data untouched.
    data = prices[['ro_price', 'de_price']].dropna().sort_index(kind='mergesort').copy()
    data['target'] = data['ro_price']

    # Lagged features
    for lag in [1, 2, 7, 14, 30]:
        data[f'ro_lag_{lag}'] = data['ro_price'].shift(lag)
    for lag in [1, 7]:
        data[f'de_lag_{lag}'] = data['de_price'].shift(lag)

    # Rolling stats: shift by one row first so every window ends at the
    # previous row and never includes the value being predicted.
    ro_previous = data['ro_price'].shift(1)
    for w in [7, 14, 30]:
        data[f'ro_ma_{w}'] = ro_previous.rolling(w).mean()
        data[f'ro_std_{w}'] = ro_previous.rolling(w).std()

    # Temporal
    data['dow'] = data.index.dayofweek
    data['month'] = data.index.month
    data['weekend'] = (data.index.dayofweek >= 5).astype(int)

    # The longest lag/window leaves NaNs in the first rows; drop them.
    return data.dropna()


# `df` was loaded by the previous cell; it is reused here instead of reading
# the CSV a second time.
data = build_features(df)
feature_cols = [c for c in data.columns if c not in ['target', 'ro_price', 'de_price']]

print(f"Dataset: {len(data)} rows, {len(feature_cols)} features")
print(f"Features: {feature_cols}")


In [None]:
# --- Train RF and extract feature importances ---
# Only the first 70% of the rows are used to fit the forest.
split = int(len(data) * 0.7)
train = data.iloc[:split]
X_train = train[feature_cols]
y_train = train['target']

rf = RandomForestRegressor(
    n_estimators=200,
    max_depth=10,
    random_state=42,
    n_jobs=-1,
)
rf.fit(X_train, y_train)

# Ascending sort puts the most important feature last, i.e. on the top bar
# of the horizontal chart drawn below.
importance = pd.Series(rf.feature_importances_, index=feature_cols).sort_values()
top12 = importance.iloc[-12:]

# Assign colors by feature type
def feature_color(name):
    """Return the bar colour (a hex string from ``COLORS``) for a feature.

    Groups are tested from most to least specific:

    * name contains ``spread`` (case-insensitive) -> purple
    * name contains ``de_``                        -> green (German features)
    * name is ``dow``, ``month`` or ``weekend``    -> orange (temporal)
    * anything else                                -> blue (Romanian lags/stats)

    The spread test runs first so that a spread feature whose name also
    contains ``de_`` (e.g. ``ro_de_spread``) is not coloured as German.
    """
    if 'spread' in name.lower():
        return COLORS['purple']  # Spread
    if 'de_' in name:
        return COLORS['green']   # German features
    if name in ('dow', 'month', 'weekend'):
        return COLORS['orange']  # Temporal
    return COLORS['blue']        # Romanian lags/stats

import matplotlib.patches as mpatches

# One colour per plotted feature, chosen by feature group.
bar_colors = [feature_color(f) for f in top12.index]
n_shown = len(top12)
positions = range(n_shown)

fig, ax = plt.subplots(figsize=(12, 7))
bars = ax.barh(positions, top12.values, color=bar_colors, alpha=0.85, edgecolor='white', lw=1)

ax.set_yticks(positions)
ax.set_yticklabels(top12.index, fontsize=11)
ax.set_xlabel('Importance', fontsize=12)
# The count comes from the data so the title stays correct if fewer than
# 12 features are available.
ax.set_title(f'Top {n_shown} Feature Importances (Random Forest)', fontsize=15, fontweight='bold')

# Add value labels just to the right of each bar
for pos, val in zip(positions, top12.values):
    ax.text(val + 0.002, pos, f'{val:.3f}', va='center', fontsize=10)

# Legend for color coding: list only the groups that actually appear among
# the plotted bars, so the legend never advertises a category with no bar.
legend_spec = [
    ('blue', 'Romanian price lags/stats'),
    ('green', 'German price features'),
    ('purple', 'Spread features'),
    ('orange', 'Temporal features'),
]
used_colors = set(bar_colors)
legend_items = [
    mpatches.Patch(color=COLORS[key], label=label)
    for key, label in legend_spec
    if COLORS[key] in used_colors
]
if legend_items:
    ax.legend(handles=legend_items, loc='upper center', bbox_to_anchor=(0.5, -0.10),
              frameon=False, ncol=len(legend_items), fontsize=10)

fig.tight_layout()
save_fig(fig, 'ml_feature_importance.pdf')
plt.show()
