# Feature Importance Analysis
This notebook compares three ways of inspecting feature importance for the CatBoost model: SHAP values, CatBoost's built-in importance scores, and scikit-learn permutation importance.

In [None]:
import pandas as pd
import numpy as np
import shap
import matplotlib.pyplot as plt
from catboost import CatBoostClassifier, Pool
from sklearn.inspection import permutation_importance

from model_catboost_final import MODEL_PARAMS

In [None]:
# Load the race results and derive the binary "finished in the top three" target
csv_path = 'f1_data_2022_to_present.csv'
df = pd.read_csv(csv_path)

# 1 when finishing_position is 3 or better, 0 otherwise
df['top3_flag'] = df['finishing_position'].le(3).astype(int)

# The target, and the raw position it is derived from, must not be features
y = df['top3_flag']
X = df.drop(columns=['finishing_position', 'top3_flag'])

# CatBoost is given the positional indices of the categorical columns in X
cat_cols = ['circuit_id', 'driver_id', 'constructor_id']
cat_idx = list(map(X.columns.get_loc, cat_cols))

In [None]:
# Train CatBoost model
def train_model(X, y, cat_idx):
    """Fit a CatBoostClassifier on ``X``/``y`` with a re-weighted positive class.

    Args:
        X: Feature table passed to the CatBoost ``Pool``.
        y: Binary labels (0/1) aligned with ``X``.
        cat_idx: Positional indices of the categorical columns in ``X``.

    Returns:
        The fitted ``CatBoostClassifier``.
    """
    n_negative = (y == 0).sum()
    n_positive = (y == 1).sum()
    # Weight class 1 by the negative/positive ratio; the max() guard avoids a
    # division by zero when there are no positive samples.
    positive_weight = n_negative / max(n_positive, 1)

    # Start from the shared hyper-parameters without mutating MODEL_PARAMS.
    hyperparams = {**MODEL_PARAMS, 'class_weights': [1.0, positive_weight]}

    classifier = CatBoostClassifier(**hyperparams)
    classifier.fit(Pool(X, y, cat_features=cat_idx))
    return classifier

# Fit on every row of X/y (no hold-out split is made here); the cells below explain this `model`.
model = train_model(X, y, cat_idx)

In [None]:
# SHAP feature importance
explainer = shap.TreeExplainer(model)
# A Pool is passed (rather than the bare DataFrame) so the explainer is told
# which columns are categorical.
shap_values = explainer.shap_values(Pool(X, cat_features=cat_idx))

# Per-sample summary plot of the SHAP values for every feature
shap.summary_plot(shap_values, X)
plt.show()

# Mean |SHAP| per feature as a bar chart.
# `explainer.shap_values(...)` returns plain arrays, whereas `shap.plots.bar`
# only accepts `Explanation` objects and raises a TypeError otherwise, so the
# bar chart is drawn through `summary_plot`, which accepts the raw array.
shap.summary_plot(shap_values, X, plot_type='bar')
plt.show()

In [None]:
# CatBoost built-in feature importance
# Build the labeled pool once and reuse it for both importance types instead
# of constructing an identical Pool for each call.
eval_pool = Pool(X, y, cat_features=cat_idx)
importance_pred = model.get_feature_importance(eval_pool, type='PredictionValuesChange')
importance_loss = model.get_feature_importance(eval_pool, type='LossFunctionChange')

# One row per feature, with both scores side by side, ranked by PredictionValuesChange
feat_importances = pd.DataFrame({
    'feature': X.columns,
    'PredictionValuesChange': importance_pred,
    'LossFunctionChange': importance_loss,
}).sort_values('PredictionValuesChange', ascending=False)
feat_importances.head(10)

In [None]:
# Permutation importance using scikit-learn
# No `scoring` is passed, so scikit-learn falls back to its default scorer for
# the estimator. X/y are the same rows the model was trained on.
perm = permutation_importance(
    model,
    X,
    y,
    n_repeats=5,
    random_state=42,
    n_jobs=-1,
)

# Mean score drop per feature across the repeats, largest first
perm_importance = pd.DataFrame({
    'feature': X.columns,
    'importance': perm.importances_mean,
})
perm_importance = perm_importance.sort_values('importance', ascending=False)
perm_importance.head(10)