# Ablation Study Example - Bridge Failure Prediction


In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import numpy as np

## Load Data


In [2]:
# Load the processed feature table from disk.
df = pd.read_csv('../data/processed/features.csv')

# Target label column.
y = df['failure_within_1yr']

# Every column other than the target and 'structure_id' is used as a
# model input; iterating a DataFrame yields its column labels in order.
non_feature_cols = ['failure_within_1yr', 'structure_id']
features = list(df.drop(columns=non_feature_cols))


## Baseline Model


In [3]:
# Hold out 20% of the rows for evaluation. The split is stratified on the
# label and seeded so that it is reproducible across runs.
X = df[features]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Reference model trained on the full feature set.
baseline = RandomForestClassifier(random_state=42, n_estimators=100)
baseline.fit(X_train, y_train)

# ROC AUC is computed from the predicted probability in column 1 of
# predict_proba's output.
baseline_scores = baseline.predict_proba(X_test)[:, 1]
auc_baseline = roc_auc_score(y_test, baseline_scores)
print('Baseline ROC AUC:', auc_baseline)


## Remove Each Feature in Turn


In [4]:
from sklearn.base import clone

# Leave-one-feature-out ablation: for every feature, retrain on the same
# train/test split with that single column removed and record the ROC AUC.
results = []
for feat in features:
    subset = [f for f in features if f != feat]
    X_train_sub = X_train[subset]
    X_test_sub = X_test[subset]
    # clone() returns an unfitted estimator carrying the baseline's exact
    # hyperparameters, so the ablated models stay comparable to the baseline
    # even if the baseline's settings are changed later.
    model = clone(baseline)
    model.fit(X_train_sub, y_train)
    auc = roc_auc_score(y_test, model.predict_proba(X_test_sub)[:, 1])
    results.append({'feature_removed': feat, 'roc_auc': auc})

ablation_df = pd.DataFrame(results)
# Positive drop_auc: the model scored lower without the feature than the
# baseline did with it. Negative: it scored higher without the feature.
ablation_df['drop_auc'] = auc_baseline - ablation_df['roc_auc']
# Largest AUC drop first.
ablation_df = ablation_df.sort_values('drop_auc', ascending=False)
print(ablation_df.head())


## Plot AUC Drop by Feature


In [5]:
import matplotlib.pyplot as plt

# Horizontal bar chart of the AUC lost when each feature is removed.
plt.barh(ablation_df['feature_removed'], ablation_df['drop_auc'])
# barh places the first row at the bottom of the axis. ablation_df is sorted
# by descending drop_auc where it is built, so flip the y-axis to show the
# largest drop at the top, matching the order of the printed table.
plt.gca().invert_yaxis()
plt.xlabel('AUC Drop')
plt.title('Ablation Study: Feature Contribution')
plt.tight_layout()
plt.show()
