# Modeling - Bridge Failure Prediction

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
import matplotlib.pyplot as plt
import joblib

## Load Data

In [2]:
# Read the processed feature table, then separate the label from the inputs.
# Both the label column and the structure identifier are excluded from X.
df = pd.read_csv('../data/processed/features.csv')
y = df['failure_within_1yr']
X = df.drop(columns=['failure_within_1yr', 'structure_id'])

## Train/Test Split

In [3]:
# Hold out 30% of the rows as a test set. The split is stratified on y and
# seeded so that it is reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42, test_size=0.3)

## Fit RandomForest Model

In [4]:
# Random forest with 100 trees, each limited to depth 7; the seed is fixed so
# repeated runs build the same forest.
rf = RandomForestClassifier(
    random_state=42,
    max_depth=7,
    n_estimators=100,
)
# Fit on the training partition only; the test partition is kept for scoring.
rf.fit(X_train, y_train)

## Evaluate on Test Set

In [5]:
# Scores for ROC AUC come from the second column of predict_proba (the class
# at rf.classes_[1]); the hard labels feed the classification report.
y_proba = rf.predict_proba(X_test)[:, 1]
y_pred = rf.predict(X_test)

print(classification_report(y_test, y_pred))
print('ROC AUC:', roc_auc_score(y_test, y_proba))

## Confusion Matrix

In [6]:
# seaborn is only needed for this heatmap, so it is imported in this cell.
import seaborn as sns

# Counts of actual (rows) versus predicted (columns) labels on the test set.
cm = confusion_matrix(y_test, y_pred)

# annot + fmt='d' prints each cell's count as an integer.
sns.heatmap(data=cm, cmap='Blues', fmt='d', annot=True)
plt.title('Confusion Matrix')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()

## Feature Importance

In [7]:
# Pair each importance score from the fitted forest with its column name and
# rank the features from most to least important.
feat_names = X.columns
importances = rf.feature_importances_
feat_imp = pd.Series(data=importances, index=feat_names)
feat_imp = feat_imp.sort_values(ascending=False)

# Wide, short figure so every feature label fits along the x-axis.
feat_imp.plot.bar(figsize=(12, 4))
plt.title('Feature Importance')
plt.tight_layout()
plt.show()

## Save Model

In [8]:
# Persist the fitted forest so it can be reloaded without retraining.
# joblib.dump does not create missing directories, so make sure the output
# folder exists first; otherwise a fresh checkout fails with FileNotFoundError
# after the whole notebook has already run.
from pathlib import Path

Path('../models/trained').mkdir(parents=True, exist_ok=True)
joblib.dump(rf, '../models/trained/model.joblib')