# Hyperparameter Tuning - Bridge Failure Prediction

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
import joblib

## Load Data

In [2]:
# Load the processed feature table from disk.
df = pd.read_csv('../data/processed/features.csv')

# The label is the 'failure_within_1yr' column; it and 'structure_id' are
# removed from the predictor matrix.
y = df['failure_within_1yr']
X = df.drop(columns=['failure_within_1yr', 'structure_id'])

# Hold out 20% of the rows for validation. The split is stratified on the
# label and seeded so it is reproducible.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

## Set Up Grid Search

In [3]:
# Hyperparameter grid for the random forest: 2 * 3 * 2 * 2 = 24 candidates,
# each evaluated with 3-fold cross-validation.
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [5, 8, None],
    'min_samples_split': [2, 5],
    # 'auto' is not used here: for RandomForestClassifier it was only an
    # alias of 'sqrt' (deprecated in scikit-learn 1.1, removed in 1.3), so it
    # duplicated every 'sqrt' candidate on old versions and fails parameter
    # validation on new ones. 'log2' gives a genuinely different setting.
    'max_features': ['sqrt', 'log2'],
}

# Fixed seed so every candidate forest is built reproducibly.
rf = RandomForestClassifier(random_state=42)

# Exhaustive search scored by ROC AUC; n_jobs=-1 uses all available cores.
grid = GridSearchCV(rf, param_grid, cv=3, scoring='roc_auc', n_jobs=-1)
grid.fit(X_train, y_train)

## Best Parameters and Validation ROC AUC

In [4]:
# Estimator for the best-scoring parameter combination found by the search.
best_model = grid.best_estimator_
print('Best params:', grid.best_params_)

# Score the held-out validation split. Column 1 of predict_proba is taken as
# the score -- presumably the probability of the positive (failure) class;
# confirm against best_model.classes_.
val_pred = best_model.predict_proba(X_valid)[:, 1]
val_auc = roc_auc_score(y_valid, val_pred)
print('Validation ROC AUC:', val_auc)

## Save Best Model

In [5]:
# Persist the tuned estimator to disk with joblib.
joblib.dump(
    value=best_model,
    filename='../models/trained/model_tuned.joblib',
)