# Hyperparameter Search Example - Bridge Failure Prediction

In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.model_selection import GridSearchCV, train_test_split


## Load Data

In [2]:
# Load the processed feature table from disk.
df = pd.read_csv('../data/processed/features.csv')

# The label is `failure_within_1yr`; it and `structure_id` are both removed
# from the feature matrix so neither is used as a predictor.
y = df['failure_within_1yr']
X = df.drop(columns=['failure_within_1yr', 'structure_id'])

# Hold out 20% of the rows for testing. Stratifying on the label keeps the
# class ratio the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

## Grid Search

In [3]:
# Hyperparameter grid for the random forest: 3 * 3 * 2 * 2 = 36 candidates.
params = {
    'n_estimators': [50, 100, 200],
    'max_depth': [5, 10, 15],
    'min_samples_split': [2, 5],
    # 'auto' was removed from scikit-learn (deprecated in 1.1, removed in 1.3)
    # and was only an alias of 'sqrt' for classifiers, so it duplicated half
    # of the grid. 'log2' gives a genuinely different second setting.
    'max_features': ['sqrt', 'log2'],
}

# Exhaustive search over the grid, scored by ROC AUC with 3-fold
# cross-validation on the training split only; n_jobs=-1 uses all cores.
gs = GridSearchCV(
    RandomForestClassifier(random_state=42),
    params,
    cv=3,
    scoring='roc_auc',
    n_jobs=-1,
)
gs.fit(X_train, y_train)

# Report the winning combination and its mean cross-validated ROC AUC.
print('Best Params:', gs.best_params_)
print('Best Score:', gs.best_score_)


## Evaluate Best Model

In [4]:
# Take the estimator the grid search selected as best.
best_model = gs.best_estimator_

# Score the held-out test split: hard label predictions feed the
# classification report, while column 1 of predict_proba feeds ROC AUC.
y_pred = best_model.predict(X_test)
y_proba = best_model.predict_proba(X_test)[:, 1]

# `classification_report` is imported in the notebook's top import cell
# rather than mid-cell, so all dependencies are declared in one place.
print('ROC AUC:', roc_auc_score(y_test, y_proba))
print(classification_report(y_test, y_pred))