In [1]:
import numpy 
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv("C:/Users/techin/credit_analysis_week5/data/processed/cleaned_data_with_risk.csv")

In [3]:
X = df.drop(columns=['is_high_risk', 'CustomerId'])
y = df['is_high_risk']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

models = {
    "LogisticRegression": LogisticRegression(random_state=42, max_iter=1000),
    "RandomForest": RandomForestClassifier(random_state=42)
}

for name, model in models.items():
    model.fit(X_train, y_train)
    print(f"{name} trained.")

LogisticRegression trained.
RandomForest trained.


In [5]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [50, 100],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5]
}

grid_search = GridSearchCV(
    RandomForestClassifier(random_state=42), 
    param_grid, 
    cv=3, 
    scoring='roc_auc',
    n_jobs=-1
)

grid_search.fit(X_train, y_train)
print("Best params:", grid_search.best_params_)
best_rf = grid_search.best_estimator_

Best params: {'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 100}


In [6]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:,1]

    return {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred),
        'Recall': recall_score(y_test, y_pred),
        'F1 Score': f1_score(y_test, y_pred),
        'ROC AUC': roc_auc_score(y_test, y_proba)
    }

for name, model in models.items():
    scores = evaluate_model(model, X_test, y_test)
    print(f"--- {name} ---")
    for metric, score in scores.items():
        print(f"{metric}: {score:.4f}")

--- LogisticRegression ---
Accuracy: 0.9920
Precision: 0.9965
Recall: 0.9825
F1 Score: 0.9894
ROC AUC: 0.9999
--- RandomForest ---
Accuracy: 0.9987
Precision: 0.9965
Recall: 1.0000
F1 Score: 0.9983
ROC AUC: 1.0000
