In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

# Read the raw loan records from disk
df = pd.read_csv("bankloans.csv")

# Keep only the rows whose 'default' label is present
df_cleaned = df[df['default'].notna()]

# Separate the label column from the remaining (predictor) columns
y = df_cleaned['default']
X = df_cleaned.drop('default', axis=1)

# Hold out 20% of the rows for testing; stratifying on y keeps the label mix
# the same in both splits, and the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Fit the scaler on the training split only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a 100-tree random forest (fixed seed) on the scaled training split
rf_model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train_scaled, y_train)

# Score the held-out rows: column 1 of predict_proba, plus the hard class labels
rf_probs = rf_model.predict_proba(X_test_scaled)[:, 1]
rf_preds = rf_model.predict(X_test_scaled)

# Train XGBoost model
# NOTE: `use_label_encoder` is deliberately not passed. The installed XGBoost does
# not use it and only emits a "Parameters: { "use_label_encoder" } are not used."
# warning, so the argument was dead weight.
xgb_model = XGBClassifier(eval_metric='logloss', random_state=42)
xgb_model.fit(X_train_scaled, y_train)

# Hard class labels for the held-out rows
xgb_preds = xgb_model.predict(X_test_scaled)
# Column 1 of predict_proba, used later as the score for ROC AUC
xgb_probs = xgb_model.predict_proba(X_test_scaled)[:, 1]

# Evaluate models
def evaluate_model(name, y_true, y_pred, y_prob):
    """Print accuracy, ROC AUC and a classification report for one model.

    Args:
        name: Label shown in the heading line of the printed summary.
        y_true: Ground-truth labels.
        y_pred: Predicted labels, compared with ``y_true`` for the accuracy
            and the classification report.
        y_prob: Scores passed to ``roc_auc_score`` together with ``y_true``.

    Returns:
        None. Everything is written to standard output.
    """
    print(f"\n{name} Model Performance:")
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("AUC-ROC:", roc_auc_score(y_true, y_prob))
    # Print the report in its own call: passing it as a second argument after
    # "...:\n" makes print() insert its separator space in front of the
    # report's first line, shifting the header row out of column alignment.
    print("Classification Report:")
    print(classification_report(y_true, y_pred))

# Report held-out metrics for each fitted model, in the order they were trained
evaluate_model(name="Random Forest", y_true=y_test, y_pred=rf_preds, y_prob=rf_probs)
evaluate_model(name="XGBoost", y_true=y_test, y_pred=xgb_preds, y_prob=xgb_probs)


Parameters: { "use_label_encoder" } are not used.




Random Forest Model Performance:
Accuracy: 0.8285714285714286
AUC-ROC: 0.8474153765415902
Classification Report:
               precision    recall  f1-score   support

         0.0       0.86      0.92      0.89       103
         1.0       0.72      0.57      0.64        37

    accuracy                           0.83       140
   macro avg       0.79      0.74      0.76       140
weighted avg       0.82      0.83      0.82       140


XGBoost Model Performance:
Accuracy: 0.7785714285714286
AUC-ROC: 0.8105484124901601
Classification Report:
               precision    recall  f1-score   support

         0.0       0.86      0.83      0.85       103
         1.0       0.57      0.62      0.60        37

    accuracy                           0.78       140
   macro avg       0.72      0.73      0.72       140
weighted avg       0.78      0.78      0.78       140

