In [None]:
# Model 4
# Step 1: Install XGBoost if needed

# NOTE: the leading "!" is an IPython/Jupyter shell escape, so this line only
# works when the cell is run inside a notebook session; it is not valid syntax
# in a plain Python script.
!pip install xgboost --quiet

# Step 2: Import libraries and load data

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import roc_auc_score, classification_report, accuracy_score

# Load dataset
df = pd.read_csv("credit_risk_dataset.csv")

# Encode categoricals: replace each categorical column with integer codes.
categorical = [
    'person_home_ownership',
    'loan_intent',
    'loan_grade',
    'cb_person_default_on_file',
]
for col in categorical:
    # A fresh encoder is created per column and not kept afterwards, so the
    # code -> category mapping cannot be recovered later from this cell.
    # Values are cast to str first so the encoder sees one uniform type.
    df[col] = LabelEncoder().fit_transform(df[col].astype(str))

# Rows with a missing label are removed first; the label column is then
# separated from the remaining feature columns.
target_col = "loan_status"
df = df.dropna(subset=[target_col])
y = df[target_col]
X = df.drop(columns=[target_col])

# Train-test split
# Done BEFORE scaling so the scaler can be fit on training rows only. The row
# partition is the same as splitting after scaling: it depends only on y, the
# number of rows and random_state.
numeric_cols = X.select_dtypes(include=["int64", "float64"]).columns
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, stratify=y, random_state=42
)

# Scale numeric columns
# Fit on the training split only; fitting on all of X would let the test
# rows' means/variances leak into the features the model is trained on.
scaler = StandardScaler().fit(X_train[numeric_cols])

# Work on explicit copies so the column assignments below never write into
# data shared with X.
X_train = X_train.copy()
X_test = X_test.copy()
X_train[numeric_cols] = scaler.transform(X_train[numeric_cols])
X_test[numeric_cols] = scaler.transform(X_test[numeric_cols])

# X itself is still left in scaled form for any later cell that uses it, but
# it is now scaled with the training-split statistics.
X[numeric_cols] = scaler.transform(X[numeric_cols])

# Step 3: Train basic XGBoost model

# Hyperparameters are gathered in one mapping so they can be inspected or
# reused; they are passed to the classifier unchanged.
xgb_params = {
    "max_depth": 5,
    "learning_rate": 0.1,
    "n_estimators": 100,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "objective": "binary:logistic",
    # NOTE(review): use_label_encoder may be deprecated or ignored by newer
    # XGBoost releases - confirm against the installed version.
    "use_label_encoder": False,
    "random_state": 42,
}
model = xgb.XGBClassifier(**xgb_params)

# Fit on the training split only; the test split is held out for evaluation.
model.fit(X_train, y_train)

# Step 4: Evaluate model

# Column 1 of predict_proba is used as the score for roc_auc_score; the hard
# labels from predict feed accuracy and the classification report.
y_proba = model.predict_proba(X_test)[:, 1]
y_pred = model.predict(X_test)

# Compute every metric on the held-out test split before printing.
accuracy = accuracy_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_proba)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("AUC:", auc)
print("Classification Report:\n", report)