In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score


In [2]:
df = pd.read_csv("loan_data.csv")
df.head()


Unnamed: 0,Age,Income,LoanAmount,CreditScore,EmploymentYears,LoanPurpose,MaritalStatus,Default
0,25,400000,200000,650,2,Personal,Single,0
1,45,1200000,500000,720,15,Home,Married,0
2,30,600000,300000,610,5,Education,Single,1
3,50,1500000,700000,750,20,Business,Married,0
4,28,500000,250000,580,3,Personal,Single,1


In [3]:
df.info()
df["Default"].value_counts()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Age              10 non-null     int64 
 1   Income           10 non-null     int64 
 2   LoanAmount       10 non-null     int64 
 3   CreditScore      10 non-null     int64 
 4   EmploymentYears  10 non-null     int64 
 5   LoanPurpose      10 non-null     object
 6   MaritalStatus    10 non-null     object
 7   Default          10 non-null     int64 
dtypes: int64(6), object(2)
memory usage: 772.0+ bytes


Default
0    6
1    4
Name: count, dtype: int64

In [4]:
X = df.drop("Default", axis=1)
y = df["Default"]

num_cols = X.select_dtypes(include=["int64","float64"]).columns
cat_cols = X.select_dtypes(include=["object"]).columns


In [6]:
preprocessor = ColumnTransformer([
    ("num", StandardScaler(), num_cols),
    ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols)
])

model = Pipeline([
    ("prep", preprocessor),
    ("clf", RandomForestClassifier(
        n_estimators=300,
        max_depth=10,
        class_weight="balanced",
        random_state=42
    ))
])


In [12]:
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y
)


model.fit(X_train, y_train)

y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:,1]

print(classification_report(y_test, y_pred))
print("ROC-AUC:", roc_auc_score(y_test, y_prob))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

ROC-AUC: 1.0
