In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
# Location of the loan dataset CSV; point this at your local copy if it lives elsewhere
file_path = "loan_default_prediction.csv"
data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows to sanity-check the load
print(data.head(n=5))

# Target is the default flag; features are every column except the
# row identifier and the target itself
y = data["Loan_Default"]
X = data.drop(["Loan_ID", "Loan_Default"], axis=1)

# Column groups, used later to route each column to the right preprocessing step
categorical_features = [
    "Employment_Status",
    "Marital_Status",
    "Education",
]
numerical_features = [
    "Age",
    "Income",
    "Loan_Amount",
    "Loan_Term",
    "Credit_Score",
    "Debt_to_Income_Ratio",
]

# Hold out 20% of the rows for testing; stratify on the target so both
# splits keep the same class proportions, and fix the seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Preprocessing for categorical and numerical features:
#   - "num": standardise each column listed in numerical_features.
#   - "cat": one-hot encode each column listed in categorical_features.
# handle_unknown="ignore" makes a category that was not present when the
# encoder was fitted (e.g. a level that only occurs in the test split or in
# new data) encode as all zeros instead of raising a ValueError at predict time.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numerical_features),
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
    ]
)

# Chain preprocessing and a seeded random forest into a single estimator,
# so the same transformations are applied at fit time and at predict time
pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", RandomForestClassifier(random_state=42)),
])

# Fit the pipeline on the training split, then predict labels for the
# held-out split (fit returns the fitted pipeline, so the calls can be chained)
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

# Report held-out performance: confusion matrix, per-class metrics, overall accuracy
print("Confusion Matrix:\n", confusion_matrix(y_true=y_test, y_pred=y_pred))
print("\nClassification Report:\n", classification_report(y_true=y_test, y_pred=y_pred))
print("\nAccuracy Score:", accuracy_score(y_true=y_test, y_pred=y_pred))

  Loan_ID  Age  Income  Loan_Amount  Loan_Term  Credit_Score  \
0  L00001   59    6018         8814         36           408   
1  L00002   49   13302        40984         60           520   
2  L00003   35    3970        31616         36           667   
3  L00004   63   10389         7113         36           446   
4  L00005   28    7765        48006         24           411   

  Employment_Status  Debt_to_Income_Ratio Marital_Status Education  \
0          Employed                  0.39       Divorced    Master   
1        Unemployed                  0.18       Divorced    Master   
2          Employed                  0.34       Divorced       PhD   
3          Employed                  0.38       Divorced    Master   
4          Employed                  0.39       Divorced    Master   

   Loan_Default  
0             1  
1             0  
2             1  
3             0  
4             1  
Confusion Matrix:
 [[74 33]
 [50 43]]

Classification Report:
               precision