<a href="https://colab.research.google.com/github/Rajat262005/Rajat262005-Project-Idea-Loan-Default-Prediction/blob/main/project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Loan Default Prediction

In [None]:
#Import Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [None]:
# Load Dataset
# CSV location (a Colab-style /content path); adjust when running elsewhere.
DATA_PATH = "/content/Default_Fin.csv"
df = pd.read_csv(DATA_PATH)

# Preview the first five rows as the cell's rich output.
df.head()

Unnamed: 0,Index,Employed,Bank Balance,Annual Salary,Defaulted?
0,1,1,8754.36,532339.56,0
1,2,0,9806.16,145273.56,0
2,3,1,12882.6,381205.68,0
3,4,1,6351.0,428453.88,0
4,5,1,9427.92,461562.0,0


In [None]:
# EDA (Exploratory Data Analysis)
# Dimensions of the dataset as (rows, columns).
print("Shape:", df.shape)

# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()

# Number of missing values in each column.
print(df.isnull().sum())

Shape: (10000, 5)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Index          10000 non-null  int64  
 1   Employed       10000 non-null  int64  
 2   Bank Balance   10000 non-null  float64
 3   Annual Salary  10000 non-null  float64
 4   Defaulted?     10000 non-null  int64  
dtypes: float64(2), int64(3)
memory usage: 390.8 KB
None
Index            0
Employed         0
Bank Balance     0
Annual Salary    0
Defaulted?       0
dtype: int64


In [None]:
# Preprocessing
# "Index" appears to be a running row number (1, 2, 3, ... in the preview), so
# it is removed before modelling.
# Rebinding instead of using inplace=True, together with errors="ignore", keeps
# the cell safe to re-run: a second execution is a no-op rather than a KeyError.
df = df.drop(columns="Index", errors="ignore")

In [None]:
# Features and target
TARGET_COLUMN = "Defaulted?"

# y holds the label column; X holds every remaining column.
y = df[TARGET_COLUMN]
X = df.drop(columns=[TARGET_COLUMN])

In [None]:
# Train-test split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:

# Train Models
# Candidate classifiers, all seeded so repeated runs give the same fit.
models = {
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42)
}

# results maps model name -> dict with the keys read by the later cells:
# 'model', 'accuracy', 'precision', 'recall' and 'report'.
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Store everything per model. Without this the metrics are computed and
    # thrown away, `results` stays empty, and the display / feature-importance
    # cells silently show nothing.
    # zero_division=0 reports a score of 0 (without a warning) if a model
    # happens to predict no defaults at all.
    results[name] = {
        'model': model,
        'accuracy': accuracy,
        'precision': precision_score(y_test, y_pred, zero_division=0),
        'recall': recall_score(y_test, y_pred, zero_division=0),
        'report': classification_report(y_test, y_pred, zero_division=0),
    }

In [None]:
# Display Results
for name, metrics in results.items():
    # Headline scores, each formatted to four decimal places.
    print(f"\n📊 Model: {name}")
    for label, key in (("Accuracy", "accuracy"), ("Precision", "precision"), ("Recall", "recall")):
        print(f"{label}: {metrics[key]:.4f}")
    print("Classification Report:\n", metrics['report'])

    # Re-predict on the test split with the stored estimator and draw the
    # confusion-matrix counts as an annotated heatmap.
    test_predictions = metrics['model'].predict(X_test)
    cm = confusion_matrix(y_test, test_predictions)
    ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    ax.set_title(f'Confusion Matrix - {name}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    plt.show()


In [None]:
# Feature Importance
for name, metrics in results.items():
    estimator = metrics['model']
    # Only estimators that expose feature_importances_ are plotted.
    if not hasattr(estimator, 'feature_importances_'):
        continue

    # Pair each importance with its feature name and rank from highest to lowest.
    ranked = pd.Series(estimator.feature_importances_, index=X.columns)
    ranked = ranked.sort_values(ascending=False)

    plt.figure(figsize=(6, 4))
    sns.barplot(x=ranked, y=ranked.index)
    plt.title(f"Feature Importance - {name}")
    plt.show()




In [None]:
# GridSearchCV on Random Forest
# Search space: 3 x 3 x 2 = 18 parameter combinations.
param_grid = dict(
    n_estimators=[50, 100, 150],
    max_depth=[None, 5, 10],
    min_samples_split=[2, 5],
)

# 5-fold cross-validated search scored on accuracy, fitted on the training split.
grid = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid.fit(X_train, y_train)

print("\n Best Parameters (Random Forest):", grid.best_params_)

# Evaluate the fitted search object on the held-out test split.
y_pred_best = grid.predict(X_test)
tuned_accuracy = accuracy_score(y_test, y_pred_best)
print("Tuned Accuracy:", tuned_accuracy)
final_report = classification_report(y_test, y_pred_best)
print(" Final Classification Report:\n", final_report)



 Best Parameters (Random Forest): {'max_depth': 5, 'min_samples_split': 5, 'n_estimators': 50}
Tuned Accuracy: 0.9685
 Final Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.99      0.98      1931
           1       0.59      0.28      0.38        69

    accuracy                           0.97      2000
   macro avg       0.78      0.63      0.68      2000
weighted avg       0.96      0.97      0.96      2000

