In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score


# Load the raw loan records; the path is relative to the current working directory.
df = pd.read_csv('credit_train.csv')


In [2]:
# (rows, columns) of the loaded frame.
df.shape

(100514, 19)

In [3]:
# Column labels of the loaded frame.
df.columns

Index(['Loan ID', 'Customer ID', 'Loan Status', 'Current Loan Amount', 'Term',
       'Credit Score', 'Annual Income', 'Years in current job',
       'Home Ownership', 'Purpose', 'Monthly Debt', 'Years of Credit History',
       'Months since last delinquent', 'Number of Open Accounts',
       'Number of Credit Problems', 'Current Credit Balance',
       'Maximum Open Credit', 'Bankruptcies', 'Tax Liens'],
      dtype='object')

In [4]:
# Preview the first five records.
df.head(n=5)

Unnamed: 0,Loan ID,Customer ID,Loan Status,Current Loan Amount,Term,Credit Score,Annual Income,Years in current job,Home Ownership,Purpose,Monthly Debt,Years of Credit History,Months since last delinquent,Number of Open Accounts,Number of Credit Problems,Current Credit Balance,Maximum Open Credit,Bankruptcies,Tax Liens
0,14dd8831-6af5-400b-83ec-68e61888a048,981165ec-3274-42f5-a3b4-d104041a9ca9,Fully Paid,445412.0,Short Term,709.0,1167493.0,8 years,Home Mortgage,Home Improvements,5214.74,17.2,,6.0,1.0,228190.0,416746.0,1.0,0.0
1,4771cc26-131a-45db-b5aa-537ea4ba5342,2de017a3-2e01-49cb-a581-08169e83be29,Fully Paid,262328.0,Short Term,,,10+ years,Home Mortgage,Debt Consolidation,33295.98,21.1,8.0,35.0,0.0,229976.0,850784.0,0.0,0.0
2,4eed4e6a-aa2f-4c91-8651-ce984ee8fb26,5efb2b2b-bf11-4dfd-a572-3761a2694725,Fully Paid,99999999.0,Short Term,741.0,2231892.0,8 years,Own Home,Debt Consolidation,29200.53,14.9,29.0,18.0,1.0,297996.0,750090.0,0.0,0.0
3,77598f7b-32e7-4e3b-a6e5-06ba0d98fe8a,e777faab-98ae-45af-9a86-7ce5b33b1011,Fully Paid,347666.0,Long Term,721.0,806949.0,3 years,Own Home,Debt Consolidation,8741.9,12.0,,9.0,0.0,256329.0,386958.0,0.0,0.0
4,d4062e70-befa-4995-8643-a0de73938182,81536ad9-5ccf-4eb8-befb-47a4d608658e,Fully Paid,176220.0,Short Term,,,5 years,Rent,Debt Consolidation,20639.7,6.1,,15.0,0.0,253460.0,427174.0,0.0,0.0


In [5]:
# Number of missing values in each column.
df.isnull().sum()

Loan ID                           514
Customer ID                       514
Loan Status                       514
Current Loan Amount               514
Term                              514
Credit Score                    19668
Annual Income                   19668
Years in current job             4736
Home Ownership                    514
Purpose                           514
Monthly Debt                      514
Years of Credit History           514
Months since last delinquent    53655
Number of Open Accounts           514
Number of Credit Problems         514
Current Credit Balance            514
Maximum Open Credit               516
Bankruptcies                      718
Tax Liens                         524
dtype: int64

In [6]:
# Replace missing credit scores with the median of the observed scores.
df['Credit Score'] = df['Credit Score'].fillna(df['Credit Score'].median())

In [7]:
# Row count after the 'Credit Score' imputation.
len(df)

100514

In [8]:
# Replace missing annual incomes with the median of the observed incomes.
df['Annual Income'] = df['Annual Income'].fillna(df['Annual Income'].median())

In [9]:
# Row count after the 'Annual Income' imputation.
len(df)

100514

In [10]:
# Give missing job-tenure entries their own explicit category.
df['Years in current job'] = df['Years in current job'].fillna("Unknown")

In [11]:
# Row count after filling 'Years in current job'.
len(df)

100514

In [12]:
# Missing 'Months since last delinquent' values are encoded as 0.
# NOTE(review): presumably a blank means no recorded delinquency, whereas 0
# reads as "delinquent this month" -- confirm this encoding is intended.
df['Months since last delinquent'] = df['Months since last delinquent'].fillna(0)

In [13]:
# Row count after filling 'Months since last delinquent'.
len(df)

100514

In [14]:
# Discard rows that lack any of these three credit-history fields.
df = df.dropna(subset=['Bankruptcies', 'Tax Liens', 'Maximum Open Credit'])

In [15]:
# Row count after dropping the incomplete rows.
len(df)

99794

In [16]:
# dropna() returns a new frame rather than modifying df, so the result must be
# assigned back for the drop to take effect.
df = df.dropna()
# Row count once every row with a remaining missing value has been removed.
df.shape[0]

99794

In [17]:
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# ---- Prepare features and target ----------------------------------------
# Drop the identifier columns so they are not fed to the model.
df = df.drop(columns=['Loan ID','Customer ID'])

# Blanket fallback: any value that is still missing at this point becomes 0.
df = df.fillna(0)

# NOTE(review): the data preview contains a 'Current Loan Amount' of
# 99999999.0, which looks like a placeholder rather than a real amount --
# confirm, and handle it before trusting the model.

le = LabelEncoder()
df['Loan Status'] = le.fit_transform(df['Loan Status'])  # Fully Paid=1, Charged Off=0

X = df.drop(columns=['Loan Status'])
y = df['Loan Status']

# One-hot encode the categorical columns; drop_first removes the first level
# of each feature, which then acts as the all-zeros baseline.
X = pd.get_dummies(X, drop_first=True)

# ---- Train / test split and model fit -----------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

rf = RandomForestClassifier(n_estimators=200, random_state=42)
rf.fit(X_train, y_train)

# ---- Evaluation on the held-out 20% -------------------------------------
y_pred = rf.predict(X_test)

# precision/recall/F1 use scikit-learn's default positive label (1).
print("Accuracy :", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall   :", recall_score(y_test, y_pred))
print("F1-Score :", f1_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# ---- Score a single hypothetical applicant ------------------------------
new_applicant = pd.DataFrame([{
    'Current Loan Amount': 20000,
    'Term': 'Short Term',
    'Credit Score': 700,
    'Annual Income': 55000,
    'Years in current job': '5 years',
    'Home Ownership': 'Rent',
    'Purpose': 'Debt Consolidation',
    'Monthly Debt': 1500,
    'Years of Credit History': 10,
    'Months since last delinquent': 0,
    'Number of Open Accounts': 5,
    'Number of Credit Problems': 0,
    'Current Credit Balance': 10000,
    'Maximum Open Credit': 20000,
    'Bankruptcies': 0,
    'Tax Liens': 0
}])

# Encode the applicant, then align to the training columns. Dummy columns the
# model never saw are dropped and absent ones are filled with 0, so an
# unrecognised category value is silently scored as the baseline level.
new_applicant = pd.get_dummies(new_applicant)
new_applicant = new_applicant.reindex(columns=X.columns, fill_value=0)

# Locate the "Fully Paid" probability column by class label instead of
# assuming it sits at position 1 of predict_proba's output.
fully_paid_code = list(le.classes_).index('Fully Paid')
fully_paid_col = list(rf.classes_).index(fully_paid_code)

approval_prob = rf.predict_proba(new_applicant)[0][fully_paid_col]
print("\nApproval Probability for New Applicant:", approval_prob)


Accuracy : 0.8182273661005061
Precision: 0.8124469889737065
Recall   : 0.9941619097042034
F1-Score : 0.8941656942823804

Confusion Matrix:
 [[ 1005  3538]
 [   90 15326]]

Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.22      0.36      4543
           1       0.81      0.99      0.89     15416

    accuracy                           0.82     19959
   macro avg       0.87      0.61      0.63     19959
weighted avg       0.84      0.82      0.77     19959


Approval Probability for New Applicant: 0.57


In [20]:
import pickle

# Persist the trained model and the training feature columns as two separate
# artifacts. Each needs its own path: opening one file twice in 'wb' mode
# truncates it, leaving only the last object written on disk.
filename = 'credit_train.rol'
columns_filename = 'credit_train_columns.rol'

with open(filename, 'wb') as model_file:
    pickle.dump(rf, model_file)

# The column index is needed to align new inputs with the training features.
with open(columns_filename, 'wb') as columns_file:
    pickle.dump(X.columns, columns_file)