<a href="https://colab.research.google.com/github/SarinaSufiyan131/LoanApplicationRiskPrediction/blob/main/NoteBook/loanpred.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Loan Application Risk Prediction**
Predict loan application success or rejection using credit insights for smarter financial decision-making.

In [1]:
# Importing all necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score, confusion_matrix, RocCurveDisplay, classification_report, ConfusionMatrixDisplay, roc_curve
from sklearn.utils import resample
from imblearn.over_sampling import SMOTE
!pip install pytorch-tabular

from pytorch_tabular import TabularModel
import torch
import torch.nn as nn
import pytorch_lightning as pl
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from pytorch_lightning.loggers import CSVLogger

import warnings
warnings.filterwarnings('ignore')



In [9]:
# Load the three input tables from CSV files located in the working directory.
# NOTE(review): the previews shown below include an 'Unnamed: 0' column, which
# suggests the CSVs were saved together with their index — confirm whether that
# column is needed downstream.
APPLICATIONS_CSV = 'loan_applications.csv'
CREDIT_FEATURES_CSV = 'credit_features_subset.csv'
DATA_DICTIONARY_CSV = 'loan_data_dictionary.csv'

application_samples = pd.read_csv(APPLICATIONS_CSV)
credit_features = pd.read_csv(CREDIT_FEATURES_CSV)
data_dictionary = pd.read_csv(DATA_DICTIONARY_CSV)

In [5]:
# Preview the first five rows of the credit features table.
credit_features.head(n=5)


Unnamed: 0,UID,ALL_AgeOfOldestAccount,ALL_AgeOfYoungestAccount,ALL_Count,ALL_CountActive,ALL_CountClosedLast12Months,ALL_CountDefaultAccounts,ALL_CountOpenedLast12Months,ALL_CountSettled,ALL_MeanAccountAge,ALL_SumCurrentOutstandingBal,ALL_SumCurrentOutstandingBalExcMtg,ALL_TimeSinceMostRecentDefault,ALL_WorstPaymentStatusActiveAccounts
0,216009,227,12,14,11,0,3,0,3,71.36,9292,9292,-1,7
1,217537,49,10,7,6,1,3,1,1,29.33,4592,4592,-1,7
2,221803,134,0,12,11,0,3,5,1,32.33,2737,2737,-1,7
3,222260,227,2,39,12,0,2,2,27,41.56,9229,9229,-1,7
4,222848,132,2,99,11,8,0,9,88,43.34,15443,15443,-1,0


In [11]:
# Preview the first five rows of the loan applications table.
application_samples.head(n=5)

Unnamed: 0,UID,ApplicationDate,Amount,Term,EmploymentType,LoanPurpose,Success
0,4921736,03/07/2020,2000,60,Employed - full time,Unexpected bills,0
1,1241981,04/02/2020,3000,60,Employed - full time,Starting new bussniess,0
2,5751748,02/08/2020,20000,60,Employed - full time,Business capital,0
3,7163425,23/09/2020,20000,60,Self employed,New business venture,0
4,227377,01/01/2020,5000,36,Employed - full time,car,0


In [12]:
# Preview the first five entries of the data dictionary (column name -> description).
data_dictionary.head(n=5)

Unnamed: 0,Name,Description
0,UID,unique identifier
1,Application Date,Date of loan application
2,Amount,Amount requested by applicant
3,Term,Repayment period requested by applicant
4,Employment Type,Stated employment


In [13]:
# Count missing values per column in the loan applications table.
application_samples.isna().sum()


Unnamed: 0,0
UID,0
ApplicationDate,0
Amount,0
Term,0
EmploymentType,0
LoanPurpose,0
Success,0


In [14]:
# Count missing values per column in the credit features table.
credit_features.isna().sum()


Unnamed: 0,0
UID,0
ALL_AgeOfOldestAccount,0
ALL_AgeOfYoungestAccount,0
ALL_Count,0
ALL_CountActive,0
ALL_CountClosedLast12Months,0
ALL_CountDefaultAccounts,0
ALL_CountOpenedLast12Months,0
ALL_CountSettled,0
ALL_MeanAccountAge,0


In [15]:
# 1. Distribution of Loan Amounts
# Histogram of the requested loan amounts: up to 30 bins, bars outlined in black.
fig_amount_dist = go.Figure(data=[
    go.Histogram(
        x=application_samples['Amount'],
        nbinsx=30,
        marker_line_color='black',
        marker_line_width=1,
        opacity=0.75,
    )
])

# Centered title near the top edge; light-grey plotting area on the white template.
fig_amount_dist.update_layout(
    title_text="Distribution of Loan Amounts",
    title_y=0.95,
    title_x=0.5,
    title_xanchor='center',
    title_yanchor='top',
    xaxis=dict(title="Loan Amount"),
    yaxis=dict(title="Frequency"),
    template="plotly_white",
    font_size=14,
    plot_bgcolor="rgba(240,240,240,1)",
    margin={'l': 50, 'r': 50, 't': 50, 'b': 50},
    showlegend=False,
)

fig_amount_dist.show()

In [16]:
# 2. Distribution of Loan Terms
# Histogram of the requested repayment terms: up to 10 bins, orange bars with black outlines.
fig_term_dist = go.Figure(data=[
    go.Histogram(
        x=application_samples['Term'],
        nbinsx=10,
        marker_color='orange',
        marker_line_color='black',
        marker_line_width=1,
        opacity=0.75,
    )
])

# Centered title near the top edge; light-grey plotting area on the white template.
fig_term_dist.update_layout(
    title_text="Distribution of Loan Terms",
    title_y=0.95,
    title_x=0.5,
    title_xanchor='center',
    title_yanchor='top',
    xaxis=dict(title="Loan Term (Months)"),
    yaxis=dict(title="Frequency"),
    template="plotly_white",
    font_size=14,
    plot_bgcolor="rgba(240,240,240,1)",
    margin={'l': 50, 'r': 50, 't': 50, 'b': 50},
    showlegend=False,
)

fig_term_dist.show()

In [22]:
# NOTE(review): this figure has no traces, so it renders as empty axes. The
# title "sarina" and the loan-term axis labels look like leftovers from a
# scratch experiment — confirm whether this cell should be completed or removed.
fig_SS_dist = go.Figure()

fig_SS_dist.update_layout(
    title="sarina",
    xaxis=dict(title="Loan Term (Months)"),
    yaxis=dict(title="Frequency"),
    template="plotly_white",
    font_size=14,
    plot_bgcolor="rgba(240,240,240,1)",
    margin={'l': 50, 'r': 50, 't': 50, 'b': 50},
    showlegend=False,
)

fig_SS_dist.show()


In [23]:
# 3. Proportion of Success
# Donut chart of rejected (Success == 0) versus approved (Success == 1) applications.
#
# value_counts() returns counts ordered by descending frequency, not by class
# value. The slice labels below are in fixed [0, 1] order, so the counts are
# reindexed to that same order; otherwise the labels would be swapped whenever
# approvals outnumber rejections. fill_value=0 keeps both slices defined even
# if one class is absent from the data.
# Assumes Success is stored as integers 0/1, as in the preview above — TODO confirm.
success_counts = (
    application_samples['Success']
    .value_counts()
    .reindex([0, 1], fill_value=0)
)

fig_success_dist = go.Figure()

fig_success_dist.add_trace(go.Pie(
    labels=['Rejected (0)', 'Approved (1)'],
    values=success_counts.values,
    marker=dict(colors=['#FFC0C0', '#ADD8E6']),  # Light red = rejected, light blue = approved
    hole=0.4
))

# Centered title near the top edge; legend kept so the slice colors are identified.
fig_success_dist.update_layout(
    title={
        'text': "Proportion of Loan Success",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    font=dict(size=14),
    template="plotly_white",
    showlegend=True
)

fig_success_dist.show()

In [24]:
# 4. Breakdown of Employment Type
# Tabulate how many applications fall under each stated employment type and
# give the two resulting columns display-friendly names.
employment_counts = application_samples['EmploymentType'].value_counts().reset_index()
employment_counts.columns = ['Employment Type', 'Count']

# Horizontal bars: counts along x, one bar per employment type along y.
fig_employment = go.Figure(data=[
    go.Bar(
        x=employment_counts['Count'],
        y=employment_counts['Employment Type'],
        orientation='h',
        marker_color='#ADD8E6',
        marker_line_color='black',
        marker_line_width=1,
    )
])

# Centered title near the top edge; light-grey plotting area on the white template.
fig_employment.update_layout(
    title_text="Employment Type Distribution",
    title_y=0.95,
    title_x=0.5,
    title_xanchor='center',
    title_yanchor='top',
    xaxis=dict(title="Count"),
    yaxis=dict(title="Employment Type"),
    template="plotly_white",
    font_size=14,
    plot_bgcolor="rgba(240,240,240,1)",
    margin={'l': 50, 'r': 50, 't': 50, 'b': 50},
    showlegend=False,
)

fig_employment.show()