In [24]:
import os

import pandas as pd

# Location of the loan CSV. Set the LOAN_DATA_CSV environment variable to point
# at another copy of the file; when it is unset, the local default path is used.
DATA_PATH = os.environ.get(
    "LOAN_DATA_CSV",
    r"C:\Users\sheha\Downloads\archive\loan_data.csv",
)

raw_data = pd.read_csv(DATA_PATH)

# read_csv already returns a DataFrame, so no pd.DataFrame(...) wrapper is needed.
# Work on a copy so raw_data stays available as the unmodified source.
df = raw_data.copy()

df.head()

Unnamed: 0,Text,Income,Credit_Score,Loan_Amount,DTI_Ratio,Employment_Status,Approval
0,I need a loan to pay for an international vaca...,26556,581,8314,79.26,employed,Rejected
1,I want to make home improvements like installi...,197392,389,111604,22.14,employed,Rejected
2,"I need a loan for home renovation, including a...",44561,523,34118,45.44,employed,Rejected
3,I need funds to buy new furniture and applianc...,190363,729,118757,10.22,unemployed,Rejected
4,I need a loan to start a small business.,61853,732,19210,44.13,employed,Approved


In [26]:
#  preprocessing
# Build df from raw_data rather than from df itself so this cell can be re-run:
# dropping "Text" from a frame that has already lost it raises a KeyError.
df = raw_data.drop(columns=["Text"])
df.head(2)

Unnamed: 0,Income,Credit_Score,Loan_Amount,DTI_Ratio,Employment_Status,Approval
0,26556,581,8314,79.26,employed,Rejected
1,197392,389,111604,22.14,employed,Rejected


In [28]:
# List the distinct labels of each categorical column before encoding them.
for categorical_column in ('Employment_Status', 'Approval'):
    print(df[categorical_column].unique())

['employed' 'unemployed']
['Rejected' 'Approved']


In [30]:
from sklearn.preprocessing import LabelEncoder

# Use one encoder per column: refitting a single LabelEncoder on a second column
# replaces its classes_, so the first column could no longer be decoded with
# inverse_transform.
employment_encoder = LabelEncoder()
# label_encoder is the encoder fitted on the target column (Approval).
label_encoder = LabelEncoder()

df['Employment_Status'] = employment_encoder.fit_transform(df['Employment_Status'])
df['Approval'] = label_encoder.fit_transform(df['Approval'])

## Encoding Values
- `Employment_Status`: {employed: 0, unemployed: 1}
- `Approval`: {Approved: 0, Rejected: 1}

In [32]:
# Preview the first two rows to confirm both categorical columns are now numeric.
df.head(n=2)

Unnamed: 0,Income,Credit_Score,Loan_Amount,DTI_Ratio,Employment_Status,Approval
0,26556,581,8314,79.26,0,1
1,197392,389,111604,22.14,0,1


In [35]:
## Modeling
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Every column except the last is a feature; the last column (Approval) is the target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=23,
)

# fit() returns the estimator itself, so construction and training can be chained.
clsf_model = LogisticRegression(max_iter=10000, random_state=0).fit(X_train, y_train)

# Accuracy on the held-out rows.
y_pred = clsf_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(test_accuracy)

0.8297916666666667


## Deployment

In [103]:
pip install joblib

Note: you may need to restart the kernel to use updated packages.


In [37]:
import joblib

# Persist the fitted classifier to disk so it can be reloaded without retraining.
# joblib.dump returns the list of file names it wrote, which is shown as the cell output.
joblib.dump(value=clsf_model, filename='model.pkl')

['model.pkl']

In [51]:
# Show one held-out row as a template for hand-written inputs.
# A fixed random_state makes the displayed row the same on every run.
X_test.sample(n=1, random_state=23)

Unnamed: 0,Income,Credit_Score,Loan_Amount,DTI_Ratio,Employment_Status
14790,91232,602,44327,11.4,1


In [65]:
## loading the model
import numpy as np

# joblib.load unpickles the file, so only load model files from a trusted source.
loaded_model = joblib.load('model.pkl')

# One applicant, values in training-column order:
# Income, Credit_Score, Loan_Amount, DTI_Ratio, Employment_Status
new_data = np.array([[20000, 400, 50000, 7, 1]])

# The model was fitted on a DataFrame, so pass the input as a DataFrame carrying the
# same column names; predicting on a bare array triggers scikit-learn's
# "X does not have valid feature names" warning. Falls back to default column
# labels when the estimator does not expose feature_names_in_.
new_frame = pd.DataFrame(
    new_data,
    columns=getattr(loaded_model, "feature_names_in_", None),
)

predictions = loaded_model.predict(new_frame)

predictions[0]   # 1 -> Rejected (Approval encoding: Approved = 0, Rejected = 1)



1

Unnamed: 0,Income,Credit_Score,Loan_Amount,DTI_Ratio,Employment_Status
401,196599,340,50325,4.97,0
4432,190559,745,25840,17.24,0
876,23949,471,4920,96.43,1
4422,179345,346,57710,3.75,1
18368,161262,833,89277,10.98,0
...,...,...,...,...,...
16391,74827,471,23439,20.05,1
4537,67443,479,11159,61.45,1
22414,143948,681,72037,33.62,1
18661,185037,707,51804,6.44,1
