In [6]:
# import libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib
import warnings 

warnings.filterwarnings('ignore')

In [8]:
# Load the loan applications. read_csv already returns a DataFrame, so no
# additional pd.DataFrame(...) wrapper is needed.
raw_data = pd.read_csv("loan_data.csv")

# Work on an independent copy so that `raw_data` stays an untouched reference
# to the file contents.
df = raw_data.copy()
df.head()

Unnamed: 0,Text,Income,Credit_Score,Loan_Amount,DTI_Ratio,Employment_Status,Approval
0,I need a loan to pay for an international vaca...,26556,581,8314,79.26,employed,Rejected
1,I want to make home improvements like installi...,197392,389,111604,22.14,employed,Rejected
2,"I need a loan for home renovation, including a...",44561,523,34118,45.44,employed,Rejected
3,I need funds to buy new furniture and applianc...,190363,729,118757,10.22,unemployed,Rejected
4,I need a loan to start a small business.,61853,732,19210,44.13,employed,Approved


In [10]:
# Preprocessing: drop the free-text "Text" column.
# Deriving `df` from `raw_data` (instead of reassigning `df` from itself)
# keeps this cell re-runnable: a second run no longer raises KeyError because
# "Text" has already been removed from `df`.
df = raw_data.drop(columns=["Text"])
df.head(2)

Unnamed: 0,Income,Credit_Score,Loan_Amount,DTI_Ratio,Employment_Status,Approval
0,26556,581,8314,79.26,employed,Rejected
1,197392,389,111604,22.14,employed,Rejected


In [12]:
# Inspect the distinct labels of each categorical column before encoding.
for column in ("Employment_Status", "Approval"):
    print(df[column].unique())

['employed' 'unemployed']
['Rejected' 'Approved']


In [14]:
# Categorical encoding.
# Each column gets its own encoder so that both fitted mappings (`classes_`)
# stay available for `inverse_transform`; reusing a single instance would
# overwrite the Employment_Status mapping as soon as Approval is fitted.
employment_encoder = LabelEncoder()
df['Employment_Status'] = employment_encoder.fit_transform(df['Employment_Status'])

# `label_encoder` keeps its original name and ends up fitted on Approval,
# exactly as it did before.
label_encoder = LabelEncoder()
df['Approval'] = label_encoder.fit_transform(df['Approval'])

- `Employment_Status`: {employed: 0, unemployed: 1}
- `Approval`: {Approved: 0, Rejected: 1}

In [16]:
# Spot-check that both categorical columns now hold integer codes.
df.head(n=2)

Unnamed: 0,Income,Credit_Score,Loan_Amount,DTI_Ratio,Employment_Status,Approval
0,26556,581,8314,79.26,0,1
1,197392,389,111604,22.14,0,1


In [18]:
## Modeling
# Select the target by name rather than by position, so that adding or
# reordering columns cannot silently change what the model is trained to predict.
TARGET_COLUMN = "Approval"
X = df.drop(columns=[TARGET_COLUMN])
y = df[TARGET_COLUMN]

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=23)

# NOTE(review): max_iter is set far above the library default, presumably
# because the features are unscaled and the solver converges slowly - confirm.
clsf_model = LogisticRegression(max_iter=10000, random_state=0)
clsf_model.fit(X_train, y_train)

# Accuracy on the held-out rows.
y_pred = clsf_model.predict(X_test)
print(accuracy_score(y_test, y_pred))

0.8297916666666667


## Deployment

In [103]:
pip install joblib

Note: you may need to restart the kernel to use updated packages.


In [20]:
# Persist the fitted classifier to disk so it can be reloaded later.
joblib.dump(value=clsf_model, filename='model.pkl')

['model.pkl']

In [24]:
## Loading the model
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
loaded_model = joblib.load('model.pkl')

# Build the sample as a DataFrame that carries the training column names.
# The model was fitted on a DataFrame, so passing a bare NumPy array skips
# scikit-learn's feature-name validation (the resulting warning is hidden by
# the global warnings filter) and leaves the column order unchecked.
new_data = pd.DataFrame(
    [
        {
            "Income": 20000,
            "Credit_Score": 400,
            "Loan_Amount": 50000,
            "DTI_Ratio": 7,
            "Employment_Status": 1,  # encoded value: 1 = unemployed
        }
    ]
)

predictions = loaded_model.predict(new_data)

# Encoded label: 0 = Approved, 1 = Rejected.
predictions[0]

1