In [1]:
# step 1 - import the libraries
import pickle

import pandas as pd
import seaborn as sns
from sklearn import metrics
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [3]:
# step 2 - load the loan-approval dataset from CSV and preview the first rows
# NOTE(review): this is a machine-specific absolute path; anyone else running
# the notebook has to point DATA_PATH at their own copy of the file.
DATA_PATH = r'D:\My Project\ML Model\loan_approval.csv'
data = pd.read_csv(DATA_PATH)
data.head()

Unnamed: 0,name,city,income,credit_score,loan_amount,years_employed,points,loan_approved
0,Allison Hill,East Jill,113810,389,39698,27,50,False
1,Brandon Hall,New Jamesside,44592,729,15446,28,55,False
2,Rhonda Smith,Lake Roberto,33278,584,11189,13,45,False
3,Gabrielle Davis,West Melanieview,127196,344,48823,29,50,False
4,Valerie Gray,Mariastad,66048,496,47174,4,25,False


In [4]:
# step 3 - count missing values in each column
data.isnull().sum()

name              0
city              0
income            0
credit_score      0
loan_amount       0
years_employed    0
points            0
loan_approved     0
dtype: int64

In [5]:
# step 4 - count rows that repeat an earlier row (the first occurrence is not counted)
data.duplicated(keep='first').sum()

0

In [6]:
# step 5 - list the distinct labels present in the target column
data.loc[:, 'loan_approved'].unique()

array([False,  True])

In [7]:
# step 6 - encode the boolean target as integers (False -> 0, True -> 1)
dict1 = {False: 0, True: 1}
# Guard so the cell is safe to re-run: once the column is already integer-coded,
# pushing 0/1 back through a bool-keyed mapping can turn every label into NaN
# (unmatched keys map to NaN), silently destroying the target.
if not pd.api.types.is_integer_dtype(data['loan_approved']):
    data['loan_approved'] = data['loan_approved'].map(dict1)
data.head()

Unnamed: 0,name,city,income,credit_score,loan_amount,years_employed,points,loan_approved
0,Allison Hill,East Jill,113810,389,39698,27,50,0
1,Brandon Hall,New Jamesside,44592,729,15446,28,55,0
2,Rhonda Smith,Lake Roberto,33278,584,11189,13,45,0
3,Gabrielle Davis,West Melanieview,127196,344,48823,29,50,0
4,Valerie Gray,Mariastad,66048,496,47174,4,25,0


In [8]:
# step 7 - remove the 'name' and 'city' columns before modelling
data = data.drop(columns=['name', 'city'])
data.head()

Unnamed: 0,income,credit_score,loan_amount,years_employed,points,loan_approved
0,113810,389,39698,27,50,0
1,44592,729,15446,28,55,0
2,33278,584,11189,13,45,0
3,127196,344,48823,29,50,0
4,66048,496,47174,4,25,0


In [9]:
# step 8 - split the frame into features (independent) and target (dependent)
# Columns are selected by name rather than by position, so a reordered or
# extended CSV cannot silently shift which columns feed the model.
feature_cols = ['income', 'credit_score', 'loan_amount', 'years_employed', 'points']
target_col = 'loan_approved'

inde = data[feature_cols]
depe = data[target_col]

inde.head(), depe.head()

(   income  credit_score  loan_amount  years_employed  points
 0  113810           389        39698              27      50
 1   44592           729        15446              28      55
 2   33278           584        11189              13      45
 3  127196           344        48823              29      50
 4   66048           496        47174               4      25,
 0    0
 1    0
 2    0
 3    0
 4    0
 Name: loan_approved, dtype: int64)

In [10]:
# step 9 - hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    inde,
    depe,
    test_size=0.2,
    random_state=0,
)
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((1600, 5), (400, 5), (1600,), (400,))

In [11]:
# step 10 - build an entropy-criterion decision tree and fit it on the training split
# fit() returns the estimator itself, so the chained call leaves `model` fitted.
model = DecisionTreeClassifier(
    criterion='entropy',
    random_state=1225,
).fit(x_train, y_train)
model

In [12]:
# step 11 - predict labels for the held-out test rows
pred = model.predict(X=x_test)
pred

array([0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0,
       1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1,
       0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1,
       1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1,
       1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0,

In [13]:
# step 12 - confusion matrix (rows: actual class, columns: predicted class)
confusion = metrics.confusion_matrix(y_true=y_test, y_pred=pred)
confusion

array([[220,   0],
       [  0, 180]], dtype=int64)

In [14]:
# step 13 - fraction of test rows whose predicted label matches the true label
# NOTE(review): a perfect score on held-out data is unusual; check whether a
# feature such as `points` directly encodes the approval decision (leakage).
acc = metrics.accuracy_score(y_true=y_test, y_pred=pred)
acc


1.0

In [15]:
# step 14 - persist the fitted model to disk
# The context manager closes the file handle (flushing the pickle) even if
# dump() raises; the bare open() used before never closed it explicitly.
with open("Loan_Model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

In [16]:
# step 15 - get values from the user and test the model

# Each prompt expects a whole number; int() raises ValueError on anything else.
income = int(input("Enter your income: "))
credit_score = int(input("Enter your credit score: "))
loan_amount = int(input("Enter your loan amount: "))
years_employed = int(input("Enter your years employed: "))
points = int(input("Enter your points: "))

# Wrap the values in a one-row DataFrame carrying the training column names so
# the input matches what the model was fitted on; a bare list has no feature
# names and depends on the value order lining up with the training columns.
user_data = pd.DataFrame(
    [[income, credit_score, loan_amount, years_employed, points]],
    columns=x_train.columns,
)

output = model.predict(user_data)

# predict() returns one label per input row; 1 is the encoded "approved" class.
if output[0] == 1:
    print("your loan will approve")
else:
    print("your loan will not approve")

your loan will not approve


