# Importing Libraries

In [2]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics 

In [4]:
df = pd.read_csv(r'C:\Users\sanuv\Downloads\loan_approval_dataset.csv')
df

Unnamed: 0,MonthlyIncome,Age,CreditScore,LoanAmountRequested,LoanTermMonths,EmploymentStatus,MaritalStatus,NumDependents,LoanApproved
0,4732,58,472,11572,50,Self-Employed,Divorced,1,0
1,4607,40,540,13194,24,Self-Employed,Single,4,0
2,3653,20,667,16230,27,Self-Employed,Divorced,0,1
3,5264,56,498,6093,40,Employed,Divorced,1,1
4,6931,22,562,14775,37,Self-Employed,Single,0,0
...,...,...,...,...,...,...,...,...,...
995,6038,54,599,12125,19,Employed,Divorced,0,1
996,4963,54,407,6711,29,Employed,Single,0,1
997,3186,24,770,14820,44,Unemployed,Single,2,1
998,4098,56,432,15111,41,Self-Employed,Divorced,4,1


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   MonthlyIncome        1000 non-null   int64 
 1   Age                  1000 non-null   int64 
 2   CreditScore          1000 non-null   int64 
 3   LoanAmountRequested  1000 non-null   int64 
 4   LoanTermMonths       1000 non-null   int64 
 5   EmploymentStatus     1000 non-null   object
 6   MaritalStatus        1000 non-null   object
 7   NumDependents        1000 non-null   int64 
 8   LoanApproved         1000 non-null   int64 
dtypes: int64(7), object(2)
memory usage: 70.4+ KB


In [6]:
# Checking the missing value
df.isnull().sum()

MonthlyIncome          0
Age                    0
CreditScore            0
LoanAmountRequested    0
LoanTermMonths         0
EmploymentStatus       0
MaritalStatus          0
NumDependents          0
LoanApproved           0
dtype: int64

# Data Preprocessing

In [7]:
# Replacing Catogorical value to Numerical value

df.MaritalStatus=df.MaritalStatus.map({'Married':0,'Divorced':1,'Single':2})
df['MaritalStatus'].value_counts()

0    345
2    343
1    312
Name: MaritalStatus, dtype: int64

In [8]:
df.EmploymentStatus=df.EmploymentStatus.map({'Employed':0,'Unemployed':1,'Self-Employed':2})
df['EmploymentStatus'].value_counts()

1    362
0    333
2    305
Name: EmploymentStatus, dtype: int64

In [9]:
df['LoanApproved'].value_counts()

1    515
0    485
Name: LoanApproved, dtype: int64

In [10]:
# Final Dataframe
df.head()

Unnamed: 0,MonthlyIncome,Age,CreditScore,LoanAmountRequested,LoanTermMonths,EmploymentStatus,MaritalStatus,NumDependents,LoanApproved
0,4732,58,472,11572,50,2,1,1,0
1,4607,40,540,13194,24,2,2,4,0
2,3653,20,667,16230,27,2,1,0,1
3,5264,56,498,6093,40,0,1,1,1
4,6931,22,562,14775,37,2,2,0,0


# Model Building

In [11]:
X = df.drop('LoanApproved', axis = 1)
y = df['LoanApproved']
print('shape of X =', X.shape)
print('shape of y =', y.shape)

shape of X = (1000, 8)
shape of y = (1000,)


In [12]:
# Spliting the data into train and test set

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

print('Shape of X_train', X_train.shape)
print('Shape of X_test', X_test.shape)
print('Shape of y_train', y_train.shape)
print('Shape of y_test', y_test.shape)

Shape of X_train (800, 8)
Shape of X_test (200, 8)
Shape of y_train (800,)
Shape of y_test (200,)


In [13]:
dt = DecisionTreeClassifier()

In [14]:
dt.fit(X_train, y_train)

In [15]:
y_pred_test = dt.predict(X_test)
y_pred_test

array([0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0,
       0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1,
       1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1,
       1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1,
       0, 1], dtype=int64)

In [17]:
dt.score(X_test, y_test)

0.455

In [18]:
accuracy_test = metrics.accuracy_score(y_test,y_pred_test) * 100
accuracy_test

45.5

In [22]:
print("Model Accuracy score for test dataset", accuracy_test)

Model Accuracy score for test dataset 45.5
