In [24]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression


In [25]:
loan_dataset=pd.read_csv('loan.csv')

In [26]:
loan_dataset.head()

Unnamed: 0,age,gender,occupation,education_level,marital_status,income,credit_score,loan_status
0,32,Male,Engineer,Bachelor's,Married,85000,720,Approved
1,45,Female,Teacher,Master's,Single,62000,680,Approved
2,28,Male,Student,High School,Single,25000,590,Denied
3,51,Female,Manager,Bachelor's,Married,105000,780,Approved
4,36,Male,Accountant,Bachelor's,Married,75000,710,Approved


In [27]:
loan_dataset.describe()

Unnamed: 0,age,income,credit_score
count,61.0,61.0,61.0
mean,37.081967,78983.606557,709.836066
std,8.424755,33772.025802,72.674888
min,24.0,25000.0,560.0
25%,30.0,52000.0,650.0
50%,36.0,78000.0,720.0
75%,43.0,98000.0,770.0
max,55.0,180000.0,830.0


In [28]:
loan_dataset['gender'] = loan_dataset['gender'].map({'Male': 0, 'Female': 1})
loan_dataset['loan_status'] = loan_dataset['loan_status'].map({'Approved': 0, 'Denied': 1})

In [29]:
#seprating the data and labels
features=loan_dataset.drop(columns=['occupation','education_level','marital_status','credit_score'])
target=loan_dataset['credit_score']

In [30]:
print(features)
print(target)

    age  gender  income  loan_status
0    32       0   85000            0
1    45       1   62000            0
2    28       0   25000            1
3    51       1  105000            0
4    36       0   75000            0
..  ...     ...     ...          ...
56   39       0  100000            0
57   25       1   32000            1
58   43       0   95000            0
59   30       1   55000            0
60   38       0   65000            0

[61 rows x 4 columns]
0     720
1     680
2     590
3     780
4     710
     ... 
56    770
57    570
58    760
59    650
60    700
Name: credit_score, Length: 61, dtype: int64


In [31]:
scaler=StandardScaler()
scaler.fit(features)

In [32]:
standardized_data=scaler.transform(features)
print(standardized_data)

[[-0.60822442 -0.98373875  0.17962567 -0.59628479]
 [ 0.94765289  1.01653005 -0.50706321 -0.59628479]
 [-1.0869559  -0.98373875 -1.61173663  1.67705098]
 [ 1.66575011  1.01653005  0.77674644 -0.59628479]
 [-0.12949294 -0.98373875 -0.11893471 -0.59628479]
 [-1.56568738  1.01653005 -0.92504775  1.67705098]
 [ 0.58860428 -0.98373875  1.22458702 -0.59628479]
 [-0.96727303  1.01653005 -1.22360813  1.67705098]
 [-0.00981007 -0.98373875  0.38861794 -0.59628479]
 [ 1.3067015   1.01653005  3.01594932 -0.59628479]
 [ 2.14448158 -0.98373875  0.92602663 -0.59628479]
 [-0.72790729  1.01653005 -0.32792698 -0.59628479]
 [-1.32632164 -0.98373875 -1.10418398  1.67705098]
 [ 0.22955567  1.01653005 -0.0293666  -0.59628479]
 [ 0.82797002 -0.98373875  0.47818606 -0.59628479]
 [-1.20663877  1.01653005 -0.80562359  1.67705098]
 [-0.36885868 -0.98373875  0.26919379 -0.59628479]
 [ 0.46892141  1.01653005  1.37386721 -0.59628479]
 [-0.84759016 -0.98373875 -0.20850283 -0.59628479]
 [ 1.90511584  1.01653005  0.56

In [33]:
X_train,X_test,y_train,y_test=train_test_split(features,target,test_size=0.2,random_state=2)

In [34]:
print(features.shape,X_train.shape,X_test.shape)

(61, 4) (48, 4) (13, 4)


In [39]:
import warnings
warnings.filterwarnings('ignore')
classifier=LogisticRegression()
classifier.fit(X_train,y_train)

In [40]:
y_prediction=classifier.predict(X_train)
training_data_accuracy=accuracy_score(y_train,y_prediction)
print('Accuracy score of the training data: ',training_data_accuracy)

Accuracy score of the training data:  0.125


In [41]:
y_prediction=classifier.predict(X_test)
testing_data_accuracy=accuracy_score(y_test,y_prediction)
print('Accuracy score of the test data: ',testing_data_accuracy)

Accuracy score of the test data:  0.0


In [42]:
input_data=(36,0,67000,1)

# changing the input_data to numpy array
input_data_as_numpy_array=np.asarray(input_data)

#reshape the array as we are predicting for one instance
input_data_reshaped=input_data_as_numpy_array.reshape(1,-1)

#standardize the input data
std_data=scaler.transform(input_data_reshaped)
print(std_data)

prediction=classifier.predict(std_data)
print(prediction)

if (prediction[0]==0):
    print('The loan is denied')
else:
    print('The loan is approved')

[[-0.12949294 -0.98373875 -0.35778302  1.67705098]]
[820]
The loan is approved
