# Loan Eligibility Prediction

### import require packages


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### Load the Dataset

In [3]:
df = pd.read_csv("Loan_default.csv")

### Perform Exploratory Data Analysis

In [4]:
df.info

<bound method DataFrame.info of             LoanID  Age  Income  LoanAmount  CreditScore  MonthsEmployed  \
0       I38PQUQS96   56   85994       50587          520              80   
1       HPSK72WA7R   69   50432      124440          458              15   
2       C1OZ6DPJ8Y   46   84208      129188          451              26   
3       V2KKSFM3UN   32   31713       44799          743               0   
4       EY08JDHTZP   60   20437        9139          633               8   
...            ...  ...     ...         ...          ...             ...   
255342  8C6S86ESGC   19   37979      210682          541             109   
255343  98R4KDHNND   32   51953      189899          511              14   
255344  XQK1UUUNGP   56   84820      208294          597              70   
255345  JAO28CPL4H   42   85109       60575          809              40   
255346  ZTH91CGL0B   62   22418       18481          636             113   

        NumCreditLines  InterestRate  LoanTerm  DTIRati

In [5]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Age,255347.0,43.498306,14.990258,18.0,31.0,43.0,56.0,69.0
Income,255347.0,82499.304597,38963.013729,15000.0,48825.5,82466.0,116219.0,149999.0
LoanAmount,255347.0,127578.865512,70840.706142,5000.0,66156.0,127556.0,188985.0,249999.0
CreditScore,255347.0,574.264346,158.903867,300.0,437.0,574.0,712.0,849.0
MonthsEmployed,255347.0,59.541976,34.643376,0.0,30.0,60.0,90.0,119.0
NumCreditLines,255347.0,2.501036,1.117018,1.0,2.0,2.0,3.0,4.0
InterestRate,255347.0,13.492773,6.636443,2.0,7.77,13.46,19.25,25.0
LoanTerm,255347.0,36.025894,16.96933,12.0,24.0,36.0,48.0,60.0
DTIRatio,255347.0,0.500212,0.230917,0.1,0.3,0.5,0.7,0.9
Default,255347.0,0.116128,0.320379,0.0,0.0,0.0,0.0,1.0


In [14]:
df.isnull().sum()   # same as df.isna().sum()

LoanID            0
Age               0
Income            0
LoanAmount        0
CreditScore       0
MonthsEmployed    0
NumCreditLines    0
InterestRate      0
LoanTerm          0
DTIRatio          0
Education         0
EmploymentType    0
MaritalStatus     0
HasMortgage       0
HasDependents     0
LoanPurpose       0
HasCoSigner       0
Default           0
dtype: int64

## Data preprocessing / Data cleansing

In [15]:
df.drop('LoanID',axis= 1, inplace= True)
df.head()

Unnamed: 0,Age,Income,LoanAmount,CreditScore,MonthsEmployed,NumCreditLines,InterestRate,LoanTerm,DTIRatio,Education,EmploymentType,MaritalStatus,HasMortgage,HasDependents,LoanPurpose,HasCoSigner,Default
0,56,85994,50587,520,80,4,15.23,36,0.44,Bachelor's,Full-time,Divorced,Yes,Yes,Other,Yes,0
1,69,50432,124440,458,15,1,4.81,60,0.68,Master's,Full-time,Married,No,No,Other,Yes,0
2,46,84208,129188,451,26,3,21.17,24,0.31,Master's,Unemployed,Divorced,Yes,Yes,Auto,No,1
3,32,31713,44799,743,0,3,7.07,24,0.23,High School,Full-time,Married,No,No,Business,No,0
4,60,20437,9139,633,8,4,6.51,48,0.73,Bachelor's,Unemployed,Divorced,No,Yes,Auto,No,0


In [16]:
# convort the categorical into numerical 
from sklearn.preprocessing import LabelEncoder

df['Education'] = LabelEncoder().fit_transform(df['Education'])
df['EmploymentType'] = LabelEncoder().fit_transform(df['EmploymentType'])
df['MaritalStatus'] = LabelEncoder().fit_transform(df['MaritalStatus'])
df['HasMortgage'] = LabelEncoder().fit_transform(df['HasMortgage'])
df['HasDependents'] = LabelEncoder().fit_transform(df['HasDependents'])
df['LoanPurpose'] = LabelEncoder().fit_transform(df['LoanPurpose'])
df['HasCoSigner'] = LabelEncoder().fit_transform(df['HasCoSigner'])

In [17]:
df.shape

(255347, 17)

In [18]:
df['Default'].value_counts()

Default
0    225694
1     29653
Name: count, dtype: int64

### Function for Split the data into training and tesint data set

In [19]:
def  Split_Dataset(x, y):
    from sklearn.model_selection import train_test_split

    X_train, x_test, Y_train, y_test = train_test_split(x, y, train_size= 0.7, random_state=12350, stratify=y)
    return X_train, x_test, Y_train, y_test

# models function 

In [20]:

class Models:
    def __init__(self ,X_train, Y_train):
        self.X_train = X_train
        self.Y_train = Y_train
        

    # Built the Logistic Regression model
    def model_LG(self):
        from sklearn.linear_model import LogisticRegressionCV
        model_lg = LogisticRegressionCV().fit(self.X_train, self.Y_train)
        return model_lg

    # Built the Naive Bayes 
    def model_NB(self):
        from sklearn.naive_bayes import GaussianNB
        model_nb = GaussianNB().fit(self.X_train, self.Y_train)
        return model_nb

    # Built the KNN model
    def model_KNN(self):
        from sklearn.neighbors import KNeighborsClassifier
        model_knn = KNeighborsClassifier(n_cluster ).fit(self.X_train, self.Y_train)
        return model_knn

    # Built the Support Vector Machine
    def model_SVM(self):
        from sklearn.svm import SVC
        model_svm = SVC(C=3.0).fit(self.X_train, self.Y_train)
        return model_svm
        
    # Built the Decison tree model 
    def model_DT(self):
        from sklearn.tree import DecisionTreeClassifier
        model_dt = DecisionTreeClassifier().fit(self.X_train, self.Y_train)
        return model_dt

    # Built the model for Random forest
    def model_Rand_forest(self):
        from sklearn.ensemble import RandomForestClassifier
        model_rand_forest = RandomForestClassifier().fit(self.X_train, self.Y_train)
        return model_rand_forest    

    


### classification report and Model Evaluation function

In [21]:
class Model_Evaluation_class:
    def __init__(self ,x_test, y_test):
        self.x_test = x_test
        self.y_test = y_test

    def Classification_Report(self, model):
        from sklearn.metrics import classification_report
        return classification_report(self.y_test, model.predict(self.x_test))

    def model_evaluate(self, model, model_name):
        from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score,f1_score
        
        Accuracy = accuracy_score(self.y_test, model.predict(self.x_test))
        Precision = precision_score(self.y_test, model.predict(self.x_test))
        Recall = recall_score(self.y_test, model.predict(self.x_test))
        f1_Score = f1_score(self.y_test, model.predict(self.x_test))
        return model_name,Accuracy,Precision,Recall,f1_Score

# Deal with imbalanced Data

## Balancing data by using under sampling majority class

In [22]:
# separate the class 1, 0 count 
class_0_count, class_1_count = df['Default'].value_counts()

# separate  the class 1, 0 into 2 df
df_class_0 = df[df['Default']==0]
df_class_1 = df[df['Default']==1]



In [23]:
print(df_class_0['Default'].value_counts())
print(df_class_1['Default'].value_counts())

Default
0    225694
Name: count, dtype: int64
Default
1    29653
Name: count, dtype: int64


In [24]:
60000-29653

30347

In [25]:
df_class_0_under = df_class_0.sample(30347)

In [26]:
balance_df_by_under = pd.concat([df_class_1, df_class_0_under],axis= 0)
balance_df_by_under.value_counts()

Age  Income  LoanAmount  CreditScore  MonthsEmployed  NumCreditLines  InterestRate  LoanTerm  DTIRatio  Education  EmploymentType  MaritalStatus  HasMortgage  HasDependents  LoanPurpose  HasCoSigner  Default
69   142700  25379       494          116             3               4.33          24        0.66      1          2               0              1            1              3            0            0          1
     143074  82339       449          20              3               17.39         36        0.58      1          2               1              0            1              4            0            0          1
     143203  71647       604          66              2               21.03         60        0.31      2          1               2              1            1              2            1            0          1
     144083  134113      754          112             2               20.09         36        0.60      1          2               1              0      

In [27]:
balance_df_by_under.corr()

Unnamed: 0,Age,Income,LoanAmount,CreditScore,MonthsEmployed,NumCreditLines,InterestRate,LoanTerm,DTIRatio,Education,EmploymentType,MaritalStatus,HasMortgage,HasDependents,LoanPurpose,HasCoSigner,Default
Age,1.0,0.014928,-0.015774,0.00302,0.023195,-0.004666,-0.035309,-0.004447,-0.01326,0.009022,-0.009909,-0.005484,0.006388,0.009656,0.008975,0.014264,-0.258183
Income,0.014928,1.0,-0.068455,-0.002103,0.006412,-0.000502,-0.016258,-0.001029,0.003434,0.000129,-0.007879,0.000245,0.002447,-0.000705,-0.003401,-0.00509,-0.147201
LoanAmount,-0.015774,-0.068455,1.0,-0.000566,-0.003298,0.002452,0.009458,-0.004346,0.001662,-0.002527,0.005968,-0.00195,0.002602,0.000276,0.005081,-0.004314,0.135719
CreditScore,0.00302,-0.002103,-0.000566,1.0,0.005013,-0.001133,-0.006062,0.002481,-0.002253,0.009505,-0.00148,-0.005664,-0.00121,-0.001522,0.00528,0.007566,-0.052136
MonthsEmployed,0.023195,0.006412,-0.003298,0.005013,1.0,-0.003083,-0.016108,-0.007016,-0.007056,0.004211,-0.004892,0.010048,0.003685,0.004087,-0.000449,0.009582,-0.154903
NumCreditLines,-0.004666,-0.000502,0.002452,-0.001133,-0.003083,1.0,0.008437,0.000674,0.000969,0.004417,0.000534,0.00029,-0.003977,0.007051,0.002578,0.000901,0.048895
InterestRate,-0.035309,-0.016258,0.009458,-0.006062,-0.016108,0.008437,1.0,0.006244,0.008942,-0.002196,0.006196,-0.0119,-0.006681,-0.003395,-0.004464,-0.003829,0.207837
LoanTerm,-0.004447,-0.001029,-0.004346,0.002481,-0.007016,0.000674,0.006244,1.0,-0.000385,-0.005829,0.000951,-0.007176,0.006286,0.00431,0.002102,0.001955,0.003013
DTIRatio,-0.01326,0.003434,0.001662,-0.002253,-0.007056,0.000969,0.008942,-0.000385,1.0,0.000821,0.002607,0.00809,-0.001818,4.3e-05,-0.001399,-0.001203,0.041261
Education,0.009022,0.000129,-0.002527,0.009505,0.004211,0.004417,-0.002196,-0.005829,0.000821,1.0,0.001623,-0.003352,0.002736,-0.004443,-0.00847,0.001267,-0.04008


### Split the data set into x and y

In [28]:
x = balance_df_by_under[['Age', 'Income', 'LoanAmount', 'CreditScore', 'MonthsEmployed',
       'NumCreditLines', 'InterestRate', 'DTIRatio', 'Education',
       'EmploymentType', 'HasMortgage', 'HasDependents',
       'LoanPurpose', 'HasCoSigner']]
print(x.head())
y = balance_df_by_under['Default']
y.head()
balance_df_by_under.value_counts()

    Age  Income  LoanAmount  CreditScore  MonthsEmployed  NumCreditLines  \
2    46   84208      129188          451              26               3   
5    25   90298       90448          720              18               2   
8    36   42053       92357          827              83               1   
11   28  149227      139759          375              56               3   
18   19   40718       78515          319             119               2   

    InterestRate  DTIRatio  Education  EmploymentType  HasMortgage  \
2          21.17      0.31          2               3            1   
5          22.72      0.10          1               3            1   
8          23.94      0.20          0               2            1   
11          5.84      0.80          3               0            0   
18         14.00      0.17          0               2            1   

    HasDependents  LoanPurpose  HasCoSigner  
2               1            0            0  
5               0            1

Age  Income  LoanAmount  CreditScore  MonthsEmployed  NumCreditLines  InterestRate  LoanTerm  DTIRatio  Education  EmploymentType  MaritalStatus  HasMortgage  HasDependents  LoanPurpose  HasCoSigner  Default
69   142700  25379       494          116             3               4.33          24        0.66      1          2               0              1            1              3            0            0          1
     143074  82339       449          20              3               17.39         36        0.58      1          2               1              0            1              4            0            0          1
     143203  71647       604          66              2               21.03         60        0.31      2          1               2              1            1              2            1            0          1
     144083  134113      754          112             2               20.09         36        0.60      1          2               1              0      

### Split the data into training and testing dataset

In [29]:
X_train, x_test, Y_train, y_test = Split_Dataset(x, y)

### Build multiple model on above dataset

In [30]:
m1 = Models(X_train,Y_train)
models = [(m1.model_LG(),"LogisticRegression",(0,0)),(m1.model_KNN(), "KNeighborsClassifier",(0,1)),
          (m1.model_NB(),"Naive Bayes",(1,0)),(m1.model_SVM(),"Support Vector Machine",(1,1)),
          (m1.model_DT(),"DecisionTreeClassifier",(2,0)),(m1.model_Rand_forest(),"RandomForest",(2,1))]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

### Evaluate all models 

In [38]:
model_evaluation_obj = Model_Evaluation_class(x_test, y_test)
Results = []
for model,model_name,_ in models:
    Results.append(model_evaluation_obj.model_evaluate(model, model_name))

df1 = pd.DataFrame(Results,columns=['Algorithm','Accuracy', 'Precision', 'Recall', 'F1_Score'])
df1

Unnamed: 0,Algorithm,Accuracy,Precision,Recall,F1_Score
0,LogisticRegression,0.6785,0.678699,0.663669,0.6711
1,KNeighborsClassifier,0.540389,0.536074,0.520346,0.528093
2,Naive Bayes,0.677,0.670314,0.681767,0.675992
3,Support Vector Machine,0.586667,0.63155,0.392873,0.484407
4,DecisionTreeClassifier,0.584611,0.580965,0.57228,0.57659
5,RandomForest,0.6785,0.681919,0.655013,0.668196


### Classification report for all models 

In [41]:
for model,model_name,_ in models:
    print(f"Classification report for {model_name} is :")
    print(model_evaluation_obj.Classification_Report(model))
    print("-"*80)

Classification report for LogisticRegression is :
              precision    recall  f1-score   support

           0       0.68      0.69      0.69      9104
           1       0.68      0.66      0.67      8896

    accuracy                           0.68     18000
   macro avg       0.68      0.68      0.68     18000
weighted avg       0.68      0.68      0.68     18000

--------------------------------------------------------------------------------
Classification report for KNeighborsClassifier is :
              precision    recall  f1-score   support

           0       0.54      0.56      0.55      9104
           1       0.54      0.52      0.53      8896

    accuracy                           0.54     18000
   macro avg       0.54      0.54      0.54     18000
weighted avg       0.54      0.54      0.54     18000

--------------------------------------------------------------------------------
Classification report for Naive Bayes is :
              precision    recall  f1-s

## Oversampling 

In [30]:
30347 * 2

60694

In [31]:
df_class_1_over = df_class_1.sample(60000,replace= True)
print(df_class_1_over['Default'].value_counts())
df_class_1_over.head()

Default
1    60000
Name: count, dtype: int64


Unnamed: 0,Age,Income,LoanAmount,CreditScore,MonthsEmployed,NumCreditLines,InterestRate,LoanTerm,DTIRatio,Education,EmploymentType,MaritalStatus,HasMortgage,HasDependents,LoanPurpose,HasCoSigner,Default
156937,23,123379,35979,837,53,2,24.89,36,0.74,2,0,0,0,0,0,1,1
24441,68,94026,216283,566,40,4,14.64,60,0.48,1,2,1,1,0,1,1,1
232897,19,93287,247098,699,4,3,7.03,12,0.29,3,1,1,1,1,3,0,1
39927,30,123912,229758,329,91,3,13.58,36,0.56,3,3,0,0,1,1,0,1
87186,18,100565,41222,452,49,3,19.25,24,0.56,1,3,2,1,1,4,1,1


In [32]:
df_class_0_reduce = df_class_0.sample(60000)
print(df_class_0['Default'].value_counts())

Default
0    225694
Name: count, dtype: int64


In [33]:
balance_df_by_over = pd.concat([df_class_1_over, df_class_0_reduce],axis=0)
print(balance_df_by_over['Default'].value_counts())

Default
1    60000
0    60000
Name: count, dtype: int64


In [34]:
balance_df_by_over.corr()

Unnamed: 0,Age,Income,LoanAmount,CreditScore,MonthsEmployed,NumCreditLines,InterestRate,LoanTerm,DTIRatio,Education,EmploymentType,MaritalStatus,HasMortgage,HasDependents,LoanPurpose,HasCoSigner,Default
Age,1.0,0.01529,-0.017831,0.007635,0.018684,-0.007842,-0.038763,-0.001962,-0.012206,0.004096,-0.018142,-0.002588,0.0015,0.009628,0.01158,0.006207,-0.264507
Income,0.01529,1.0,-0.069971,-0.002637,0.006728,-0.000745,-0.013017,0.001943,0.001845,0.001063,-0.00673,-0.000519,-0.001525,0.004596,-0.001832,-0.002685,-0.15001
LoanAmount,-0.017831,-0.069971,1.0,-0.005082,-0.005336,0.00283,0.00561,-0.003092,-9.3e-05,-0.003511,0.007376,-0.000506,-0.005879,4.8e-05,0.00182,-0.001259,0.134094
CreditScore,0.007635,-0.002637,-0.005082,1.0,0.006182,-0.002822,-0.007322,-0.000621,0.003373,-0.001469,0.000344,-0.004386,-0.002977,-0.002863,0.00322,0.002902,-0.048827
MonthsEmployed,0.018684,0.006728,-0.005336,0.006182,1.0,-7.8e-05,-0.015259,-0.002788,-0.002739,0.000625,-0.003941,0.010295,0.00233,0.004598,0.003398,0.006414,-0.150508
NumCreditLines,-0.007842,-0.000745,0.00283,-0.002822,-7.8e-05,1.0,0.005294,-0.002463,0.004086,0.000876,-3.8e-05,0.001077,-0.005711,0.005463,0.006353,0.000957,0.041968
InterestRate,-0.038763,-0.013017,0.00561,-0.007322,-0.015259,0.005294,1.0,0.0085,0.007167,-0.005294,0.008617,-0.015735,-0.004076,-0.007098,0.002788,-0.00714,0.206832
LoanTerm,-0.001962,0.001943,-0.003092,-0.000621,-0.002788,-0.002463,0.0085,1.0,0.003457,-0.0001,0.000591,-0.0019,0.00306,0.010131,0.004215,-0.002358,0.00154
DTIRatio,-0.012206,0.001845,-9.3e-05,0.003373,-0.002739,0.004086,0.007167,0.003457,1.0,3.4e-05,0.005441,0.004588,-0.005801,-0.005294,-0.001688,-0.001901,0.032055
Education,0.004096,0.001063,-0.003511,-0.001469,0.000625,0.000876,-0.005294,-0.0001,3.4e-05,1.0,0.003999,-0.007356,0.00248,0.001175,-0.007955,0.007201,-0.035991


### Split the data set into x and y

In [35]:
x = balance_df_by_over[['Age', 'Income', 'LoanAmount', 'CreditScore', 'MonthsEmployed',
       'NumCreditLines', 'InterestRate', 'DTIRatio', 'Education',
       'EmploymentType', 'HasMortgage', 'HasDependents',
       'LoanPurpose', 'HasCoSigner']]
print(x.head())
y = balance_df_by_over['Default']
y.head()
balance_df_by_over.shape

        Age  Income  LoanAmount  CreditScore  MonthsEmployed  NumCreditLines  \
156937   23  123379       35979          837              53               2   
24441    68   94026      216283          566              40               4   
232897   19   93287      247098          699               4               3   
39927    30  123912      229758          329              91               3   
87186    18  100565       41222          452              49               3   

        InterestRate  DTIRatio  Education  EmploymentType  HasMortgage  \
156937         24.89      0.74          2               0            0   
24441          14.64      0.48          1               2            1   
232897          7.03      0.29          3               1            1   
39927          13.58      0.56          3               3            0   
87186          19.25      0.56          1               3            1   

        HasDependents  LoanPurpose  HasCoSigner  
156937              0   

(120000, 17)

### Split the data into training and testing dataset

In [36]:
X_train, x_test, Y_train, y_test = Split_Dataset(x, y)

### Build multiple model on above dataset

In [37]:
m2 = Models(X_train,Y_train)
models = [(m2.model_LG(),"LogisticRegression",(0,0)),(m2.model_KNN(), "KNeighborsClassifier",(0,1)),
          (m2.model_NB(),"Naive Bayes",(1,0)),(m2.model_SVM(),"Support Vector Machine",(1,1)),
          (m2.model_DT(),"DecisionTreeClassifier",(2,0)),(m2.model_Rand_forest(),"RandomForest",(2,1))]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

### Evaluate all models 

In [39]:
model_evaluation_obj = Model_Evaluation_class(x_test, y_test)
Results2 = []
for model,model_name,_ in models:
    Results2.append(model_evaluation_obj.model_evaluate(model, model_name))

df2 = pd.DataFrame(Results2,columns=['Algorithm','Accuracy', 'Precision', 'Recall', 'F1_Score'])
df2

Unnamed: 0,Algorithm,Accuracy,Precision,Recall,F1_Score
0,LogisticRegression,0.671611,0.672032,0.670389,0.671209
1,KNeighborsClassifier,0.626306,0.614783,0.6765,0.644166
2,Naive Bayes,0.669083,0.662537,0.689222,0.675616
3,Support Vector Machine,0.584889,0.6298,0.411889,0.498052
4,DecisionTreeClassifier,0.783,0.742803,0.865778,0.79959
5,RandomForest,0.850667,0.832701,0.877667,0.854593


### Classification report for all models 

In [40]:
for model,model_name,_ in models:
    print(f"Classification report for {model_name} is :")
    print(model_evaluation_obj.Classification_Report(model))
    print("-"*80)

Classification report for LogisticRegression is :
              precision    recall  f1-score   support

           0       0.67      0.67      0.67     18000
           1       0.67      0.67      0.67     18000

    accuracy                           0.67     36000
   macro avg       0.67      0.67      0.67     36000
weighted avg       0.67      0.67      0.67     36000

--------------------------------------------------------------------------------
Classification report for KNeighborsClassifier is :
              precision    recall  f1-score   support

           0       0.64      0.58      0.61     18000
           1       0.61      0.68      0.64     18000

    accuracy                           0.63     36000
   macro avg       0.63      0.63      0.63     36000
weighted avg       0.63      0.63      0.63     36000

--------------------------------------------------------------------------------
Classification report for Naive Bayes is :
              precision    recall  f1-s