### PART A: Preprocessing

#### Step-1: Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#### Step-2: Import the data

In [2]:
dataset=pd.read_csv("Logistic Data.csv")

In [3]:
dataset.head()

Unnamed: 0,Age,Salary,Purchased Plot
0,22,22990,0
1,38,24200,0
2,29,52030,0
3,30,68970,0
4,22,91960,0


#### Step-3: Checking for missing data

In [4]:
dataset.isnull().sum()

Age               0
Salary            0
Purchased Plot    0
dtype: int64

#### STep-4: Create Feature vector (X) and Dependent Variable column vector (Y)

In [5]:
X=dataset.iloc[:,:-1].values
Y=dataset.iloc[:,-1].values

In [6]:
X

array([[    22,  22990],
       [    38,  24200],
       [    29,  52030],
       [    30,  68970],
       [    22,  91960],
       [    30,  70180],
       [    30, 101640],
       [    35, 181500],
       [    28,  39930],
       [    38,  78650],
       [    29,  96800],
       [    29,  62920],
       [    23, 104060],
       [    35,  21780],
       [    21,  99220],
       [    32,  96800],
       [    50,  30250],
       [    48,  31460],
       [    49,  33880],
       [    51,  35090],
       [    48,  26620],
       [    50,  59290],
       [    51,  49610],
       [    48,  26620],
       [    49,  27830],
       [    50,  24200],
       [    52,  33880],
       [    50,  36300],
       [    32,  52030],
       [    34,  21780],
       [    34,  89540],
       [    30, 165770],
       [    24,  19360],
       [    31,  53240],
       [    30, 108900],
       [    38,  32670],
       [    36,  33880],
       [    33,  59290],
       [    29,  87120],
       [    30,  37510],


### PART-B: Building The classification model

#### Step-1: Splitting the dataset into training set and testing set

In [7]:
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest=train_test_split(X,Y,test_size=0.2)

#### Step-2: Classification Model

#### i)K Nearest Neighbours(KNN)

In [8]:
from sklearn.neighbors import KNeighborsClassifier

In [9]:
KNN=KNeighborsClassifier(n_neighbors=7)

In [10]:
#### Training the KNN classifier
KNN.fit(xtrain,ytrain)

In [11]:
## testing the KNN Classifier
Ypred_KNN=KNN.predict(xtest)

In [12]:
## Some metric to test the classifier
from sklearn.metrics import confusion_matrix,accuracy_score,precision_score,recall_score,f1_score

In [14]:
print("KNN Metrics for the model")
print("*************************")
print("Confusion Matrix:{}".format(confusion_matrix(ytest,Ypred_KNN)))
print("Accuracy Score:{}".format(accuracy_score(ytest,Ypred_KNN)))
print("Precision Score:{}".format(precision_score(ytest,Ypred_KNN)))
print("Recall Score:{}".format(recall_score(ytest,Ypred_KNN)))
print("F1 Score:{}".format(f1_score(ytest,Ypred_KNN)))

KNN Metrics for the model
*************************
Confusion Matrix:[[48  0]
 [ 7 25]]
Accuracy Score:0.9125
Precision Score:1.0
Recall Score:0.78125
F1 Score:0.8771929824561403


#### ii)Naive Bayes Algorithm

In [16]:
from sklearn.naive_bayes import GaussianNB

In [21]:
NB=GaussianNB()

In [22]:
### Training the NB classifier
NB.fit(xtrain,ytrain)
### testing the NB classifier
Ypred_NB=NB.predict(xtest)

In [24]:
print("Naive Bayes Metrics for the model")
print("*********************************")
print("Confusion Matrix:{}".format(confusion_matrix(ytest,Ypred_NB)))
print("Accuracy Score:{}".format(accuracy_score(ytest,Ypred_NB)))
print("Precision Score:{}".format(precision_score(ytest,Ypred_NB)))
print("Recall Score:{}".format(recall_score(ytest,Ypred_NB)))
print("F1 Score:{}".format(f1_score(ytest,Ypred_NB)))

Naive Bayes Metrics for the model
*********************************
Confusion Matrix:[[45  3]
 [ 4 28]]
Accuracy Score:0.9125
Precision Score:0.9032258064516129
Recall Score:0.875
F1 Score:0.8888888888888888


#### iii) Decision Tree

In [32]:
from sklearn.tree import DecisionTreeClassifier
DT=DecisionTreeClassifier(criterion='entropy',max_depth=4)

In [40]:
### Training the DT classifier
DT.fit(xtrain,ytrain)
### testing the DT classifier
Ypred_DT=DT.predict(xtest)

In [34]:
print("Decision Tree Metrics for the model")
print("***********************************")
print("Confusion Matrix:{}".format(confusion_matrix(ytest,Ypred_DT)))
print("Accuracy Score:{}".format(accuracy_score(ytest,Ypred_DT)))
print("Precision Score:{}".format(precision_score(ytest,Ypred_DT)))
print("Recall Score:{}".format(recall_score(ytest,Ypred_DT)))
print("F1 Score:{}".format(f1_score(ytest,Ypred_DT)))

Decision Tree Metrics for the model
***********************************
Confusion Matrix:[[45  3]
 [ 4 28]]
Accuracy Score:0.9125
Precision Score:0.9032258064516129
Recall Score:0.875
F1 Score:0.8888888888888888


#### iv) random Forest Tree classifier

In [35]:
from sklearn.ensemble import RandomForestClassifier

In [36]:
rf=RandomForestClassifier(n_estimators=1000,criterion='entropy')

In [37]:
### Training the RFT classifier
rf.fit(xtrain,ytrain)
### testing the RFT classifier
Ypred_rf=rf.predict(xtest)

In [39]:
print("Random Forest Tree Metrics for the model")
print("****************************************")
print("Confusion Matrix:{}".format(confusion_matrix(ytest,Ypred_rf)))
print("Accuracy Score:{}".format(accuracy_score(ytest,Ypred_rf)))
print("Precision Score:{}".format(precision_score(ytest,Ypred_rf)))
print("Recall Score:{}".format(recall_score(ytest,Ypred_rf)))
print("F1 Score:{}".format(f1_score(ytest,Ypred_rf)))

Random Forest Tree Metrics for the model
****************************************
Confusion Matrix:[[44  4]
 [ 3 29]]
Accuracy Score:0.9125
Precision Score:0.8787878787878788
Recall Score:0.90625
F1 Score:0.8923076923076924
