#### PART A: DATA Preprocessing

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#### step 2: Import the data

In [2]:
dataset=pd.read_csv('Logistic Data.csv')

In [3]:
dataset.head()

Unnamed: 0,Age,Salary,Purchased Plot
0,22,22990,0
1,38,24200,0
2,29,52030,0
3,30,68970,0
4,22,91960,0


#### Step 3: Checking for missing data

In [4]:
dataset.isnull().sum()

Age               0
Salary            0
Purchased Plot    0
dtype: int64

#### Step:4 Create feature vector(x) and dependent variable column vector(y)

In [5]:
X=dataset.iloc[:, :- 1].values
Y=dataset.iloc[:,-1].values #means takes all rows and only the last column

In [6]:
X

array([[    22,  22990],
       [    38,  24200],
       [    29,  52030],
       [    30,  68970],
       [    22,  91960],
       [    30,  70180],
       [    30, 101640],
       [    35, 181500],
       [    28,  39930],
       [    38,  78650],
       [    29,  96800],
       [    29,  62920],
       [    23, 104060],
       [    35,  21780],
       [    21,  99220],
       [    32,  96800],
       [    50,  30250],
       [    48,  31460],
       [    49,  33880],
       [    51,  35090],
       [    48,  26620],
       [    50,  59290],
       [    51,  49610],
       [    48,  26620],
       [    49,  27830],
       [    50,  24200],
       [    52,  33880],
       [    50,  36300],
       [    32,  52030],
       [    34,  21780],
       [    34,  89540],
       [    30, 165770],
       [    24,  19360],
       [    31,  53240],
       [    30, 108900],
       [    38,  32670],
       [    36,  33880],
       [    33,  59290],
       [    29,  87120],
       [    30,  37510],


### PART B: Building the classification Model

#### Step 1: Splitting the dataset into training set and testing set

In [7]:
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest=train_test_split(X,Y,test_size=0.2)

#### Step 2: Classification Model

#### i) K Nearest Neighbors

In [8]:
from sklearn.neighbors import KNeighborsClassifier

In [9]:
KNN=KNeighborsClassifier(n_neighbors=7)

In [10]:
## Training the KNN classifier
KNN.fit(xtrain,ytrain)

In [11]:
## Testing the KNN classifier
Ypred_KNN=KNN.predict(xtest)

In [12]:
## Some metric to test the classifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

In [13]:
print(" KNN Metrics for the model")
print("***************************")
print("Confusion matrix:{}".format(confusion_matrix(ytest,Ypred_KNN)))
print("Accuracy:{}".format(accuracy_score(ytest,Ypred_KNN)))
print(" Precision Score: {}".format(precision_score(ytest, Ypred_KNN)))
print(" Recall Score: {}".format(recall_score(ytest, Ypred_KNN)))
print(" F1 Score :{}".format(f1_score(ytest, Ypred_KNN)))

 KNN Metrics for the model
***************************
Confusion matrix:[[48  4]
 [ 8 20]]
Accuracy:0.85
 Precision Score: 0.8333333333333334
 Recall Score: 0.7142857142857143
 F1 Score :0.7692307692307693


#### ii) Naive Bayes' ALgorithm

In [14]:
from sklearn.naive_bayes import GaussianNB

In [15]:
NB=GaussianNB() #gaussianNB is a class

In [29]:
## Training the NB classifier
NB.fit(xtrain,ytrain)

In [27]:
### Testing the NB classifier
Ypred_NB=NB.predict(xtest)

In [28]:
print(" Naive Bayes' Metrics for the model")
print("***************************")
print("Confusion matrix:{}".format(confusion_matrix(ytest,Ypred_NB)))
print("Accuracy:{}".format(accuracy_score(ytest,Ypred_NB)))
print(" Precision Score: {}".format(precision_score(ytest, Ypred_NB)))
print(" Recall Score: {}".format(recall_score(ytest, Ypred_NB)))
print(" F1 Score :{}".format(f1_score(ytest, Ypred_NB)))

 Naive Bayes' Metrics for the model
***************************
Confusion matrix:[[51  1]
 [ 7 21]]
Accuracy:0.9
 Precision Score: 0.9545454545454546
 Recall Score: 0.75
 F1 Score :0.84


#### iii) Decision Tree

In [18]:
from sklearn.tree import DecisionTreeClassifier
dt=DecisionTreeClassifier(criterion='entropy',max_depth=4)

In [19]:
## Training the DT classifier
dt.fit(xtrain,ytrain)

In [30]:
### Testing the DT classifier
Ypred_DT=dt.predict(xtest)

In [31]:
print(" Decision Tree Metrics for the model")
print("***************************")
print("Confusion matrix:{}".format(confusion_matrix(ytest,Ypred_DT)))
print("Accuracy:{}".format(accuracy_score(ytest,Ypred_DT)))
print(" Precision Score: {}".format(precision_score(ytest, Ypred_DT)))
print(" Recall Score: {}".format(recall_score(ytest, Ypred_DT)))
print(" F1 Score :{}".format(f1_score(ytest, Ypred_DT)))

 Decision Tree Metrics for the model
***************************
Confusion matrix:[[49  3]
 [ 5 23]]
Accuracy:0.9
 Precision Score: 0.8846153846153846
 Recall Score: 0.8214285714285714
 F1 Score :0.8518518518518519


### Few more matrices which are common with Regression model
#### Mean-Absolute Error(MAE)
#### Mean-Square Error(MSE)

#### iv) Random Forest Tree Classifier

In [32]:
from sklearn.ensemble import RandomForestClassifier

In [33]:
rf=RandomForestClassifier(n_estimators=1000, criterion='entropy')

In [37]:
## Training the RFT classifier
rf.fit(xtrain,ytrain)

In [38]:
### Testing the RFT classifier
Ypred_RFT=rf.predict(xtest)

In [39]:
print(" Random Forest Tree Metrics for the model")
print("***************************")
print("Confusion matrix:{}".format(confusion_matrix(ytest,Ypred_RFT)))
print("Accuracy:{}".format(accuracy_score(ytest,Ypred_RFT)))
print(" Precision Score: {}".format(precision_score(ytest, Ypred_RFT)))
print(" Recall Score: {}".format(recall_score(ytest, Ypred_RFT)))
print(" F1 Score :{}".format(f1_score(ytest, Ypred_RFT)))

 Random Forest Tree Metrics for the model
***************************
Confusion matrix:[[48  4]
 [ 5 23]]
Accuracy:0.8875
 Precision Score: 0.8518518518518519
 Recall Score: 0.8214285714285714
 F1 Score :0.8363636363636363
