## Step A: DATA PREPROCESSING

#### Step 1: Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#### Step 2: Import the dataset

In [2]:
dataset = pd.read_csv("data/Logistic Data.csv")

In [3]:
dataset.head()

Unnamed: 0,Age,Salary,Purchased Plot
0,22,22990,0
1,38,24200,0
2,29,52030,0
3,30,68970,0
4,22,91960,0


#### Step 3: Do we have any missing data?

In [4]:
dataset.isnull().sum()

Age               0
Salary            0
Purchased Plot    0
dtype: int64

### Step B: Building classification model

#### Step 1: To create a feature matrix (X) and output dependent column vector (Y)

In [5]:
X = dataset.iloc[:, 0:2].values
Y = dataset.iloc[:, -1].values

In [6]:
X

array([[    22,  22990],
       [    38,  24200],
       [    29,  52030],
       [    30,  68970],
       [    22,  91960],
       [    30,  70180],
       [    30, 101640],
       [    35, 181500],
       [    28,  39930],
       [    38,  78650],
       [    29,  96800],
       [    29,  62920],
       [    23, 104060],
       [    35,  21780],
       [    21,  99220],
       [    32,  96800],
       [    50,  30250],
       [    48,  31460],
       [    49,  33880],
       [    51,  35090],
       [    48,  26620],
       [    50,  59290],
       [    51,  49610],
       [    48,  26620],
       [    49,  27830],
       [    50,  24200],
       [    52,  33880],
       [    50,  36300],
       [    32,  52030],
       [    34,  21780],
       [    34,  89540],
       [    30, 165770],
       [    24,  19360],
       [    31,  53240],
       [    30, 108900],
       [    38,  32670],
       [    36,  33880],
       [    33,  59290],
       [    29,  87120],
       [    30,  37510],


#### Step 2: Split the data into training set and testing set

In [7]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

#### Step 3: Building a classifier model

#### i) K Nearest Neighbours (KNN) Algorithm

In [8]:
from sklearn.neighbors import KNeighborsClassifier

KNN = KNeighborsClassifier(8)

#### Training the model

In [9]:
KNN.fit(x_train, y_train)

0,1,2
,n_neighbors,8
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,


#### Testing the model

In [10]:
y_pred_KNN = KNN.predict(x_test)

### Use some metric to evaluate the KNN classifier

In [11]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

In [12]:
confusion_matrix(y_test, y_pred_KNN)

array([[45,  4],
       [18, 13]])

In [13]:
print("KNN Classifier")
print("**************")
print(f"1. Confusion Matrix:\n{confusion_matrix(y_test, y_pred_KNN)}")
print(f"2. Accuracy Matrix: {accuracy_score(y_test, y_pred_KNN)}")
print(f"3. Precision Score: {precision_score(y_test, y_pred_KNN)}")
print(f"4. Recall Score: {recall_score(y_test, y_pred_KNN)}")
print(f"5. F1 Score: {f1_score(y_test, y_pred_KNN)}")

KNN Classifier
**************
1. Confusion Matrix:
[[45  4]
 [18 13]]
2. Accuracy Matrix: 0.725
3. Precision Score: 0.7647058823529411
4. Recall Score: 0.41935483870967744
5. F1 Score: 0.5416666666666666


#### ii) Naive Bayes Classifier

In [14]:
from sklearn.naive_bayes import GaussianNB

NB = GaussianNB()

### Training the NB model

In [15]:
NB.fit(x_train, y_train)

0,1,2
,priors,
,var_smoothing,1e-09


### Testing the NB model

In [16]:
y_pred_NB = NB.predict(x_test)

In [17]:
print("NB Classifier")
print("**************")
print(f"1. Confusion Matrix:\n{confusion_matrix(y_test, y_pred_NB)}")
print(f"2. Accuracy Matrix: {accuracy_score(y_test, y_pred_NB)}")
print(f"3. Precision Score: {precision_score(y_test, y_pred_NB)}")
print(f"4. Recall Score: {recall_score(y_test, y_pred_NB)}")
print(f"5. F1 Score: {f1_score(y_test, y_pred_NB)}")

NB Classifier
**************
1. Confusion Matrix:
[[46  3]
 [13 18]]
2. Accuracy Matrix: 0.8
3. Precision Score: 0.8571428571428571
4. Recall Score: 0.5806451612903226
5. F1 Score: 0.6923076923076923


#### iii) Decision Tree Classifier

In [18]:
from sklearn.tree import DecisionTreeClassifier

DT = DecisionTreeClassifier(criterion="entropy", max_depth=4)

### Training the DT model

In [19]:
DT.fit(x_train, y_train)

0,1,2
,criterion,'entropy'
,splitter,'best'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


### Testing the DT model

In [20]:
y_pred_DT = DT.predict(x_test)

In [21]:
print("DT Classifier")
print("**************")
print(f"1. Confusion Matrix:\n{confusion_matrix(y_test, y_pred_DT)}")
print(f"2. Accuracy Matrix: {accuracy_score(y_test, y_pred_DT)}")
print(f"3. Precision Score: {precision_score(y_test, y_pred_DT)}")
print(f"4. Recall Score: {recall_score(y_test, y_pred_DT)}")
print(f"5. F1 Score: {f1_score(y_test, y_pred_DT)}")

DT Classifier
**************
1. Confusion Matrix:
[[45  4]
 [11 20]]
2. Accuracy Matrix: 0.8125
3. Precision Score: 0.8333333333333334
4. Recall Score: 0.6451612903225806
5. F1 Score: 0.7272727272727273


#### iv) Random Forest Classifier

In [22]:
from sklearn.ensemble import RandomForestClassifier

RF = RandomForestClassifier(1000, criterion="entropy")
RF.fit(x_train, y_train)
y_pred_RF = RF.predict(x_test)

In [23]:
print("RF Classifier")
print("**************")
print(f"1. Confusion Matrix:\n{confusion_matrix(y_test, y_pred_RF)}")
print(f"2. Accuracy Matrix: {accuracy_score(y_test, y_pred_RF)}")
print(f"3. Precision Score: {precision_score(y_test, y_pred_RF)}")
print(f"4. Recall Score: {recall_score(y_test, y_pred_RF)}")
print(f"5. F1 Score: {f1_score(y_test, y_pred_RF)}")

RF Classifier
**************
1. Confusion Matrix:
[[45  4]
 [ 7 24]]
2. Accuracy Matrix: 0.8625
3. Precision Score: 0.8571428571428571
4. Recall Score: 0.7741935483870968
5. F1 Score: 0.8135593220338984
