# Step-A:DATA PREPROCESSING

#### Step-1: import the libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
#### Step-2: Import the data

In [3]:
dataset=pd.read_csv('vehicle.data')

In [4]:
dataset.head()

Unnamed: 0,vhigh,vhigh.1,2,2.1,small,low,unacc
0,vhigh,vhigh,2,2,small,med,unacc
1,vhigh,vhigh,2,2,small,high,unacc
2,vhigh,vhigh,2,2,med,low,unacc
3,vhigh,vhigh,2,2,med,med,unacc
4,vhigh,vhigh,2,2,med,high,unacc


#### Step-3: Split the data into X and Y

In [7]:
X=dataset.iloc[:,:-1].values
Y=dataset.iloc[:,-1].values

In [8]:
X

array([['vhigh', 'vhigh', '2', '2', 'small', 'med'],
       ['vhigh', 'vhigh', '2', '2', 'small', 'high'],
       ['vhigh', 'vhigh', '2', '2', 'med', 'low'],
       ...,
       ['low', 'low', '5more', 'more', 'big', 'low'],
       ['low', 'low', '5more', 'more', 'big', 'med'],
       ['low', 'low', '5more', 'more', 'big', 'high']], dtype=object)

In [9]:
Y

array(['unacc', 'unacc', 'unacc', ..., 'unacc', 'good', 'vgood'],
      dtype=object)

#### Step-4: Conversion from categorical data present in X

#### 1] class OrdinalEncoder

In [10]:
from sklearn.preprocessing import OrdinalEncoder

In [11]:
OE=OrdinalEncoder()

In [12]:
X=np.array(OE.fit_transform(X))
X

array([[3., 3., 0., 0., 2., 2.],
       [3., 3., 0., 0., 2., 0.],
       [3., 3., 0., 0., 1., 1.],
       ...,
       [1., 1., 3., 2., 0., 1.],
       [1., 1., 3., 2., 0., 2.],
       [1., 1., 3., 2., 0., 0.]])

#### Step-5: Conversion of categorical data in Y

In [14]:
from sklearn.preprocessing import LabelEncoder
LE=LabelEncoder()
print(Y)
Y=np.array(LE.fit_transform(Y))
print(Y)

['unacc' 'unacc' 'unacc' ... 'unacc' 'good' 'vgood']
[2 2 2 ... 2 1 3]


## Step-B: Building the classification model

#### step-1: Split data into training and testing

In [15]:
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest=train_test_split(X,Y,test_size=0.2)

#### Step-2: Building Classifiers

#### i)K Nearest Neighbours(KNN)

In [16]:
from sklearn.neighbors import KNeighborsClassifier
KNN=KNeighborsClassifier(n_neighbors=7)
#### Training the KNN classifier
KNN.fit(xtrain,ytrain)
## testing the KNN Classifier
Ypred_KNN=KNN.predict(xtest)
## Some metric to test the classifier
from sklearn.metrics import confusion_matrix,accuracy_score,precision_score,recall_score,f1_score

#### ii)Naive Bayes Algorithm

In [18]:
from sklearn.naive_bayes import GaussianNB
NB=GaussianNB()
### Training the NB classifier
NB.fit(xtrain,ytrain)
### testing the NB classifier
Ypred_NB=NB.predict(xtest)

#### iii) Decision Tree

In [19]:
from sklearn.tree import DecisionTreeClassifier
DT=DecisionTreeClassifier(criterion='entropy',max_depth=4)
### Training the DT classifier
DT.fit(xtrain,ytrain)
### testing the DT classifier
Ypred_DT=DT.predict(xtest)

#### iv) random Forest Tree classifier

In [20]:
from sklearn.ensemble import RandomForestClassifier
rf=RandomForestClassifier(n_estimators=1000,criterion='entropy')
### Training the RFT classifier
rf.fit(xtrain,ytrain)
### testing the RFT classifier
Ypred_rf=rf.predict(xtest)

In [25]:
print("KNN Metrics for the model")
print("*************************")
print("Confusion Matrix:{}".format(confusion_matrix(ytest,Ypred_KNN)))
print("Accuracy Score:{}".format(accuracy_score(ytest,Ypred_KNN)))
print("Precision Score:{}".format(precision_score(ytest,Ypred_KNN,average=None,zero_division=0)))
print("Recall Score:{}".format(recall_score(ytest,Ypred_KNN,average=None,zero_division=0)))
print("F1 Score:{}".format(f1_score(ytest,Ypred_KNN,average=None,zero_division=0)))
print("Naive Bayes Metrics for the mode,average=None,zero_division=0l")
print("*********************************")
print("Confusion Matrix:{}".format(confusion_matrix(ytest,Ypred_NB)))
print("Accuracy Score:{}".format(accuracy_score(ytest,Ypred_NB)))
print("Precision Score:{}".format(precision_score(ytest,Ypred_NB,average=None,zero_division=0)))
print("Recall Score:{}".format(recall_score(ytest,Ypred_NB,average=None,zero_division=0)))
print("F1 Score:{}".format(f1_score(ytest,Ypred_NB,average=None,zero_division=0)))
print("Decision Tree Metrics for the mode,average=None,zero_division=0l")
print("***********************************")
print("Confusion Matrix:{}".format(confusion_matrix(ytest,Ypred_DT)))
print("Accuracy Score:{}".format(accuracy_score(ytest,Ypred_DT)))
print("Precision Score:{}".format(precision_score(ytest,Ypred_DT,average=None,zero_division=0)))
print("Recall Score:{}".format(recall_score(ytest,Ypred_DT,average=None,zero_division=0)))
print("F1 Score:{}".format(f1_score(ytest,Ypred_DT,average=None,zero_division=0)))
print("Random Forest Tree Metrics for the model")
print("****************************************")
print("Confusion Matrix:{}".format(confusion_matrix(ytest,Ypred_rf)))
print("Accuracy Score:{}".format(accuracy_score(ytest,Ypred_rf)))
print("Precision Score:{}".format(precision_score(ytest,Ypred_rf,average=None,zero_division=0)))
print("Recall Score:{}".format(recall_score(ytest,Ypred_rf,average=None,zero_division=0)))
print("F1 Score:{}".format(f1_score(ytest,Ypred_rf,average=None,zero_division=0)))

KNN Metrics for the model
*************************
Confusion Matrix:[[ 65   0  10   0]
 [  5   7   0   0]
 [  1   0 247   0]
 [  0   0   0  11]]
Accuracy Score:0.953757225433526
Precision Score:[0.91549296 1.         0.96108949 1.        ]
Recall Score:[0.86666667 0.58333333 0.99596774 1.        ]
F1 Score:[0.89041096 0.73684211 0.97821782 1.        ]
Naive Bayes Metrics for the mode,average=None,zero_division=0l
*********************************
Confusion Matrix:[[ 10   0  30  35]
 [  2   0   5   5]
 [  4   0 211  33]
 [  0   0   0  11]]
Accuracy Score:0.6705202312138728
Precision Score:[0.625      0.         0.85772358 0.13095238]
Recall Score:[0.13333333 0.         0.85080645 1.        ]
F1 Score:[0.21978022 0.         0.85425101 0.23157895]
Decision Tree Metrics for the mode,average=None,zero_division=0l
***********************************
Confusion Matrix:[[ 68   0   7   0]
 [ 12   0   0   0]
 [ 27   0 221   0]
 [ 11   0   0   0]]
Accuracy Score:0.8352601156069365
Precision Score