# Q4
## Implement Bayes Classifier for Iris Dataset.

Dataset Specifications:

Total number of samples = 150

Number of classes = 3 (Iris setosa, Iris virginica, and Iris versicolor)

Number of samples in each class = 50

Use the following information to design classifier:

Number of training feature vectors (first 40 in each class) = 40 per class

Number of test feature vectors (remaining 10 in each class) = 10 per class

Number of dimensions = 4

Feature vector = `<sepal length, sepal width, petal length, petal width>`

If the samples follow a multivariate normal density, find the accuracy of classification for the test
feature vectors.

In [8]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math

In [9]:
# Load the Iris table; header=0 takes the column names from the first row of the CSV.
DATASET_CSV = "Iris_dataset.csv"
df = pd.read_csv(DATASET_CSV, header=0)
df.head(10)

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa
5,5.4,3.9,1.7,0.4,Setosa
6,4.6,3.4,1.4,0.3,Setosa
7,5.0,3.4,1.5,0.2,Setosa
8,4.4,2.9,1.4,0.2,Setosa
9,4.9,3.1,1.5,0.1,Setosa


In [10]:
# Tally the distinct labels in the "variety" column.
# np.unique(..., return_counts=True) yields a (unique_labels, counts) pair.
labels_column = df["variety"]
variety = np.unique(labels_column, return_counts=True)
variety

(array(['Setosa', 'Versicolor', 'Virginica'], dtype=object),
 array([50, 50, 50], dtype=int64))

In [11]:
# Keep exactly the four measurement columns named in the problem statement.
# Selecting by name (rather than dropping 'variety') guarantees that no other
# column present in the CSV -- e.g. a saved row-index column -- ends up inside
# the feature vectors, and lets this cell be re-run safely: it no longer
# requires 'variety' to still be present in df.
feature_columns = ['sepal.length', 'sepal.width', 'petal.length', 'petal.width']
df = df[feature_columns]
# One row per sample, one column per feature, in the order listed above.
vec_np = df.to_numpy()

In [12]:
# Equal prior for every class: 40 training vectors per class out of
# 3 * 40 = 120 training vectors in total.
TRAIN_PER_CLASS = 40
N_CLASSES = 3
priori_prob = TRAIN_PER_CLASS / (TRAIN_PER_CLASS * N_CLASSES)
priori_prob

0.3333333333333333

In [12]:
# Class labels used as dictionary keys throughout the notebook.
class_names = ['Setosa','Versicolor', 'Virginica']

# Fixed row slices of vec_np per class.
# NOTE(review): this assumes the rows are grouped as 50 consecutive samples
# per class in the order Setosa, Versicolor, Virginica -- confirm against the CSV.

# Held-out samples: the last 10 rows of each 50-row block.
test = {
    'Setosa': vec_np[40:50],
    'Versicolor': vec_np[90:100],
    'Virginica': vec_np[140:150],
}

# Training samples: the first 40 rows of each 50-row block.
train = {
    'Setosa': vec_np[0:40],
    'Versicolor': vec_np[50:90],
    'Virginica': vec_np[100:140],
}

# Per-class Gaussian parameters estimated from the training rows only.
# Mean over samples (axis 0) gives one value per feature.
train_mean = {name: train[name].mean(axis=0) for name in class_names}
# np.cov expects variables in rows, hence the transpose; ddof=1 divides by n - 1.
train_covar = {name: np.cov(train[name].T, ddof=1) for name in class_names}
# Inverse and determinant of each class covariance, cached for the discriminant.
train_incovar = {name: np.linalg.inv(cov) for name, cov in train_covar.items()}
det = {name: np.linalg.det(cov) for name, cov in train_covar.items()}

In [16]:
def prob(x, variety, priori_prob):
    """Return the Gaussian log-discriminant score of ``x`` for class ``variety``.

    The value is

        -0.5 * (x - mean)^T * inv_cov * (x - mean) - 0.5 * ln(det_cov) + ln(prior)

    i.e. a log-scale score (it can be negative or greater than 1), not a
    probability. Larger means the class is a better match.

    Parameters
    ----------
    x : feature vector to score.
    variety : class label; used as the key into the module-level
        ``train_mean``, ``train_incovar`` and ``det`` dictionaries.
    priori_prob : prior probability of the class (must be > 0 for the log).

    Returns
    -------
    The discriminant score as a float.
    """
    centered = x - train_mean[variety]
    # Quadratic form (x - mean)^T * inv_cov * (x - mean).
    quadratic_form = np.dot(centered, np.dot(train_incovar[variety], centered))
    half_log_det = math.log(det[variety]) * 0.5
    return quadratic_form * (- 0.5) - half_log_det + math.log(priori_prob)

In [19]:
def classify(x,expected,error):
    """Predict the class of ``x`` and record whether the prediction was right.

    Each of the three classes is scored with ``prob`` (using the module-level
    ``priori_prob``) and the class with the largest score is chosen.

    Unlike a chain of strict ``>`` comparisons, a prediction is always made:
    when scores tie, the first class in the order Setosa, Virginica,
    Versicolor wins. This guarantees exactly one entry is appended to
    ``error`` per call, so no sample is silently left out of the accuracy.

    Parameters
    ----------
    x : feature vector, passed unchanged to ``prob``.
    expected : the true class label of ``x``.
    error : list mutated in place. Despite its name, 1 is appended for a
        correct prediction and 0 for an incorrect one.

    Returns
    -------
    str of the form
    ``"<predicted>, Setosa=<score> Virginica=<score> Versicolor=<score>"``.
    """
    # Insertion order doubles as the tie-break order: max() returns the first
    # key among equal maxima.
    scores = {
        'Setosa': prob(x, 'Setosa', priori_prob),
        'Virginica': prob(x, 'Virginica', priori_prob),
        'Versicolor': prob(x, 'Versicolor', priori_prob),
    }
    predicted = max(scores, key=scores.get)

    error.append(1 if predicted == expected else 0)

    return (predicted
            + ", Setosa=" + str(scores['Setosa'])
            + " Virginica=" + str(scores['Virginica'])
            + " Versicolor=" + str(scores['Versicolor']))



In [20]:
# Run the classifier over every held-out sample, grouped by true class.
# classify() appends one entry to `error` per sample (1 = correct, 0 = wrong)
# and returns the text printed after "Predicted =".
error = []
for actual_class in class_names:
    for sample in test[actual_class]:
        print(f"Actual class = {actual_class} Predicted =", classify(sample, actual_class, error))
    # Blank line between classes.
    print()


Actual class = Setosa Predicted = Setosa, Setosa=4.63310387911235Virginica=-78.07995969820902Versicolor=-48.60922516630989
Actual class = Setosa Predicted = Setosa, Setosa=-3.2551783239922676Virginica=-55.722793384945234Versicolor=-24.502686750246124
Actual class = Setosa Predicted = Setosa, Setosa=3.4757546187007247Virginica=-63.53789071428236Versicolor=-40.637442564564424
Actual class = Setosa Predicted = Setosa, Setosa=-2.759553396581521Virginica=-62.303428372793796Versicolor=-37.59257278742209
Actual class = Setosa Predicted = Setosa, Setosa=0.2619479039663155Virginica=-64.37245580978977Versicolor=-43.933636217951545
Actual class = Setosa Predicted = Setosa, Setosa=3.817470291471741Virginica=-63.23682014839941Versicolor=-33.5639692508607
Actual class = Setosa Predicted = Setosa, Setosa=3.838270235832767Virginica=-78.16970449707452Versicolor=-55.76911987993149
Actual class = Setosa Predicted = Setosa, Setosa=4.807118182623779Virginica=-63.936968198403584Versicolor=-39.41525618048801

In [18]:
# `error` holds 1 for every correctly classified test sample and 0 otherwise,
# so the sum is the number of correct predictions.
n_correct = sum(error)
Accuracy = n_correct / len(error)
print("Accuracy=", Accuracy)

Accuracy= 1.0
