# Consider the 128-dimensional feature vectors given in the “face feature vectors.csv” file. Use this information to design and implement a Bayes classifier.

Dataset Specifications:
Total number of samples = 800
Number of classes = 2 ( labelled as “male” and “female”)
Samples “1 to 400” belong to class “male”
Samples “401 to 800” belong to class “female”
Number of samples per class = 400
Use the following information to design classifier:
Number of test feature vectors per class (the first 5 in each class) = 5
Number of training feature vectors per class (the remaining 395 in each class) = 395
Number of dimensions = 128

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math

In [4]:
# Load the feature table from the CSV file in the current working directory.
df=pd.read_csv("./face feature vectors.csv")

In [5]:
# Display the first 401 rows to inspect where the label column switches
# from "male" to "female".
df.head(401)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,0,1,2,3,4,5,6,7,...,118,119,120,121,122,123,124,125,126,127
0,1,male,-0.066420,0.151611,0.027740,0.052771,-0.066105,-0.041232,-0.002637,-0.158467,...,0.025989,-0.001087,0.027260,-0.046754,-0.118619,-0.163774,-0.000590,-0.076400,0.107497,0.001567
1,2,male,-0.030614,0.049667,0.008084,-0.050324,0.007649,-0.063818,-0.019530,-0.119905,...,0.044229,-0.023900,-0.028108,0.040618,-0.146579,-0.141244,0.016162,0.017638,0.080610,-0.015930
2,3,male,-0.096178,0.061127,0.035326,-0.035388,-0.090728,-0.018634,-0.024315,-0.139786,...,0.111141,0.059436,-0.029222,0.042115,-0.222173,-0.116908,0.093428,0.017391,0.057652,0.086116
3,4,male,-0.103057,0.085044,0.078333,-0.035873,-0.028163,0.004924,0.007829,-0.017016,...,0.100793,-0.002644,-0.023388,0.029497,-0.139830,-0.119243,0.005306,-0.015100,0.161575,0.062462
4,5,male,-0.125815,0.120046,0.023131,-0.042901,0.038215,-0.049677,-0.054258,-0.130758,...,0.090197,0.067527,0.039926,0.047469,-0.056852,-0.076700,0.004966,0.028171,0.026041,0.084135
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
396,397,male,-0.158460,0.109948,0.019088,0.015506,-0.069668,0.032311,0.015062,-0.140817,...,0.143453,0.059608,-0.006824,0.056758,-0.064352,-0.108518,0.132037,0.050347,0.071465,-0.022954
397,398,male,-0.101499,0.119739,0.016951,-0.013677,-0.055524,0.028399,0.028164,-0.152100,...,0.046488,0.051044,-0.008298,0.018010,-0.164197,-0.122292,0.032616,-0.030194,-0.018642,0.032821
398,399,male,-0.149516,0.081588,0.090796,-0.053116,-0.133314,0.001096,0.019941,-0.117803,...,0.111678,-0.016921,0.059115,-0.007810,-0.096824,-0.079415,-0.015487,-0.075470,0.062481,0.052727
399,400,female,0.039844,0.070357,0.130196,-0.007683,-0.077825,-0.021298,-0.024133,-0.085105,...,0.105510,0.081928,-0.033337,-0.023604,-0.167003,-0.059075,0.053074,0.080940,0.011467,-0.021999


In [6]:
# Count the samples per label in the "Unnamed: 1" column. With
# return_counts=True, np.unique returns a (sorted unique labels, counts) pair;
# the bare name on the last line displays it as the cell output.
gender=np.unique(df["Unnamed: 1"],return_counts=True)
gender

(array(['female', 'male'], dtype=object), array([401, 399], dtype=int64))

In [7]:
# List the column names of the loaded table.
df.columns

Index(['Unnamed: 0', 'Unnamed: 1', '0', '1', '2', '3', '4', '5', '6', '7',
       ...
       '118', '119', '120', '121', '122', '123', '124', '125', '126', '127'],
      dtype='object', length=130)

In [8]:
# Drop the two non-feature columns so only the feature columns "0".."127"
# remain. "Unnamed: 1" holds the male/female label; "Unnamed: 0" appears to be
# a running sample number (TODO confirm against the CSV).
vectors=df.drop(['Unnamed: 0', 'Unnamed: 1'],axis=1)

In [9]:
# Preview the first two rows of the feature-only frame.
vectors.head(2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,118,119,120,121,122,123,124,125,126,127
0,-0.06642,0.151611,0.02774,0.052771,-0.066105,-0.041232,-0.002637,-0.158467,0.130467,-0.044872,...,0.025989,-0.001087,0.02726,-0.046754,-0.118619,-0.163774,-0.00059,-0.0764,0.107497,0.001567
1,-0.030614,0.049667,0.008084,-0.050324,0.007649,-0.063818,-0.01953,-0.119905,0.186553,-0.044821,...,0.044229,-0.0239,-0.028108,0.040618,-0.146579,-0.141244,0.016162,0.017638,0.08061,-0.01593


In [10]:
# Convert the feature DataFrame to a NumPy array (one row per sample).
vec_np=vectors.to_numpy()

## Scale the features so that the determinant of each covariance matrix stays within a practical numeric range

In [11]:
# Multiply every feature by 1000 so the covariance determinants computed later
# do not become impractically small.
# NOTE: this rebinds vec_np from its own value, so re-running this cell without
# re-running the conversion cell above scales the data a second time.
vec_np=vec_np*1000

In [12]:
# Class priors taken from the training-set class frequencies. The label counts
# shown earlier are 399 "male" and 401 "female"; holding out 5 samples per
# class for testing leaves 394 + 396 = 790 training samples.
p_male, p_female = (399 - 5) / 790, (401 - 5) / 790

In [13]:
# Rows 0-398 are the male samples and rows 399-799 the female samples. The
# first five rows of each class are held out for testing; the rest are used
# for training.
test_male, train_male = vec_np[:5], vec_np[5:399]
test_female, train_female = vec_np[399:404], vec_np[404:800]

# Per-class mean feature vector, averaged over the training rows.
train_m_mean = train_male.mean(axis=0)
train_f_mean = train_female.mean(axis=0)

In [14]:
# Sample covariance matrix of each class. np.cov expects one variable per row
# by default, so the (samples x features) training arrays are transposed.
covar_m = np.cov(train_male.T, ddof=1)
covar_f = np.cov(train_female.T, ddof=1)

# Inverse covariances, used in the quadratic term of the discriminant.
incovar_m = np.linalg.inv(covar_m)
incovar_f = np.linalg.inv(covar_f)

In [15]:
# Determinants of the two class covariance matrices; their logarithms enter
# the discriminant functions. The printed values are computed on the features
# after the x1000 scaling applied earlier.
det_f, det_m = np.linalg.det(covar_f), np.linalg.det(covar_m)
print("female:", det_f, "male:", det_m)

female: 5.001428883516973e-96 male: 7.444467112593653e-95


## For easier computation we work with log p(wi|x) and ignore p(x), which is the same for every class
## log p(x|wi) + log p(wi) is then the deciding factor
## Dropping the class-independent constant, log p(x|wi) reduces to [-0.5 * (x-mean_i)^T (inv_cov_i) (x-mean_i)] - [0.5 * log(det(cov_i))]

# [-0.5 * (x-mean_i)^T (inv_cov_i) (x-mean_i)] - [0.5 * log(det(cov_i))] + log p(wi) is computed for each class, and the class with the larger value is chosen

In [16]:
#K=math.log(math.pi*2)*128/2
def log_gaussian_score(x, mean, inv_cov, det):
    """Return the Gaussian log-likelihood of ``x`` up to an additive constant.

    Evaluates ``-0.5 * (x - mean)^T inv_cov (x - mean) - 0.5 * log(det)``.
    The ``-(d / 2) * log(2 * pi)`` term of the full log-density is left out on
    purpose: it is the same for every class, so it cannot change which class
    scores higher.

    Args:
        x: Feature vector to score.
        mean: Class mean vector, same length as ``x``.
        inv_cov: Inverse of the class covariance matrix.
        det: Determinant of the class covariance matrix.

    Returns:
        The score as a float; larger means ``x`` fits the class better.

    Raises:
        ValueError: If ``det`` is not strictly positive, e.g. because the
            determinant underflowed to zero, so its logarithm is undefined.
    """
    if det <= 0:
        raise ValueError(
            "covariance determinant must be positive to take its logarithm "
            "(got " + str(det) + "); it may have underflowed - rescale the features"
        )
    diff = np.subtract(x, mean)
    # Squared Mahalanobis distance: diff^T . inv_cov . diff
    mahalanobis_sq = np.dot(diff, np.dot(inv_cov, diff))
    return mahalanobis_sq * (-0.5) - (math.log(det) * 0.5)


def probmale(x):
    """Score ``x`` under the male-class Gaussian (log-likelihood up to a constant).

    Uses the male training mean, inverse covariance and determinant computed
    in the earlier cells (``train_m_mean``, ``incovar_m``, ``det_m``).
    """
    return log_gaussian_score(x, train_m_mean, incovar_m, det_m)


def probfemale(x):
    """Score ``x`` under the female-class Gaussian (log-likelihood up to a constant).

    Uses the female training mean, inverse covariance and determinant computed
    in the earlier cells (``train_f_mean``, ``incovar_f``, ``det_f``).
    """
    return log_gaussian_score(x, train_f_mean, incovar_f, det_f)

In [17]:
def classify(x, expected, error):
    """Predict the class of ``x`` and record whether it matched ``expected``.

    The female and male discriminant values are the class log-likelihood
    scores plus the log of the class prior. "Female" is predicted only when
    its value is strictly larger; a tie goes to "Male".

    Args:
        x: Feature vector to classify.
        expected: True label. Only the exact string "Female" is treated as
            female; any other value is treated as male.
        error: List that is appended to in place. Despite its name it
            receives 1 when the prediction is correct and 0 when it is wrong.

    Returns:
        A string with the predicted label followed by both discriminant values.
    """
    female_score = probfemale(x) + math.log(p_female)
    male_score = probmale(x) + math.log(p_male)

    predicted = "Female" if female_score > male_score else "Male"

    # The prediction is a hit when it agrees with `expected` on "is Female?".
    hit = (predicted == "Female") == (expected == "Female")
    error.append(1 if hit else 0)

    return (
        predicted
        + ", Measure its female=" + str(female_score)
        + " Measure its male=" + str(male_score)
    )

In [18]:
# classify() appends 1 to this list for a correct prediction and 0 for a
# wrong one, so despite its name it collects per-sample correctness flags.
error = []

for sample in test_male:
    verdict = classify(sample, "Male", error)
    print("Actual class=Male", "Predicted =", verdict)

print()

for sample in test_female:
    verdict = classify(sample, "Female", error)
    print("Actual class=Female", "Predicted =", verdict)

Actual class=Male Predicted = Male, Measure its female=-65.01822438742384 Measure its male=27.304517170112987
Actual class=Male Predicted = Male, Measure its female=-69.74692850699067 Measure its male=22.94953125405725
Actual class=Male Predicted = Male, Measure its female=-107.29348829294905 Measure its male=22.77969605818059
Actual class=Male Predicted = Male, Measure its female=-55.951859211077696 Measure its male=27.50575843155518
Actual class=Male Predicted = Male, Measure its female=-118.02287341839984 Measure its male=12.10170008794073

Actual class=Female Predicted = Male, Measure its female=-124.7631606813462 Measure its male=-7.726351199041644
Actual class=Female Predicted = Male, Measure its female=-81.73709893924534 Measure its male=4.12950393512135
Actual class=Female Predicted = Female, Measure its female=20.371837545657748 Measure its male=-221.5942249824978
Actual class=Female Predicted = Female, Measure its female=20.529539722896864 Measure its male=-112.92014568445605

These values are unnormalised log-posterior scores, not probabilities: only their relative order between the two classes matters.

In [19]:
# `error` holds 1 for each correctly classified test vector and 0 otherwise,
# so its mean is the fraction of test vectors classified correctly.
n_correct = sum(error)
Accuracy = n_correct / len(error)
print("Accuracy=", Accuracy)

Accuracy= 0.8
