5. Consider the 128- dimensional feature vectors given in the “face feature vectors.csv” file.

Dataset Specifications:

Total number of samples = 800

Number of classes = 2 ( labeled as “male” and “female”)

Samples from “1 to 400” belongs to class “male”

Samples from “401 to 800” belongs to class “female”

Number of samples per class = 400

Use the following information to design classifier:

Number of test samples ( last 5 in each class) = 5

Number of training samples ( remaining 395 in each class) = 395

Number of dimensions = 128


In [102]:
import numpy as np
import pandas as pd

In [103]:
face_df = pd.read_csv('./face_feature_vectors.csv')
face_df.head(10)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,0,1,2,3,4,5,6,7,...,118,119,120,121,122,123,124,125,126,127
0,1,male,-0.06642,0.151611,0.02774,0.052771,-0.066105,-0.041232,-0.002637,-0.158467,...,0.025989,-0.001087,0.02726,-0.046754,-0.118619,-0.163774,-0.00059,-0.0764,0.107497,0.001567
1,2,male,-0.030614,0.049667,0.008084,-0.050324,0.007649,-0.063818,-0.01953,-0.119905,...,0.044229,-0.0239,-0.028108,0.040618,-0.146579,-0.141244,0.016162,0.017638,0.08061,-0.01593
2,3,male,-0.096178,0.061127,0.035326,-0.035388,-0.090728,-0.018634,-0.024315,-0.139786,...,0.111141,0.059436,-0.029222,0.042115,-0.222173,-0.116908,0.093428,0.017391,0.057652,0.086116
3,4,male,-0.103057,0.085044,0.078333,-0.035873,-0.028163,0.004924,0.007829,-0.017016,...,0.100793,-0.002644,-0.023388,0.029497,-0.13983,-0.119243,0.005306,-0.0151,0.161575,0.062462
4,5,male,-0.125815,0.120046,0.023131,-0.042901,0.038215,-0.049677,-0.054258,-0.130758,...,0.090197,0.067527,0.039926,0.047469,-0.056852,-0.0767,0.004966,0.028171,0.026041,0.084135
5,6,male,-0.149119,0.125288,0.142323,-0.009087,-0.031394,-0.123533,0.043598,-0.063999,...,0.060833,0.089529,-0.034872,0.05708,-0.137162,-0.072522,0.052731,-0.14146,0.019018,0.085765
6,7,male,-0.139035,0.073513,-0.00177,-0.034225,-0.10161,0.065105,-0.01442,-0.054993,...,0.081007,-0.002164,0.060377,0.080294,-0.139369,-0.150245,0.078657,0.024194,0.06218,0.036039
7,8,male,-0.074126,-0.000669,0.004166,-0.082413,-0.096091,-0.021992,0.009714,-0.056961,...,0.050497,0.038932,0.02352,-0.09026,-0.147692,-0.008296,0.007609,-0.026687,-0.017523,-0.03831
8,9,male,-0.16622,0.042769,-0.031647,-0.036892,-0.143837,-0.040566,0.042541,-0.122923,...,0.014732,-0.049135,0.08177,-0.027199,-0.096941,-0.094661,0.057797,-0.101063,0.061373,0.062176
9,10,male,-0.18577,0.154008,0.073184,-0.070829,-0.144617,-0.019732,-0.019418,-0.004675,...,0.093317,0.035101,-0.147997,-0.04601,-0.087777,-0.10066,0.03619,0.012158,0.032304,0.085996


In [104]:


#Function to check whether the data falls under case 1
def isCase1(mat):
    first = mat[0][0]
    for i in range(1, len(mat)):
        if mat[i][i] != first:
            return False
    return True


#If the data falls under case-1 discriminant function is computed accordingly.
def linear_case1(w, pw, cov_mat):
    u1 = np.mean(w, axis=0)
    cov = cov_mat[0][0]
    weight = u1 / cov
    bias = np.log(pw) - 0.5 * np.dot(u1.T, u1) / (cov ** 2)
    return weight, bias

#If the data falls under case-2 discriminant function is computed accordingly.
def linear_case2(w, pw, cov_mat):
    u1 = np.mean(w, axis=0)
    inv_cov = np.linalg.inv(cov_mat)
    weight = inv_cov @ u1
    bias = np.log(pw) - 0.5 * u1.T @ inv_cov @ u1
    return weight, bias

#If the data falls under case-3 discriminant function is computed accordingly.
def non_linear(w, pw, cov_mat):
    u1 = np.mean(w, axis=0)
    inv_cov = np.linalg.inv(cov_mat)
    weight1 = -0.5 * inv_cov
    weight2 = inv_cov @ u1
    bias = np.log(pw) - 0.5 * np.log(np.linalg.det(cov_mat)) - 0.5 * u1.T @ inv_cov @ u1
    return weight1, weight2, bias


#wieghts and biases are extracted from above functions by checking the cases for the given data.
def bayes_classifier(w1, w2, pw1, pw2):
    w1_cov = np.cov(w1, rowvar=False)
    w2_cov = np.cov(w2, rowvar=False)
    
    if np.allclose(w1_cov, w2_cov):
        weight1, bias1 = linear_case2(w1, pw1, w1_cov)
        weight2, bias2 = linear_case2(w2, pw2, w2_cov)
        return lambda x: np.dot(weight1 - weight2, x) + (bias1 - bias2)
    elif isCase1(w1_cov) and isCase1(w2_cov):
        weight1, bias1 = linear_case1(w1, pw1, w1_cov)
        weight2, bias2 = linear_case1(w2, pw2, w2_cov)
        return lambda x: np.dot(weight1 - weight2, x) + (bias1 - bias2)
    else:
        weight1_1, weight1_2, bias1 = non_linear(w1, pw1, w1_cov)
        weight2_1, weight2_2, bias2 = non_linear(w2, pw2, w2_cov)
        return lambda x: x.T @ (weight1_1 - weight2_1) @ x + np.dot(weight1_2 - weight2_2, x) + (bias1 - bias2)




In [105]:
#2 classes
n = 2

#No of features
features = 128

#as first 400 are male and next 400 are female
labels = np.array([0] * 400 + [1] * 400)

#Testing dataset
test_male_df = face_df.iloc[395 : 400]
test_female_df = face_df.iloc[795 : 800]

test_male_df = test_male_df.drop(columns = ['Unnamed: 1', 'Unnamed: 0'])
test_female_df = test_female_df.drop(columns = ['Unnamed: 1', 'Unnamed: 0'])

test_male_labels = labels[395 : 400] 
test_female_labels = labels[795 : 800]

#Training dataset
training_male_df = face_df.iloc[: 395]
training_female_df = face_df.iloc[400 : 795]

training_male_df = training_male_df.drop(columns = ['Unnamed: 1', 'Unnamed: 0'])
training_female_df = training_female_df.drop(columns = ['Unnamed: 1', 'Unnamed: 0'])


train_male_labels = labels[: 395]
test_female_labels = labels[400 : 795]

pw1, pw2 = 0.5, 0.5

In [106]:
result = bayes_classifier(training_male_df, training_female_df, pw1, pw2)
test_df = np.vstack((test_male_df, test_female_df))
test_labels = np.array([0] * 5 + [1] * 5)
predictions = np.array([0 if result(x) > 0 else 1 for x in test_df])

accuracy = np.mean(test_labels == predictions)*100
print("Accuracy of the model is", accuracy, "%")

#pred = np.mean()

Accuracy of the model is 50.0 %


  bias = np.log(pw) - 0.5 * np.log(np.linalg.det(cov_mat)) - 0.5 * u1.T @ inv_cov @ u1
  return lambda x: x.T @ (weight1_1 - weight2_1) @ x + np.dot(weight1_2 - weight2_2, x) + (bias1 - bias2)


In [107]:
for i, (true, pred) in enumerate(zip(test_labels, predictions)):
    print(f"Sample {i+1}: True label = {'Male' if true == 0 else 'Female'}, Predicted = {'Male' if pred == 0 else 'Female'}")


Sample 1: True label = Male, Predicted = Female
Sample 2: True label = Male, Predicted = Female
Sample 3: True label = Male, Predicted = Female
Sample 4: True label = Male, Predicted = Female
Sample 5: True label = Male, Predicted = Female
Sample 6: True label = Female, Predicted = Female
Sample 7: True label = Female, Predicted = Female
Sample 8: True label = Female, Predicted = Female
Sample 9: True label = Female, Predicted = Female
Sample 10: True label = Female, Predicted = Female
