In [42]:
import numpy as np
import PIL
from PIL import Image
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [28]:
arr = [[1, 1, 3], [2, 2, 1], [3, 3, 2]]
print(arr)
print(np.mean(np.array(arr)[0:3, :], axis=0))

[[1, 1, 3], [2, 2, 1], [3, 3, 2]]
[2. 2. 2.]


# 1. Download the Dataset and Understand the Format

In [2]:
#Downloaded in the project directory

# 2. Generate the Data Matrix and the Label vector

In [3]:
i = 0
s = 1
x = 1
d = [0] * 400
y = [0] * 400

while i < 400:
  image = PIL.Image.open("dataset/s" + str(s) + "/" + str(x) + ".pgm")
  image_sequence = image.getdata()
  image_array = np.array(image_sequence)
  d[i] = image_array
  y[i] = s
  i = i + 1
  x = x + 1
  if x == 11:
    s = s + 1
    x = 1

# 3. Split the Dataset into Training and Test sets

In [4]:
training_data = [0] * 200
testing_data = [0] * 200
training_labels = [0] * 200
testing_labels = [0] * 200
curr_index = 0

In [5]:
for i in range(1, 400, 2):
  curr_index = int(i / 2);
  testing_data[curr_index] = d[i]
  testing_labels[curr_index] = y[i]
  training_data[curr_index] = d[i - 1]
  training_labels[curr_index] = y[i - 1]

testing_data = np.array(testing_data)
testing_labels = np.array(testing_labels)
training_data = np.array(training_data)
training_labels = np.array(training_labels)

# 4. Classification using PCA

## Declaring variables

In [7]:
alpha = [0.8, 0.85, 0.9, 0.95]
n = len(testing_data[0])

## Calculating centered data, covariance matrix, eigen values, and eigen vectors

In [8]:
attributes_means = np.mean(training_data, axis=0)
Z = np.subtract(training_data, attributes_means.T)
cov_matrix = np.cov(Z.T)
eval, evec = np.linalg.eigh(cov_matrix)

In [9]:
idx = eval.argsort()[::-1]
eva = eval[idx]
eve = evec[:,idx]
eval_sum = np.sum(eval)

## Calculating reduced basis

In [10]:
reduced_basis = []
for curr_alpha in alpha:
    dimensionality = 0
    curr_eval_sum = 0
    for curr_eval in idx:
        dimensionality += 1
        curr_eval_sum += curr_eval
        if curr_eval_sum / eval_sum >= curr_alpha:
            break
    
    reduced_basis.append(eve[:, :dimensionality])

## Declaring the models

In [44]:
model_1_pca = KNeighborsClassifier(n_neighbors=1)
model_3_pca = KNeighborsClassifier(n_neighbors=3)
model_5_pca = KNeighborsClassifier(n_neighbors=5)
model_7_pca = KNeighborsClassifier(n_neighbors=7)
models = [model_1_pca, model_3_pca, model_5_pca, model_7_pca]

## Classifying the images

In [53]:
for i in range(len(alpha)):
    reduced_dim_data = np.dot(training_data, reduced_basis[i])
    reduced_din_test = np.dot(testing_data, reduced_basis[i])
    
    model_1_pca.fit(reduced_dim_data, training_labels)
    model_3_pca.fit(reduced_dim_data, training_labels)
    model_5_pca.fit(reduced_dim_data, training_labels)
    model_7_pca.fit(reduced_dim_data, training_labels)
    
    print("When alpha = " + str(alpha[i]))
    for k in range(1, 8, 2):
        print("     K-NN classifier = " + str(k))
        pred_model = models[int(k / 2)].predict(reduced_din_test)
        print("         Accuracy is: " + str(accuracy_score(testing_labels, pred_model)))
        
    print("**********************")
            

When alpha = 0.8
     K-NN classifier = 1
         Accuracy is: 0.94
     K-NN classifier = 3
         Accuracy is: 0.895
     K-NN classifier = 5
         Accuracy is: 0.845
     K-NN classifier = 7
         Accuracy is: 0.74
**********************
When alpha = 0.85
     K-NN classifier = 1
         Accuracy is: 0.94
     K-NN classifier = 3
         Accuracy is: 0.895
     K-NN classifier = 5
         Accuracy is: 0.845
     K-NN classifier = 7
         Accuracy is: 0.74
**********************
When alpha = 0.9
     K-NN classifier = 1
         Accuracy is: 0.94
     K-NN classifier = 3
         Accuracy is: 0.895
     K-NN classifier = 5
         Accuracy is: 0.845
     K-NN classifier = 7
         Accuracy is: 0.74
**********************
When alpha = 0.95
     K-NN classifier = 1
         Accuracy is: 0.94
     K-NN classifier = 3
         Accuracy is: 0.895
     K-NN classifier = 5
         Accuracy is: 0.845
     K-NN classifier = 7
         Accuracy is: 0.74
*********************

# 5. Classification Using LDA 

# 6. Classifier Tuning 

# 7. Compare vs Non-Face Images

# 8. Bonus 