# Face Recognition

### Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage import io, transform
import os

%matplotlib inline

## Helper Functions

### Splitting Data

In [2]:
def split_data(path = 'FaceDataset'):
    subfolders = [f.path for f in os.scandir(path) if f.is_dir()]
    sorted(subfolders)
    train = []
    test = []
    for subfolder in subfolders:
        dirFiles = os.listdir(subfolder)
        sorted(dirFiles)
        subImages = []
        for img in dirFiles:
            if os.path.splitext(img)[1] == '.pgm':
                subImages.append(subfolder+'\\'+img)
        subTrain = subImages[ : len(subImages)//2]
        train.extend(subTrain)
        subTest = subImages[len(subImages)//2 : ]
        test.extend(subTest)
    return train, test

### Basic Functions

In [3]:
def openImage(path):
    return io.imread(path)

def displayImage(image_arr):
    plt.imshow(image_arr)
    
def downscale(image_arr):
    return transform.resize(image_arr, (image_arr.shape[0]//2,image_arr.shape[1]//2))

def toVector(image_arr):
    return image_arr.reshape(-1,1)

### Class Model Making Function

In [4]:
def allClassModels(dirGroup):
    groups = dirGroup.groups.keys()
    models = []
    for group_name in groups:
        group = dirGroup.get_group(group_name)
        images = group["Images"]
        vectorImages = []
        for img in images:
            vectorImages.append(toVector(downscale(openImage(img))))
        model = np.hstack(vectorImages)
        models.append(model)
    return np.array(models)

## Algorithm

In [5]:
def B_hat(key_to_class, train_df):
    Bees = []
    for i in range(train_df.shape[0]):
        image_url = train_df["Images"][i]
        y = toVector(downscale(openImage(image_url)))
        X = key_to_class[ train_df["Dir"][i] ]
        Xt = np.transpose(X)
        inv = np.linalg.inv(np.dot(Xt,X)) 
        B = np.dot(inv,np.dot(Xt,y))
        Bees.append(B)
    train_df["Bees"] = Bees

In [6]:
def distance(key_to_class, train_df, test):
    B_hat(key_to_class, train_df)
    test_df = train_df.copy()
    y = toVector(downscale(openImage(test)))
    dist_list = []
    for i in range(test_df.shape[0]):
        X = key_to_class[test_df["Dir"][i]]
        y_hat = np.dot(X, test_df["Bees"][i])
        dist = np.linalg.norm(y-y_hat)
        dist_list.append(dist)
    test_df["Distance"] = dist_list
    return test_df

In [7]:
def predict(key_to_class, train_df, test):
    test_df = distance(key_to_class, train_df, test)
    minDist = test_df["Distance"].idxmin()
    return test_df.loc[minDist]["Dir"]

## Accuracy

In [8]:
def getAccuracy(key_to_class, train_df, testing_df):
    all_min_dist = []
    predictions = []
    for i in range(testing_df.shape[0]):
        test_df = distance(key_to_class, train_df, testing_df["Images"][i])
        minDist = test_df["Distance"].idxmin()
        all_min_dist.append(test_df.loc[minDist]['Distance'])
        pred = test_df.loc[minDist]['Dir']
        predictions.append(pred)
    testing_df['Prediction'] = predictions
    testing_df['L2-Norm'] = all_min_dist
    testing_df['Right'] = testing_df['Dir'] == testing_df['Prediction']
    print("Accuracy = ",testing_df['Right'].value_counts()[True]/2)
    return testing_df

## Real Implementation

Spliting Data into Train and Test Data

In [9]:
train, test = split_data()

Making DataFrames of Train and Test Data to make it easy to manage

In [10]:
train_df = pd.DataFrame(train, columns=['Images'])
train_df["Dir"] = train_df["Images"].str.split("\\").str.get(1)

testing_df = pd.DataFrame(test, columns=['Images'])
testing_df["Dir"] = testing_df["Images"].str.split("\\").str.get(1)

Grouping by classes

In [11]:
dirGroup = train_df.groupby('Dir')
keys = dirGroup.groups.keys()

Making Predictor for each class

In [12]:
X1, X10, X11, X12, X13, X14, X15, X16, X17, X18, X19, X2, X20, X21, X22, X23, X24, X25, X26, X27, X28, X29, X3, X30, X31, X32, X33, X34, X35, X36, X37, X38, X9, X4, X40, X5, X6, X7, X8, X9 = allClassModels(dirGroup)
classes = [X1, X10, X11, X12, X13, X14, X15, X16, X17, X18, X19, X2, X20, X21, X22, X23, X24, X25, X26, X27, X28, X29, X3, X30, X31, X32, X33, X34, X35, X36, X37, X38, X9, X4, X40, X5, X6, X7, X8, X9]

Making a dictionary mapping the key to the predictor

In [13]:
key_to_class = dict(zip(keys, classes))

Predicting the class of the Image

In [14]:
predict(key_to_class, train_df, "FaceDataset\\s1\\1.pgm")

's1'

Accuracy of our Model

In [15]:
pd.set_option("display.max_rows", None)

In [16]:
getAccuracy(key_to_class, train_df, testing_df)

Accuracy =  91.5


Unnamed: 0,Images,Dir,Prediction,L2-Norm,Right
0,FaceDataset\s1\5.pgm,s1,s1,7.012997,True
1,FaceDataset\s1\6.pgm,s1,s1,6.48848,True
2,FaceDataset\s1\7.pgm,s1,s1,6.560869,True
3,FaceDataset\s1\8.pgm,s1,s1,7.3219,True
4,FaceDataset\s1\9.pgm,s1,s1,8.011391,True
5,FaceDataset\s10\5.pgm,s10,s10,6.364379,True
6,FaceDataset\s10\6.pgm,s10,s10,6.403414,True
7,FaceDataset\s10\7.pgm,s10,s10,5.608173,True
8,FaceDataset\s10\8.pgm,s10,s10,4.524284,True
9,FaceDataset\s10\9.pgm,s10,s38,7.915857,False
