In [98]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
import seaborn as sns
import cv2
import pandas as pd
from glob import iglob
import warnings
warnings.filterwarnings('ignore')

In [99]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

In [100]:
def cost(theta, x, y):
    h = sigmoid(x @ theta)
    m = len(y)
    cost = 1 / m * np.sum(
        -y * np.log(h) - (1 - y) * np.log(1 - h)
    )
    grad = 1 / m * ((y - h) @ x)
    return cost, grad

In [110]:
def fit(x, y, max_iter=1000, alpha=0.1):
    x = np.hstack((np.ones((len(y),1)),x))
    thetas = []
    classes = np.unique(y)
    costs = np.zeros(max_iter)

    for c in classes:
        # one vs. rest binary classification
        binary_y = np.where(y == c, 1, 0)
        
        theta = np.zeros(x.shape[1])
        for epoch in range(max_iter):
            costs[epoch], grad = cost(theta, x, binary_y)
            theta += alpha * grad
            
        thetas.append(theta)
    return thetas, classes, costs

In [111]:
def predict(classes, thetas, x, length):
    x = np.hstack((np.ones((length,1)),x))
    preds = [np.argmax([sigmoid(xi @ theta) for theta in thetas]) for xi in x]
    return [classes[p] for p in preds]

In [137]:
images_classes= []
faces = pd.DataFrame([])
for path in iglob('./dataset/*.jpg'):
    clas= ((path.split('/')[-1]).split('.')[0]).split('_')[0]
    images_classes.append(clas)
    image = cv2.imread(path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    scale_percent = 60 # percent of original size
    width = int(image.shape[1] * scale_percent / 100)
    height = int(image.shape[0] * scale_percent / 100)
    dim = (width, height)
    gray = cv2.resize(gray, dim,interpolation = cv2.INTER_NEAREST)
    face = pd.Series(gray.flatten(),name=path)
    faces = faces.append(face)

dim= gray.shape

In [138]:
r= 100
PCA_faces = pd.DataFrame([])
for i in range (len(faces)):
    gray= faces.iloc[i].values.reshape(dim).astype('uint8')
    U, S, VT= np.linalg.svd(gray, full_matrices= False)
    S= np.diag(S)
    face = pd.Series(gray.flatten(),name=path)

    approx= U[:,:r]@ S[0:r,:r]@ VT[:r,:]
    face= pd.Series(approx.flatten(),name=path)
    PCA_faces= PCA_faces.append(face)
    
print (PCA_faces.shape)

(520, 23409)


In [139]:
def split_data(dataset):
    length= int(0.2*len(dataset))
    train_data = dataset[length:]
    valid_data = dataset[:length]

    return train_data.to_numpy(), valid_data.to_numpy()

In [140]:
train_data, valid_data= split_data(PCA_faces)

length= int(0.2*len(PCA_faces))
train_labels= np.asarray(images_classes[length:])

valid_labels= np.asarray(images_classes[:length])

In [141]:
thetas, classes, costs = fit(train_data, train_labels)

In [142]:
def score(classes, theta, x, y):
    return (predict(classes, theta, x, len(y)) == y).mean()

In [143]:
print(f"Train Accuracy: {score(classes, thetas, train_data, train_labels):.3f}")
print(f"Test Accuracy: {score(classes, thetas, valid_data, valid_labels):.3f}")

Train Accuracy: 0.870
Test Accuracy: 0.529
