___
# Facial Expression Recognition Library
___

In this project, I take inspiration from libraries such as scikit-learn and build a plug-in neural network library.

This was a Kaggle competition from 2013. I used its data to practice different models. The following is an overview of the competition and the data:


    One motivation for representation learning is that learning algorithms can design features better and faster than humans can. To this end, we hold this challenge that does not explicitly require that entries use representation learning. Rather, we introduce an entirely new dataset and invite competitors from all related communities to solve it. The dataset for this challenge is a facial expression classification dataset that we have assembled from the internet. Because this is a newly introduced dataset, this contest will see which methods are the easiest to get quickly working on new data.
    
## Data
____

    The data consists of 48x48 pixel grayscale images of faces. The faces have been automatically registered so that the face is more or less centered and occupies about the same amount of space in each image. The task is to categorize each face based on the emotion shown in the facial expression into one of seven categories (0=Angry, 1=Disgust, 2=Fear, 3=Happy, 4=Sad, 5=Surprise, 6=Neutral).

## Utilities
____
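A collection of utility functions (weight initialization, activations, cost functions, and data loading) used by the models below.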

In [1]:
import numpy as np
import pandas as pd

In [2]:
def init_weight_and_bias(M1, M2):
    """Create the initial weight matrix and bias vector for a dense layer.

    M1 is the input size and M2 the output size.

    Returns a ``(W, b)`` pair of float32 arrays: ``W`` has shape (M1, M2),
    drawn from a standard normal and divided by sqrt(M1 + M2) (fan-in plus
    fan-out); ``b`` is a vector of M2 zeros.
    """
    scale = np.sqrt(M1 + M2)
    weights = np.random.randn(M1, M2) / scale
    biases = np.zeros(M2)
    return weights.astype(np.float32), biases.astype(np.float32)

In [3]:
def init_filter(shape, poolsz):
    """Create initial convolution filter weights (for a convolutional network).

    Args:
        shape: filter shape; presumably (maps_out, maps_in, height, width)
            -- TODO confirm against the convolution layer that uses it.
        poolsz: pooling window size, e.g. (2, 2).

    Returns:
        float32 array of the given shape, drawn from a standard normal and
        scaled by sqrt(2 / (fan_in + fan_out)).
    """
    fan_in = np.prod(shape[1:])
    # The pooling divisor applies to the whole fan-out product. Dividing the
    # shape[2:] tuple by it first would scale every spatial dimension
    # separately and shrink fan-out by prod(poolsz) once per dimension.
    fan_out = shape[0] * np.prod(shape[2:]) / np.prod(poolsz)
    w = np.random.randn(*shape) * np.sqrt(2) / np.sqrt(fan_in + fan_out)
    return w.astype(np.float32)

In [4]:
# activation functions
def relu(x):
    """Rectified linear unit: keep the positive entries of x, zero the rest."""
    positive_mask = x > 0
    return x * positive_mask

def sigmoid(x):
    """Logistic function 1 / (1 + e^(-x)), applied elementwise."""
    decay = np.exp(-x)
    return 1 / (1 + decay)

def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    Each row of the result is non-negative and sums to 1.
    """
    # Subtracting the row maximum does not change the result mathematically
    # (the factor cancels in the ratio), but it keeps np.exp from overflowing
    # to inf, which would turn the whole row into nan after the division.
    shifted = x - np.max(x, axis=1, keepdims=True)
    expX = np.exp(shifted)
    return expX / expX.sum(axis=1, keepdims=True)

In [5]:
# cost functions
def sigmoid_cost(T, Y):
    """Binary cross-entropy, summed over all samples.

    T holds the targets and Y the predicted probabilities; the value is
    computed straight from the cross-entropy definition.
    """
    positive_term = T * np.log(Y)
    negative_term = (1 - T) * np.log(1 - Y)
    return -np.sum(positive_term + negative_term)

def cost(T, Y):
    """General cross-entropy (summed), suitable for softmax outputs.

    T is the indicator matrix of targets and Y the predicted probabilities.
    """
    log_likelihood = T * np.log(Y)
    return -log_likelihood.sum()

def cost2(T, Y):
    """Mean cross-entropy computed by indexing Y with integer targets.

    Picks Y[i, T[i]] for every row instead of multiplying by a large,
    mostly-zero indicator matrix. Unlike cost(), the result is averaged
    over the samples rather than summed.
    """
    rows = np.arange(len(T))
    picked = Y[rows, T]
    return -np.log(picked).mean()

In [6]:
# error rate related!
def error_rate(targets, predictions):
    """Fraction of entries where predictions differ from targets."""
    mismatched = targets != predictions
    return np.mean(mismatched)

In [7]:
# data cleaning related functions

def y_to_indicator(y):
    """Convert integer class labels into an N x K indicator (one-hot) matrix.

    Args:
        y: sequence or 1-D array of N integer class labels.

    Returns:
        float array of shape (N, K) with a single 1 per row, in column y[i].
        An empty input yields an array of shape (0, 0).
    """
    labels = np.asarray(y)
    N = len(labels)
    if N == 0:
        return np.zeros((0, 0))

    # The width must cover the largest label: a label set with gaps
    # (e.g. {0, 2}) would otherwise index past the last column.
    K = max(len(np.unique(labels)), int(labels.max()) + 1)

    ind = np.zeros((N, K))
    # One vectorised assignment instead of a Python loop over the rows.
    ind[np.arange(N), labels] = 1
    return ind

def getData(balance_ones=True, path='input/fer2013.csv'):
    """Load the facial expression data as flat, normalized pixel vectors.

    The first line of the CSV is skipped as a header. In every other row,
    column 0 is read as the integer label and column 1 as space-separated
    integer pixel values; any further columns are ignored. For the
    competition data each image is 48x48, i.e. a 2304-element vector.

    Args:
        balance_ones: if True, every class-1 sample is repeated 9 times to
            counter the class imbalance. The repeated class-1 rows are
            placed after all other rows.
        path: location of the CSV file.

    Returns:
        (X, Y): X is a float array of shape (N, D) scaled to [0, 1] by
        dividing by 255; Y is an integer array of N labels.
    """
    Y = []
    X = []

    # `with` closes the file even if a row fails to parse.
    with open(path) as f:
        next(f, None)  # skip the header line
        for line in f:
            if not line.strip():
                # tolerate blank lines (e.g. a trailing newline at end of file)
                continue
            row = line.split(',')
            Y.append(int(row[0]))
            X.append([int(p) for p in row[1].split()])

    # convert to NumPy arrays and normalize the pixel values
    X = np.array(X) / 255.0
    Y = np.array(Y)

    if balance_ones:
        is_one = (Y == 1)
        X0, Y0 = X[~is_one, :], Y[~is_one]

        # oversample class 1 by repeating each of its rows 9 times
        X1 = np.repeat(X[is_one, :], 9, axis=0)

        X = np.vstack([X0, X1])
        # Build the new labels with Y's own dtype so the result stays an
        # integer array even when there are no class-1 rows at all.
        Y = np.concatenate((Y0, np.ones(len(X1), dtype=Y0.dtype)))

    return X, Y

def getImageData():
    """Load the data with each sample kept in image form.

    Calls getData() and reshapes every flat D-pixel row into a
    (1, d, d) array with d = int(sqrt(D)).

    Returns:
        (X, Y): X has shape (N, 1, d, d); Y is returned as given by getData().
    """
    flat, labels = getData()
    count, n_pixels = flat.shape
    side = int(np.sqrt(n_pixels))
    images = flat.reshape(count, 1, side, side)
    return images, labels

def getBinaryData(path='input/fer2013.csv'):
    """Load only the class-0 and class-1 samples of the facial expression data.

    Parses the CSV the same way as getData() (header skipped, column 0 is
    the label, column 1 the space-separated pixels) but keeps only rows
    labelled 0 or 1. No class balancing is applied.

    Args:
        path: location of the CSV file.

    Returns:
        (X, Y): X is the pixel array divided by 255; Y is the array of
        0/1 labels.
    """
    Y = []
    X = []

    # `with` closes the file even if a row fails to parse.
    with open(path) as f:
        next(f, None)  # skip the header line
        for line in f:
            if not line.strip():
                # tolerate blank lines (e.g. a trailing newline at end of file)
                continue
            row = line.split(',')
            y = int(row[0])
            if y in (0, 1):
                Y.append(y)
                X.append([int(p) for p in row[1].split()])

    return np.array(X) / 255.0, np.array(Y)

___
## Artificial Neural Network for Binary Classification using ReLU and tanh
___
Create ANN class for facial expression recognition.
- Binary Classification (only between 0 and 1)
- One hidden layer
- Uses tanh or ReLU as the hidden-layer activation function.

In [12]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle

class ANN(object):
    """One-hidden-layer neural network for binary (0 / 1) classification.

    The hidden layer has M tanh units and feeds a single sigmoid output.
    Training is full-batch gradient descent with L2 regularization.
    """

    def __init__(self, M):
        # M: number of hidden units
        self.M = M

    def fit(self, X, Y, learning_rate=5e-6, reg=1.0, epochs=10000, show_fig=False):
        """Train the network on X (N x D features) and Y (N binary targets).

        The data is shuffled and the last 1000 samples are held out as a
        validation set, so X must contain more than 1000 rows. Validation
        cost and error are printed every 20 epochs.

        Args:
            X: 2-D feature array.
            Y: 1-D array of 0/1 targets.
            learning_rate: gradient descent step size.
            reg: L2 regularization strength (applied to weights and biases).
            epochs: number of full-batch updates.
            show_fig: if True, plot the recorded validation costs at the end.
        """
        X, Y = shuffle(X, Y)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        X, Y = X[:-1000], Y[:-1000]

        D = X.shape[1]
        self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
        self.b1 = np.zeros(self.M)
        self.W2 = np.random.randn(self.M) / np.sqrt(self.M)
        self.b2 = 0

        costs = []
        best_validation_error = 1
        for i in range(epochs):
            # forward propagation
            pY, Z = self.forward(X)

            # Back propagation. All gradients are computed from the current
            # parameters *before* any of them is updated; in particular the
            # hidden-layer delta must use the W2 that produced pY, not an
            # already-updated copy.
            pY_Y = pY - Y  # prediction minus the target
            dZ = np.outer(pY_Y, self.W2) * (1 - Z * Z)  # tanh derivative
            # ReLU alternative: dZ = np.outer(pY_Y, self.W2) * (Z > 0)

            grad_W2 = Z.T.dot(pY_Y) + reg * self.W2
            grad_b2 = pY_Y.sum() + reg * self.b2
            grad_W1 = X.T.dot(dZ) + reg * self.W1
            grad_b1 = np.sum(dZ, axis=0) + reg * self.b1

            # gradient descent step
            self.W2 -= learning_rate * grad_W2  # hidden-to-output weights
            self.b2 -= learning_rate * grad_b2  # hidden-to-output bias
            self.W1 -= learning_rate * grad_W1  # input-to-hidden weights
            self.b1 -= learning_rate * grad_b1  # input-to-hidden bias

            if i % 20 == 0:
                pYvalid, _ = self.forward(Xvalid)

                c = sigmoid_cost(Yvalid, pYvalid)

                costs.append(c)
                e = error_rate(Yvalid, np.round(pYvalid))
                print("i:", i, "cost:", c, "error:", e)
                if e < best_validation_error:
                    best_validation_error = e
        print("best_validation_error:", best_validation_error)
        print("validation score: ", self.score(Xvalid, Yvalid))

        if show_fig:
            plt.plot(costs)
            plt.show()

    def forward(self, X):
        """Return (pY, Z): output probabilities and hidden-layer activations."""
        Z = np.tanh(X.dot(self.W1) + self.b1)
        # ReLU alternative: Z = relu(X.dot(self.W1) + self.b1)

        return sigmoid(Z.dot(self.W2) + self.b2), Z

    def predict(self, X):
        """Return the predicted labels (probabilities rounded to 0 or 1)."""
        # forward() returns a (probabilities, hidden) pair; only the
        # probabilities are rounded.
        pY, _ = self.forward(X)
        return np.round(pY)

    def score(self, X, Y):
        """Return the classification accuracy (1 - error rate) on X, Y."""
        prediction = self.predict(X)
        return 1 - error_rate(Y, prediction)

#     def main():
#         X, Y = getBinaryData()

#         X0 = X[Y==0, :]
#         X1 = X[Y==1, :]
#         X1 = np.repeat(X1, 9, axis=0)
#         X = np.vstack([X0, X1])
#         Y = np.array([0]*len(X0) + [1]*len(X1))

#         model = ANN(100)
#         model.fit(X, Y, show_fig=True)

# if __name__ == '__main__':
#     main()

In [13]:
# Try Using ANN class!
X, Y = getBinaryData()

# Counter the class imbalance: keep class 0 as is and repeat every
# class-1 row 9 times.
X0 = X[Y == 0]
X1 = np.repeat(X[Y == 1], 9, axis=0)

# Rebuild X and Y with all class-0 rows first, followed by the
# oversampled class-1 rows.
X = np.vstack((X0, X1))
Y = np.array([0] * len(X0) + [1] * len(X1))

In [14]:
# initialize the model
model = ANN(100)  # hidden layer size of 100

# Fit the model using the tanh activation.
# NOTE: after a few test runs, the ReLU activation did not behave well in this model.
model.fit(X, Y, show_fig=True, epochs=10000)

# No separate test split is defined in this notebook, so the accuracy is
# reported on the balanced data set that was passed to fit(). It includes
# the training samples, so treat it as an optimistic estimate.
print(model.score(X, Y))

i: 0 cost: 712.8074841887204 error: 0.505
i: 20 cost: 675.4204328510997 error: 0.375
i: 40 cost: 673.4038044929923 error: 0.444
i: 60 cost: 674.8167300140576 error: 0.449
i: 80 cost: 660.8945024117224 error: 0.403
i: 100 cost: 658.1536637345562 error: 0.394
i: 120 cost: 654.0929366817682 error: 0.376
i: 140 cost: 650.7621580399922 error: 0.371
i: 160 cost: 647.8151636201803 error: 0.365
i: 180 cost: 645.1117648145158 error: 0.363
i: 200 cost: 642.6083010292409 error: 0.363
i: 220 cost: 640.2769094804295 error: 0.359
i: 240 cost: 638.091964389127 error: 0.355
i: 260 cost: 636.0288398245254 error: 0.356
i: 280 cost: 634.0666322335646 error: 0.356
i: 300 cost: 632.1883163007485 error: 0.355
i: 320 cost: 630.3800398017194 error: 0.35
i: 340 cost: 628.6304962047142 error: 0.346
i: 360 cost: 626.9304523100462 error: 0.345


KeyboardInterrupt: 

## Artificial Neural Network Multiclass Classification with Softmax
___

In [None]:
class ANNwithSoftmax(object):
    """One-hidden-layer neural network for multiclass classification.

    The hidden layer has M tanh units and feeds a softmax output with one
    unit per class. Training is full-batch gradient descent with L2
    regularization.
    """

    def __init__(self, M):
        # M: number of hidden units
        self.M = M

    def fit(self, X, Y, learning_rate=1e-5, reg=1.0, epochs=10000, show_fig=False):
        """Train the network on X (N x D features) and Y (N integer labels).

        The data is shuffled and the last 1000 samples are held out as a
        validation set, so X must contain more than 1000 rows. Validation
        cost and error are printed every 20 epochs.

        Args:
            X: 2-D feature array.
            Y: 1-D array of integer class labels.
            learning_rate: gradient descent step size.
            reg: L2 regularization strength (applied to weights and biases).
            epochs: number of full-batch updates.
            show_fig: if True, plot the recorded validation costs at the end.
        """
        X, Y = shuffle(X, Y)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        X, Y = X[:-1000], Y[:-1000]

        D = X.shape[1]
        T = y_to_indicator(Y)
        # Take the class count from the indicator matrix itself so the output
        # layer always has the same width as the targets it is compared to.
        K = T.shape[1]

        # input-to-hidden layer
        self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
        self.b1 = np.zeros(self.M)

        # hidden-to-output layer
        self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)
        self.b2 = np.zeros(K)

        costs = []
        best_validation_error = 1
        for i in range(epochs):
            # forward propagation
            pY, Z = self.forward(X)

            # Back propagation. All gradients are computed from the current
            # parameters *before* any of them is updated; in particular the
            # hidden-layer delta must use the W2 that produced pY, not an
            # already-updated copy.
            pY_T = pY - T
            dZ = pY_T.dot(self.W2.T) * (1 - Z * Z)  # tanh derivative
            # ReLU alternative: dZ = pY_T.dot(self.W2.T) * (Z > 0)

            grad_W2 = Z.T.dot(pY_T) + reg * self.W2
            grad_b2 = pY_T.sum(axis=0) + reg * self.b2
            grad_W1 = X.T.dot(dZ) + reg * self.W1
            grad_b1 = dZ.sum(axis=0) + reg * self.b1

            # gradient descent step
            self.W2 -= learning_rate * grad_W2
            self.b2 -= learning_rate * grad_b2
            self.W1 -= learning_rate * grad_W1
            self.b1 -= learning_rate * grad_b1

            if i % 20 == 0:
                pYvalid, _ = self.forward(Xvalid)
                c = cost2(Yvalid, pYvalid)
                costs.append(c)
                e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
                print("i:", i, "cost:", c, "error:", e)
                if e < best_validation_error:
                    best_validation_error = e
        print("best_validation_error:", best_validation_error)
        print("validation score: ", self.score(Xvalid, Yvalid))

        if show_fig:
            plt.plot(costs)
            plt.show()

    def forward(self, X):
        """Return (pY, Z): class probabilities and hidden-layer activations."""
        Z = np.tanh(X.dot(self.W1) + self.b1)
        # ReLU alternative: Z = relu(X.dot(self.W1) + self.b1)
        return softmax(Z.dot(self.W2) + self.b2), Z

    def predict(self, X):
        """Return the most probable class index for every row of X."""
        pY, _ = self.forward(X)
        return np.argmax(pY, axis=1)

    def score(self, X, Y):
        """Return the classification accuracy (1 - error rate) on X, Y."""
        prediction = self.predict(X)
        return 1 - error_rate(Y, prediction)


# def main():
#     X, Y = getData()
    
#     model = ANN(200)
#     model.fit(X, Y, reg=0, show_fig=True)
#     print(model.score(X, Y))
#     # scores = cross_val_score(model, X, Y, cv=5)
#     # print "score mean:", np.mean(scores), "stdev:", np.std(scores)

# if __name__ == '__main__':
#     main()


In [None]:
# Load the full seven-class data set. Without this, the cell would silently
# reuse the binary (class 0 / class 1) X, Y built for the ANN demo above.
X, Y = getData()

model = ANNwithSoftmax(200)  # hidden layer size of 200
model.fit(X, Y, reg=0, show_fig=True)
print(model.score(X, Y))
# scores = cross_val_score(model, X, Y, cv=5)
# print "score mean:", np.mean(scores), "stdev:", np.std(scores)

Updates on different methods coming soon...