In [None]:
import os
import sys
import struct
import pickle

from PIL import Image
from scipy.special import expit
import numpy as np
import tensorflow as tf

## Neurol Network Implementation

In [1]:
class NeuralNetMLP(object):

    def __init__(self, n_output, n_features, n_hidden=30, c=0.001, epochs=500, eta=0.001, batch_size=50):
        np.random.seed(43)
        tf.set_random_seed(43)
        self.n_output = n_output
        self.n_features = n_features
        self.n_hidden = n_hidden
        self.w1, self.w2, self.b1, self.b2 = self._initialize_weights()
        self.c = c
        self.epochs = epochs
        self.eta = eta
        self.batch_size = batch_size
        self.sess = tf.Session()

    def _encode_labels(self, y):
        onehot = np.zeros((self.n_output, y.shape[0]))
        for idx, val in enumerate(y):
            onehot[val, idx] = 1.0
        return onehot

    def _initialize_weights(self):
        return [
            tf.Variable(tf.random_normal(shape)) 
            for shape in [
                (self.n_features, self.n_hidden), 
                (self.n_hidden, self.n_output), 
                (self.n_hidden,), 
                (self.n_output,)
            ]]

    def _feedforward(self, x):
        hidden_layer = tf.nn.sigmoid(tf.add(tf.matmul(x, self.w1), self.b1))
        return tf.nn.sigmoid(tf.add(tf.matmul(hidden_layer, self.w2), self.b2))
    
    def _regularization(self):
        return (tf.nn.l2_loss(self.w1) + tf.nn.l2_loss(self.w2))

    def _loss(self, prediction, y):
        return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    
    def _cost(self, prediction, y):
        return tf.reduce_mean(self._loss(prediction, y) + self.c * self._regularization())
    
    def fit(self, X, labels, print_progress=False):
        self._x = tf.placeholder("float", [None, self.n_features])
        y = tf.placeholder("float", [None, self.n_output])

        prediction = self._feedforward(self._x)
        self._predict = tf.argmax(prediction, axis=1)
        cost = self._cost(prediction, y)
        optimizer = tf.train.AdamOptimizer(learning_rate=self.eta).minimize(cost)

        self.sess.run(tf.global_variables_initializer())
        total_batch = int(X.shape[0] / self.batch_size)
        encode_y = self._encode_labels(labels).T
        
        for epoch in range(self.epochs):
            for i in range(total_batch):
                X_batch = X[self.batch_size * i : self.batch_size* (i + 1)]
                y_batch = encode_y[self.batch_size * i : self.batch_size* (i + 1)]
                _, c = self.sess.run([optimizer, cost], feed_dict={self._x: X_batch , y: y_batch })
            if epoch % 1000 == 0:
                print("Epoch:", epoch + 1, "cost=", c)
        print("End")
        return self


    def predict(self, X):
        return self.sess.run(self._predict, feed_dict={self._x: X})

## Part A

In [2]:
class FaceDirectionPredictor(object):

    def __init__(self, labels=('left', 'straight', 'right',  'up')):
        self.file_label_number = 1
        self.labels = labels
        self.directions = {j: i for i, j in enumerate(self.labels)}
        self._model = object()
        self.l2 = 0.1
        self.l1 = 0.0
        self.epochs = 10000
        self.eta = 0.001
        self.alpha = 0.001
        self.decrease_const = 0.00001
        self.minibatches = 50

    def _labels_to_number(self, label):
        return self.directions[label]

    def _number_to_label(self, number):
        return self.labels[number]

    @staticmethod
    def compress_image(pic):
        """
        Compress image to half size
        pic: np array of dimensions M x N
        return: np array of dimensions M/2 x N/2
        """
        return np.array([
            pic[m:m + 2, n:n + 2].sum() / 4.0
            for m in range(0, pic.shape[0], 2)
            for n in range(0, pic.shape[1], 2)
        ]).reshape((int(pic.shape[0] / 2), int(pic.shape[1] / 2)))

    def _pic_to_features(self, pic):
        '''
        Convert pictures to two time compressed feature array
        pic: np array of dimensions M x N
        return: np array of one dimensions with size M/4 * N/4
        '''
        return self.compress_image(self.compress_image(pic)).reshape(-1)

    def _read_pics(self, folder="TrainingSet"):
        '''
        Reads all image and labels from given folder
        folder: TrainingSet or TestSet
        return: list of tuple of label and image matrix
        '''
        return [(np.asarray(Image.open(folder + "/" + i).convert('L')),
                 i.split('_')[self.file_label_number])
                for i in os.listdir(folder)]

    def _pic_to_X_y(self, folder="TrainingSet"):
        '''
        Convert traing set pictures to X and y
        return: tuple of X, y
        '''
        return map(np.asarray, zip(*[
            (
                self._pic_to_features(pic),
                self._labels_to_number(label)
            )
            for pic, label in self._read_pics()
        ]))

    def fit(self):
        '''
        Fits data for traing set
        '''
        self._model = NeuralNetMLP(c=self.l2, epochs=self.epochs, eta=self.eta, n_output=4, n_features=960)
        (X, y) = self._pic_to_X_y()
        self._model.fit(X, y)

    def predict(self, pic):
        '''
        pic: np array of dimensions 120 x 128 representing an image
        return: String specifying direction that the subject is facing
        '''
        x = self._pic_to_features(pic)
        return self._number_to_label(self._model.predict(np.array([x]))[0])

    def __str__(self):
        return ' '.join([
            str(self.labels),
            "l2:", str(self.l2),
            "l1:", str(self.l1),
            "epochs:", str(self.epochs),
            "eta:", str(self.eta),
            "alpha:", str(self.alpha),
            "decrease_const:", str(self.decrease_const),
            "minibatches:", str(self.minibatches)
        ])

In [3]:
class EmotionFeltPredictor(FaceDirectionPredictor):

    def __init__(self, labels=('neutral', 'sad', 'angry', 'happy')):
        super(self.__class__, self).__init__(labels)
        self.file_label_number = 2
        self.l2 = 0.01

    @staticmethod
    def compress_border(pic):
        x, y = pic.shape
        return pic[int(x / 4):int(x * 3 / 4), int(y / 4):int(y * 3 / 4)]

    def _pic_to_features(self, pic):
        '''
        Convert pictures to two time compressed feature array
        pic: np array of dimensions M x N
        return: np array of one dimensions with size M/4 * N/4
        '''
        return self.compress_image(self.compress_border(pic)).reshape(-1)

In [4]:
from sklearn.metrics import accuracy_score

def accuracy(model):
    print(str(model))

    (matrixs, labels) = zip(*model._read_pics())
    predicted_labels = [model.predict(m) for m in matrixs]
    score = accuracy_score(labels, predicted_labels)
    print('train accuracy: %s ' % score)

    (matrixs, labels) = zip(*model._read_pics('TestSet'))
    predicted_labels = [model.predict(m) for m in matrixs]
    score = accuracy_score(labels, predicted_labels)
    print('test accuracy: %s ' % score)

## Part B without hyperparamater optimization

In [16]:
direction_predictor = FaceDirectionPredictor()
direction_predictor.l1 = 0
direction_predictor.fit()

Epoch: 1 cost= 1431.46
Epoch: 1001 cost= 1.24864
Epoch: 2001 cost= 1.24372
Epoch: 3001 cost= 1.21121
Epoch: 4001 cost= 1.23506
Epoch: 5001 cost= 1.2235
Epoch: 6001 cost= 1.25993
Epoch: 7001 cost= 1.21988
Epoch: 8001 cost= 1.2613
Epoch: 9001 cost= 1.23
End


In [18]:
accuracy(direction_predictor)

('left', 'straight', 'right', 'up') l2: 0.1 l1: 0 epochs: 10000 eta: 0.001 alpha: 0.001 decrease_const: 1e-05 minibatches: 50
train accuracy: 0.977777777778 
test accuracy: 0.977777777778 


## Part c with hyperparamater optimization

In [166]:
direction_predictor = FaceDirectionPredictor()
direction_predictor.fit()

Epoch: 1 cost= 1454.55
Epoch: 1001 cost= 1.24857
Epoch: 2001 cost= 1.23577
Epoch: 3001 cost= 1.24777
Epoch: 4001 cost= 1.2128
Epoch: 5001 cost= 1.21919
Epoch: 6001 cost= 1.23164
Epoch: 7001 cost= 1.23283
Epoch: 8001 cost= 1.24194
Epoch: 9001 cost= 1.2393
End


In [167]:
accuracy(direction_predictor)

('left', 'straight', 'right', 'up') l2: 0.1 l1: 0.0 epochs: 10000 eta: 0.001 alpha: 0.001 decrease_const: 1e-05 minibatches: 50
train accuracy: 0.987301587302 
test accuracy: 0.988888888889 


## Part d face detection

In [151]:
emotion_predictor = EmotionFeltPredictor()
emotion_predictor.fit()

Epoch: 1 cost= 145.733
Epoch: 1001 cost= 31.8213
Epoch: 2001 cost= 7.34582
Epoch: 3001 cost= 2.29287
Epoch: 4001 cost= 1.49015
Epoch: 5001 cost= 1.40712
Epoch: 6001 cost= 1.33795
Epoch: 7001 cost= 1.33143
Epoch: 8001 cost= 1.31391
Epoch: 9001 cost= 1.32125
End


In [152]:
accuracy(emotion_predictor)

('neutral', 'sad', 'angry', 'happy') l2: 0.01 l1: 0.0 epochs: 10000 eta: 0.001 alpha: 0.001 decrease_const: 1e-05 minibatches: 50
train accuracy: 0.288888888889 
test accuracy: 0.266666666667 
