# Face Recognition Challenge - Parth S. Patel

### Problem:
We will try to build a classification model that will run through multiple 2d pics of famous celebreties and predict name based on image.

### Solution:

In [1]:
import numpy as np
import random
import tensorflow as tf
from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import pylab as pl

  from ._conv import register_converters as _register_converters


In [2]:
from utils.tensorboard import Tensorboard
from model import Model
from utils.augment import Augment
from train import train

### Data
Fetch the data and split into images and labels

In [3]:
dataset = fetch_lfw_people(data_home=None,
                           resize=1.0,
                           color=True,
                           download_if_missing=True,
                           min_faces_per_person=20)

images = dataset.images
labels = dataset.target

One Hot Encode the labels

In [4]:
labels_encoded = np.zeros((len(labels), len(set(labels))))
labels_encoded[np.arange(len(labels)), labels] = 1

Data Description

In [5]:
print('> Data Shape: {}'.format(images.shape))
print('> Label Shape: {}'.format(labels.shape))
print('> Number of Classes: {}'.format(len(set(dataset.target_names))))
print('> People: {}'.format(set(dataset.target_names)))
print('> Classes: {}'.format(set(labels)))

> Data Shape: (3023, 125, 94, 3)
> Label Shape: (3023,)
> Number of Classes: 62
> People: {'Jack Straw', 'Colin Powell', 'Jeremy Greenstock', 'Jennifer Aniston', 'Hans Blix', 'Hugo Chavez', 'Kofi Annan', 'Mahmoud Abbas', 'Jean Chretien', 'Nestor Kirchner', 'David Beckham', 'Hamid Karzai', 'Vicente Fox', 'Laura Bush', 'Tom Daschle', 'Jennifer Lopez', 'Jiang Zemin', 'Alejandro Toledo', 'Guillermo Coria', 'Rudolph Giuliani', 'Bill Clinton', 'Lindsay Davenport', 'Ariel Sharon', 'Lleyton Hewitt', 'Andre Agassi', 'George Robertson', 'Igor Ivanov', 'John Negroponte', 'Carlos Menem', 'Junichiro Koizumi', 'Jennifer Capriati', 'Gerhard Schroeder', 'Recep Tayyip Erdogan', 'Tiger Woods', 'John Ashcroft', 'Ricardo Lagos', 'Arnold Schwarzenegger', 'Megawati Sukarnoputri', 'Jacques Chirac', 'George W Bush', 'Paul Bremer', 'Tom Ridge', 'Amelie Mauresmo', 'Juan Carlos Ferrero', 'Roh Moo-hyun', 'Donald Rumsfeld', 'Vladimir Putin', 'Alvaro Uribe', 'Saddam Hussein', 'Naomi Watts', 'Atal Bihari Vajpayee', 

### Augmentation
Augment the data through:
* random cropping of the image from 125x94 to 63x63 pixels
* randomly modifying the hue, contrast, brightness
* randomly flip images horizontally

*Warning: Augmentation takes a long time.*

Crop the images twice to double the data size

In [6]:
aug = Augment()

In [7]:
images_selected = aug.randomCropAll(images, 63, 63)
labels_selected = labels_encoded
labels_names_selected = dataset.target_names

Ensure the images and labels are numpy arrays

In [8]:
if type(images_selected).__module__ is not np.__name__:
    print('> Converting images to a numpy array')
    images_selected = np.array(images_selected)

if type(labels_selected).__module__ is not np.__name__:
    print('> Converting labels to a numpy array')
    labels_selected = np.array(labels_selected)

> Converting images to a numpy array
> Converting labels to a numpy array


Augmented Data Description

In [9]:
print('> Data Shape: {}'.format(images.shape))
print('> Label Shape: {}'.format(labels.shape))
print('> Number of Classes: {}'.format(len(set(dataset.target_names))))
print('> People: {}'.format(set(dataset.target_names)))
print('> Classes: {}'.format(set(labels)))

> Data Shape: (3023, 125, 94, 3)
> Label Shape: (3023,)
> Number of Classes: 62
> People: {'Jack Straw', 'Colin Powell', 'Jeremy Greenstock', 'Jennifer Aniston', 'Hans Blix', 'Hugo Chavez', 'Kofi Annan', 'Mahmoud Abbas', 'Jean Chretien', 'Nestor Kirchner', 'David Beckham', 'Hamid Karzai', 'Vicente Fox', 'Laura Bush', 'Tom Daschle', 'Jennifer Lopez', 'Jiang Zemin', 'Alejandro Toledo', 'Guillermo Coria', 'Rudolph Giuliani', 'Bill Clinton', 'Lindsay Davenport', 'Ariel Sharon', 'Lleyton Hewitt', 'Andre Agassi', 'George Robertson', 'Igor Ivanov', 'John Negroponte', 'Carlos Menem', 'Junichiro Koizumi', 'Jennifer Capriati', 'Gerhard Schroeder', 'Recep Tayyip Erdogan', 'Tiger Woods', 'John Ashcroft', 'Ricardo Lagos', 'Arnold Schwarzenegger', 'Megawati Sukarnoputri', 'Jacques Chirac', 'George W Bush', 'Paul Bremer', 'Tom Ridge', 'Amelie Mauresmo', 'Juan Carlos Ferrero', 'Roh Moo-hyun', 'Donald Rumsfeld', 'Vladimir Putin', 'Alvaro Uribe', 'Saddam Hussein', 'Naomi Watts', 'Atal Bihari Vajpayee', 

Split Data into training, test, and validation sets

In [10]:
_, X_test, _, y_test = train_test_split(images_selected, labels_selected, test_size=0.30)

### Tensorboard

Tensorboard parameters

In [11]:
tensorboard_directory = r'./tmp/tensorboard/014'
tensorboard_paths = [r'C:\Users\parth\Documents\GitHub\Facial-Recognition\tmp\tensorboard\014']
tensorboard_names = ['model']

Tensorboard command

In [12]:
Tensorboard.make(paths=tensorboard_paths,
                 names=tensorboard_names,
                 host='127.0.0.1',
                 _print=True)

> tensorboard --logdir=model:C:\Users\parth\Documents\GitHub\Facial-Recognition\tmp\tensorboard\014 --host 127.0.0.1


'tensorboard --logdir=model:C:\\Users\\parth\\Documents\\GitHub\\Facial-Recognition\\tmp\\tensorboard\\014 --host 127.0.0.1'

### Tunable Training Parameters

#### Conv2d Params
* filters : Integer, dimensionality of the output space (ie. the number of filters in the convolution)
* kernel_size : An integer or tuple/list of 2 integers, specifying the height and width of the 2D convolution window.  Can be a single integer to specify the same value for all spatial dimensions
* strides : An integer or tuple/list of 2 integers, specifying the strides of the convolution along the height and width.  Can be a single integer to specify the same value for all spatial dimensions

In [13]:
conv2d_specifications = [[{'filters': 64, 'kernel_size': [3, 3], 'strides': (1, 1)}],
                         [{'filters': 64, 'kernel_size': [5, 5], 'strides': (1, 1)}],
                         [{'filters': 64, 'kernel_size': [7, 7], 'strides': (1, 1)}]]

#### Max Pool Params
* pool_size : An integer or tuple/list of 2 integers: (pool_height, pool_width) specifying the size of the pooling window.  Can be a single integer to specify the same value for all spatial dimensions
* strides : n integer or tuple/list of 2 integers, specifying the strides of the pooling operation.  Can be a single integer to specify the same value for all spatial dimensions

In [14]:
max_pool_specifications = [[{'use': True, 'pool_size': [3, 3], 'strides': [1, 1]}],
                           [{'use': True, 'pool_size': [3, 3], 'strides': [1, 1]}],
                           [{'use': True, 'pool_size': [3, 3], 'strides': [1, 1]}]]

#### Fully Connected & Dense Params

In [15]:
num_dense = 2
fc_parameters = [{'units': 62}, {'units': 62}]

#### Dropout Params
* use : to use dropout in this layer
* rate : dropout rate

In [16]:
dropout_parameters = [{'use': True, 'rate': 0.5},
                      {'use': True, 'rate': 0.5}]

#### Other Params

In [17]:
learning_rate = 0.001  # A const. learning rate is not defined in the model, instead the learning rate changes as the model trains.
epochs = 5000
use_batch_norm = True
use_dropout = True
batch_size = 30

In [18]:
print('> Data Shape: {}'.format(images_selected.shape))
print('> Number of Classes: {}'.format(len(set(dataset.target_names))))

> Data Shape: (3023, 63, 63, 3)
> Number of Classes: 62


In [19]:
data_shape = [63, 63, 3]
num_classes = len(set(labels))

### Initalize Model

In [20]:
model = Model(sess=tf.Session(),
              data_shape=data_shape,
              num_classes=num_classes,
              num_dense=num_dense,
              learning_rate=learning_rate,
              use_batch_norm=use_dropout,
              use_dropout=use_dropout,
              conv_parameters=conv2d_specifications,
              max_pool_parameters=max_pool_specifications,
              dropout_parameters=dropout_parameters,
              fc_parameters=fc_parameters,
              tensorboard_directory=tensorboard_directory)

> Input Tensor: [Dimension(None), Dimension(63), Dimension(63), Dimension(3)]
> Layer   1: [Dimension(None), Dimension(61), Dimension(61), Dimension(64)]
> Layer   2: [Dimension(None), Dimension(59), Dimension(59), Dimension(64)]
> Layer   3: [Dimension(None), Dimension(57), Dimension(57), Dimension(64)]
> Fully Connected 1: [Dimension(None), Dimension(62)]
> Dropout Enabled
> Batch Norm Enabled
> Fully Connected 2: [Dimension(None), Dimension(62)]
INFO:tensorflow:Summary name Val Accuracy is illegal; using Val_Accuracy instead.


### Test Model

In [21]:
model.test_data(data=X_test,
                labels=y_test)

In [None]:
predicted, actual = model.test(batch_size=batch_size)

In [None]:
print(predicted, actual)

In [None]:
print(classification_report(actual, predicted, target_names=labels_names_selected))

In [None]:
def title(y_pred, y_test, target_names, i):
    pred_name = target_names[y_pred[i]].rsplit(' ', 1)[-1]
    true_name = target_names[y_test[i]].rsplit(' ', 1)[-1]
    return 'predicted: %s\ntrue:      %s' % (pred_name, true_name)

prediction_titles = [title(y_pred, y_test, target_names, i)
                         for i in range(y_pred.shape[0])]

n_row=2
n_col=6

pl.figure(figsize=(2 * n_col, 1.9 * n_row))
pl.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.29)
for i in range(n_row * n_col):
    pl.subplot(n_row, n_col, i + 1)
    pl.imshow(X_test[i].reshape((height, width)), cmap=pl.cm.gray)
    pl.title(prediction_titles[i], size=12)
    pl.xticks(())
    pl.yticks(())