# Face Recognition Challenge - Parth S. Patel

### Problem:
We will build a classification model that looks at 2D images of famous celebrities and predicts each person's name from the image.

### Solution:

In [1]:
import os
import random

import numpy as np
import tensorflow as tf
from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split

  from ._conv import register_converters as _register_converters


In [2]:
from utils.tensorboard import Tensorboard
from model import Model
from utils.augment import Augment
from train import train

### Data
Fetch the data and split into images and labels

In [3]:
# Load the LFW people data set (downloading it when it is not cached yet):
# colour images at their original scale, limited to people that have at
# least 20 pictures.
dataset = fetch_lfw_people(min_faces_per_person=20,
                           color=True,
                           resize=1.0,
                           download_if_missing=True,
                           data_home=None)

# Image pixel arrays and the integer class id attached to each image.
images, labels = dataset.images, dataset.target

One Hot Encode the labels

In [4]:
# One-hot encode the labels: row k of a K x K identity matrix is the one-hot
# vector for class k, so indexing the identity with `labels` picks one row
# per image (K = number of distinct label values).
labels_encoded = np.eye(len(set(labels)))[labels]

Data Description

In [5]:
# Summary of the raw (un-augmented) data set, one line per fact.
print('> Data Shape: {}'.format(images.shape),
      '> Label Shape: {}'.format(labels.shape),
      '> Number of Classes: {}'.format(len(set(dataset.target_names))),
      '> People: {}'.format(set(dataset.target_names)),
      '> Classes: {}'.format(set(labels)),
      sep='\n')

> Data Shape: (3023, 125, 94, 3)
> Label Shape: (3023,)
> Number of Classes: 62
> People: {'Junichiro Koizumi', 'Saddam Hussein', 'Alejandro Toledo', 'George W Bush', 'John Ashcroft', 'Tom Daschle', 'David Beckham', 'Jose Maria Aznar', 'Bill Clinton', 'Lleyton Hewitt', 'Vicente Fox', 'Amelie Mauresmo', 'Mahmoud Abbas', 'Donald Rumsfeld', 'Serena Williams', 'Angelina Jolie', 'George Robertson', 'Ariel Sharon', 'Guillermo Coria', 'Jack Straw', 'Jennifer Lopez', 'Megawati Sukarnoputri', 'Alvaro Uribe', 'Roh Moo-hyun', 'Jiang Zemin', 'Arnold Schwarzenegger', 'Nestor Kirchner', 'Andre Agassi', 'Pete Sampras', 'Hamid Karzai', 'Tom Ridge', 'Lindsay Davenport', 'Tiger Woods', 'Jean Chretien', 'Atal Bihari Vajpayee', 'Hans Blix', 'Silvio Berlusconi', 'Igor Ivanov', 'Hugo Chavez', 'Laura Bush', 'Gerhard Schroeder', 'Recep Tayyip Erdogan', 'Carlos Menem', 'Vladimir Putin', 'Jennifer Capriati', 'John Negroponte', 'Michael Bloomberg', 'Gray Davis', 'Jennifer Aniston', 'Naomi Watts', 'Kofi Annan', '

### Augmentation
Augment the data through:
* random cropping of the image from 125x94 to 63x63 pixels
* randomly modifying the hue, contrast, and brightness
* randomly flip images horizontally

*Warning: Augmentation takes a long time.*

Crop the images twice to double the data size

In [6]:
aug = Augment()

# Two separate 63x63 crop passes over the full image set.
croped = [aug.randomCropAll(images, 63, 63) for _ in range(2)]

Crop, modify hue, contrast, brightness, and flip the images three times

In [None]:
# Three augmentation passes over the original images, each with its own list
# of operations and a 63x63 output size. The passes run in order a1, a2, a3.
a1, a2, a3 = [
    aug.augment(images=images, operations=operations, width=63, height=63)
    for operations in (['flip_h', 'brightness', 'hue'],
                       ['flip_h', 'hue'],
                       ['flip_h', 'contrast', 'hue'])
]

> Augmented images with ['flip_h', 'brightness', 'hue']


Combine the augmented images

In [None]:
# Every augmented copy of the data set, in a fixed order.
combined_augments = (croped[0], croped[1], a1, a2, a3)

# Stack the image copies along the sample axis, and repeat the one-hot labels
# once per copy so that labels stay aligned with their images.
images_selected = np.concatenate(combined_augments, axis=0)
labels_selected = np.concatenate([labels_encoded] * len(combined_augments), axis=0)

Ensure the images and labels are numpy arrays

In [None]:
# Safety net: make sure both arrays really are numpy ndarrays.
# isinstance() is used instead of comparing module-name strings with `is not`,
# which tests object identity (not equality) and is therefore unreliable.
if not isinstance(images_selected, np.ndarray):
    print('> Converting images to a numpy array')
    images_selected = np.array(images_selected)

if not isinstance(labels_selected, np.ndarray):
    print('> Converting labels to a numpy array')
    labels_selected = np.array(labels_selected)

Augmented Data Description

In [None]:
# Describe the *augmented* data: the shapes come from the combined arrays
# (images_selected / labels_selected), not from the raw images and labels.
print('> Data Shape: {}'.format(images_selected.shape))
print('> Label Shape: {}'.format(labels_selected.shape))
print('> Number of Classes: {}'.format(len(set(dataset.target_names))))
print('> People: {}'.format(set(dataset.target_names)))
print('> Classes: {}'.format(set(labels)))

Split Data into training, test, and validation sets

In [None]:
# First hold out 30% of all samples as the test set.
X_rest, X_test, y_rest, y_test = train_test_split(images_selected, labels_selected, test_size=0.30)

# Then split only the remaining 70% into training and validation data.
# Splitting the full data set a second time would put test samples into the
# training/validation sets and invalidate the test score.
X_train, X_val, y_train, y_val = train_test_split(X_rest, y_rest, test_size=0.30)

### Tensorboard

Tensorboard parameters

In [None]:
# Directory the model is given for its tensorboard output (relative to the
# notebook's working directory).
tensorboard_directory = r'./tmp/tensorboard/015'

# Absolute form of the same directory, derived instead of hard-coding a
# user-specific path so the notebook works on any machine / checkout location.
tensorboard_paths = [os.path.abspath(tensorboard_directory)]

# One display name per entry in tensorboard_paths.
tensorboard_names = ['model']

Tensorboard command

In [None]:
# Pass the log paths and their display names to the Tensorboard helper,
# bound to localhost. NOTE(review): presumably `_print=True` prints the
# resulting tensorboard command — confirm in utils.tensorboard.
Tensorboard.make(host='127.0.0.1',
                 paths=tensorboard_paths, names=tensorboard_names,
                 _print=True)

### Tunable Training Parameters

#### Conv2d Params
* filters : Integer, dimensionality of the output space (i.e. the number of filters in the convolution)
* kernel_size : An integer or tuple/list of 2 integers, specifying the height and width of the 2D convolution window.  Can be a single integer to specify the same value for all spatial dimensions
* strides : An integer or tuple/list of 2 integers, specifying the strides of the convolution along the height and width.  Can be a single integer to specify the same value for all spatial dimensions

In [None]:
# Three single-entry lists, one per square kernel size (3, 5 and 7); every
# entry uses 64 filters and a stride of 1 in both directions.
conv2d_specifications = [
    [{'filters': 64, 'kernel_size': [size, size], 'strides': (1, 1)}]
    for size in (3, 5, 7)
]

#### Max Pool Params
* pool_size : An integer or tuple/list of 2 integers: (pool_height, pool_width) specifying the size of the pooling window.  Can be a single integer to specify the same value for all spatial dimensions
* strides : An integer or tuple/list of 2 integers, specifying the strides of the pooling operation.  Can be a single integer to specify the same value for all spatial dimensions

In [None]:
# Three identical single-entry lists: pooling enabled, 3x3 window, stride 1.
# Each entry is built fresh so no dict or list object is shared between them.
max_pool_specifications = [
    [{'use': True, 'pool_size': [3, 3], 'strides': [1, 1]}]
    for _ in range(3)
]

#### Fully Connected & Dense Params

In [None]:
# Number of dense layers, and one parameter dict (62 units) per dense layer.
num_dense = 2
fc_parameters = [{'units': 62} for _ in range(num_dense)]

#### Dropout Params
* use : to use dropout in this layer
* rate : dropout rate

In [None]:
# Two dropout entries, both enabled with a rate of 0.5.
dropout_parameters = [{'use': True, 'rate': 0.5} for _ in range(2)]

#### Other Params

In [None]:
# Regularisation switches.
use_batch_norm = True
use_dropout = True

# Optimisation settings.
batch_size = 30
epochs = 5000
# The model does not keep a constant learning rate; the rate changes while
# the model trains.
learning_rate = 0.001

In [None]:
# Quick reminder of the input dimensions and class count used by the model.
print('> Data Shape: {}'.format(images_selected.shape),
      '> Number of Classes: {}'.format(len(set(dataset.target_names))),
      sep='\n')

In [None]:
# Number of distinct label values present in the data.
num_classes = len(set(labels))

# Shape of one model input: 63 x 63 pixels with 3 channels.
data_shape = [63, 63, 3]

### Initialize Model

In [None]:
# Build the model from the tunable parameters defined above, using a new
# TensorFlow session.
model = Model(sess=tf.Session(),
              data_shape=data_shape,
              num_classes=num_classes,
              num_dense=num_dense,
              learning_rate=learning_rate,
              # Pass the batch-norm switch itself; passing use_dropout here
              # tied batch normalisation to the dropout setting.
              use_batch_norm=use_batch_norm,
              use_dropout=use_dropout,
              conv_parameters=conv2d_specifications,
              max_pool_parameters=max_pool_specifications,
              dropout_parameters=dropout_parameters,
              fc_parameters=fc_parameters,
              tensorboard_directory=tensorboard_directory)

In [None]:
# Pass the training split to the model.
model.train_data(data=X_train, labels=y_train)

In [None]:
# Pass the validation split to the model.
model.val_data(data=X_val, labels=y_val)

In [None]:
# Pass the held-out test split to the model.
model.test_data(data=X_test, labels=y_test)

### Train Model

In [None]:
# `restore` is not defined by any earlier cell, so it is set here to keep the
# notebook runnable from a fresh kernel.
# NOTE(review): False presumably means "train from scratch instead of
# restoring saved weights" — confirm against Model.train.
restore = False

# The same batch size is used for training and validation batches.
# NOTE(review): this runs a single epoch; the `epochs` value from the tunable
# parameters is not used here — pass epochs=epochs for a full training run.
model.train(batch_size=batch_size,
            batch_size_val=batch_size,
            epochs=1,
            is_restore=restore)

In [None]:
### Test Model