<H2>Training a convolutional net to recognise faces</H2>

The first machine learning method we will try in this experiment is the convolutional neural net (CNN), which is very popular for image processing, as convolutional layers mean it is able to work with large numbers of dimensions, such as in images.

In a previous notebook, we created a dictionary of (82% accurate) labelled faces which was stored in a .txt file, so we'll load this rather than re-calculating. See notebook 4.1-MG92-extract-faces-with-names.ipynb to see the method).

In [163]:
import os
import cv2
import spacy
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from matplotlib import gridspec
from scipy.io import loadmat
from skimage.transform import rescale
from tensorflow.contrib import lookup
from tensorflow.python.platform import gfile

%matplotlib inline

In [3]:
def convert_to_rgb(img):
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

In [4]:
load_extr = np.load('../data/processed/labelled_faces.txt')
correct_extracts = dict(load_extr[()])
print(len(correct_extracts['Norah Jones']))

10


In [187]:
#Clean the data
#stopwords = ['actor', 'jan', 'out', 'abc', 'ap photo', 'ap ph', 'capital hill']
#act_labels = [word.lower() for word in list(correct_extracts.keys())]

act_labels = str(act_labels).replace('>','').replace('<','').replace('\\','')\
            .replace('`','').replace('\'','').replace('[','').replace(']','')\
            .replace('  ',' ').strip().split(', ')

unq_labels = np.unique(act_labels)
        
clean_labels = [word for word in unq_labels if word.lower()] #not in stopwords and not 
                #word.isdigit()]  

print('Length of original labels: %d \nLength of labels after cleaning: %d'%(len(act_labels),
                                                                            len(clean_labels)))

indcs = [clean_labels.index(word.lower()) for word in act_labels]
one_hot_labels = tf.one_hot(indices= indcs , depth=len(clean_labels))

with tf.Session() as sess:
    o_h_labels =  (sess.run(one_hot_labels))
    
print('Shape of one-hot encoded labels: ', o_h_labels.shape)

Length of original labels: 1234 
Length of labels after cleaning: 1223
Shape of one-hot encoded labels:  (1234, 1223)


<h2>Training the CNN</h2>
First, we must address the fact that all of the face extract images will be different shapes. To do this, we will reshape each image to the smallest shape

In [5]:
celeb_name = 'Norah Jones'

print('Shape of %s image 1: %s'%(celeb_name,np.shape(correct_extracts[celeb_name][0])))
print('Shape of %s image 2: %s'%(celeb_name,np.shape(correct_extracts[celeb_name][1])))
print('Shape of %s image 3: %s'%(celeb_name,np.shape(correct_extracts[celeb_name][2])))

print('Minumum shape of %s images: %s'%(celeb_name,min([len(extr) for extr in correct_extracts[celeb_name]])))


Shape of Norah Jones image 1: (104, 104, 3)
Shape of Norah Jones image 2: (97, 97, 3)
Shape of Norah Jones image 3: (114, 114, 3)
Minumum shape of Norah Jones images: 97


In [6]:
'''reshaped_extracts = {}
for name, imgs in correct_extracts.items():
    print(name)
    min_shape = min([len(extr) for extr in correct_extracts[name]])
    new_imgs = []
    for img in imgs:
        new_img = rescale(img, (len(img)/min_shape))
        new_imgs.append(new_img)
    print('Shape of new images %s'%np.shape(new_imgs))
    reshaped_extracts[name]=new_imgs
    if name=='Bush':
        break'''

"reshaped_extracts = {}\nfor name, imgs in correct_extracts.items():\n    print(name)\n    min_shape = min([len(extr) for extr in correct_extracts[name]])\n    new_imgs = []\n    for img in imgs:\n        new_img = rescale(img, (len(img)/min_shape))\n        new_imgs.append(new_img)\n    print('Shape of new images %s'%np.shape(new_imgs))\n    reshaped_extracts[name]=new_imgs\n    if name=='Bush':\n        break"

In [7]:
img_place = tf.placeholder(tf.float32, [None,512,512,1])
batch_size=1
mtm =0.9
g_dim = 16
z_dim=100
epsilon = 1e-5

In [8]:
def build_cnn(input_img):
    input_layer = tf.reshape(input_img, [-1,64,64,1])
    
    conv1 = tf.layers.conv2d(inputs=input_layer,
                             filters=32,
                             kernel_size=[5, 5],
                             padding="same",
                             activation=tf.nn.relu
                            )
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
    conv2 = tf.layers.conv2d(inputs=pool1,
                             filters=64,
                             kernel_size=[5, 5],
                             padding="same",
                             activation=tf.nn.relu
                            )
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
    logits = tf.layers.dense(inputs=dropout, units=10)
    

In [9]:
def batch_norm(input_, scop,train=True):
    return tf.contrib.layers.batch_norm(input_, decay=mtm, updates_collections=None,
    epsilon=epsilon,scale=True,is_training=train,scope="batch_norm_{}".format(scop))

In [10]:
def linear(input_, output_dim, scop):
    with tf.variable_scope('lin_{}'.format(scop)) as scope:
        shape = input_.get_shape().as_list()
        matrix = tf.get_variable("Matrix", [shape[1], output_dim], tf.float32,
            tf.random_normal_initializer())
        bias = tf.get_variable("bias",[output_dim],initializer=tf.constant_initializer(0.0))
    return tf.matmul(input_, matrix)+bias, matrix, bias

In [11]:
def conv2d(input_, output_dim, scop, k_h=5, k_w=5):
    with tf.variable_scope('conv{}'.format(scop)) as scope:
        w=tf.get_variable('w{}'.format(scop), [k_h, k_w,input_.get_shape()[-1],output_dim],
            initializer=tf.truncated_normal_initializer(), dtype=tf.float32)
        conv=tf.nn.conv2d(input_, w, strides=[1,2,2,1], padding='SAME')
        b=tf.get_variable('b',[output_dim],initializer=tf.constant_initializer(0.0))
        conv=tf.reshape(tf.nn.bias_add(conv,b),conv.get_shape())
    return conv, w, b

In [12]:
def deconv2d(input_, output_size, scop, k_h=5, k_w=5):
    with tf.variable_scope('deconv{}'.format(scop)) as scope:
        w=tf.get_variable('w{}'.format(scop), [k_h, k_w, output_size[-1], input_.get_shape()[-1] ],
            initializer=tf.random_normal_initializer(), dtype=tf.float32)
        conv=tf.nn.conv2d_transpose(input_, w, output_shape=output_size, strides=[1,2,2,1], padding='SAME')
        b=tf.get_variable('b',[output_size[-1]],initializer=tf.constant_initializer(0.0))
        deconv=tf.reshape(tf.nn.bias_add(conv,b),conv.get_shape())
    return deconv, w, b

In [13]:
def generator(z, img_w, img_h, img_d):
    #z0,w0,b0=linear(z, img_h*img_w*img_d*g_dim*8/(16*16*16), 'gen') #None * 144*9*9*1*8 = 5832
    h0=tf.reshape(z, [-1, int(img_w/16), int(img_h/16), g_dim*8]) #9*9*1*8*14 = 5832
    h0 = tf.nn.relu(batch_norm(h0, "h0"))

    c1, w1, b1 = deconv2d(h0, [batch_size, int(img_w/8), int(img_h/8), g_dim*4],'_1' ) #10*18*18*18*576 = 33592320
    h1= tf.nn.relu(batch_norm(c1,"h1"))

    c2, w2, b2 = deconv2d(h1, [batch_size, int(img_w/4), int(img_h/4), g_dim*2],'_2' )
    h2 =tf.nn.relu(batch_norm(c2,"h2"))

    c3, w3, b3 = deconv2d(h2, [batch_size, int(img_w/2), int(img_h/2), g_dim],'_3' )
    h3 =tf.nn.relu(batch_norm(c3,"h3"))

    c4, w4, b4 = deconv2d(h3, [batch_size, img_w, img_h, 1],'_4' )
    h4=tf.nn.tanh(c4)

    print("shape h4",tf.shape(h4))
    theta_g = [w1, w2, w3, w4, b1, b2, b3, b4]
    return h4, theta_g

  warn("The default mode, 'constant', will be changed to 'reflect' in "


64 64 3
shape h4 Tensor("Shape:0", shape=(4,), dtype=int32)
(1, 64, 64, 1)


In [192]:
def build_cnn(x,img_w, img_h, reuse=False):
    if reuse:
        tf.get_variable_scope().reuse_variables()
    x=tf.reshape(x, shape=[batch_size,img_w, img_h,1])
    x=tf.cast(x, tf.float32)

    c1, w1, b1=conv2d(x ,16, '_1')
    h1=tf.nn.relu(batch_norm(c1,"d_h1"))

    c2, w2, b2=conv2d(h1,32, '_2')
    h2=tf.nn.relu(batch_norm(c2,"d_h2"))

    c3, w3, b3=conv2d(h2,64, '_3')
    h3=tf.nn.relu(batch_norm(c3,"d_h3"))
    h3=tf.reshape(h3, [batch_size,-1])

    l4, w4, b4=linear(h3,1223, 'dis')
    h4=tf.nn.sigmoid(l4)
    
    #theta_d=[w1, w2, w3, w4, b1, b2, b3, b4]
    return h4,l4


In [193]:
example_img = rescale(correct_extracts[celeb_name][1], (64/len(correct_extracts[celeb_name][1])))
img_w, img_h, img_d = np.shape(example_img)[0], np.shape(example_img)[1], np.shape(example_img)[2]
print(img_w, img_h, img_d)
z = tf.placeholder(tf.float32, shape=[None,z_dim])
g_z, theta_g = generator(z, img_w, img_h, img_d)
print(np.shape(g_z))

  warn("The default mode, 'constant', will be changed to 'reflect' in "


64 64 3
shape h4 Tensor("Shape:0", shape=(4,), dtype=int32)
(1, 64, 64, 1)


In [194]:
tf.reset_default_graph()
x = tf.placeholder(tf.float32, shape=[None]+[img_h,img_w])
d_x, logits_x = build_cnn(x,img_w, img_h)
print(d_x)

Tensor("Sigmoid:0", shape=(1, 1223), dtype=float32)


In [195]:
loss = tf.losses.softmax_cross_entropy(onehot_labels=o_h_labels[1], logits=logits_x)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(loss=loss,
                              global_step=tf.train.get_global_step())

ValueError: Shapes (1, 1223) and (1223,) are incompatible

In [16]:
sess= tf.Session()