# DogFaceNet version 2

### Imports

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

import os
import numpy as np
import skimage as sk
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook

In [2]:
# Root folder of the DogFaceNet data, relative to the working directory
PATH = '../data/dogfacenet/'
# Folder listed by the resizing step; its sub-folders hold the source pictures
PATH_IMAGES = PATH + 'images/'
# Folder the resized pictures are written to, and later loaded from
PATH_RESIZED = PATH + 'resized/'

# Size of the input image into the network
# (also the target shape passed to the resizing step)
SIZE = (100,100,3)

# Fraction of the loaded images set aside as the test set
TEST_SPLIT = 0.05

### Dataset preprocessing
- Get the dataset from folders
- Associate the corresponding classes
- Resize the dataset
- Shuffle the dataset?
- Divide the dataset into validation, training and testing?

In [3]:
def square_crop(image):
    """
    Crops the central square out of an image.

    The side of the square is the smaller of the image's height and width.
    The longer dimension is trimmed on both sides; when the excess is odd,
    the extra pixel is removed from the end.

    Arguments:
        image: array whose first two axes are (height, width). Trailing axes
            (e.g. channels) are left untouched, so both 2-D grayscale and
            3-D colour arrays are accepted.

    Returns:
        A slice of `image` with equal height and width.
    """

    # Only the two leading axes matter; reading shape[:2] avoids failing on
    # arrays without a channel axis.
    h, w = image.shape[:2]
    side = min(h, w)

    # Offsets of the square; zero along the dimension that is already `side`.
    top = (h - side) // 2
    left = (w - side) // 2

    return image[top:top + side, left:left + side]

Resize pictures

In [7]:
w, h, c = SIZE

label = 0

filenames = os.listdir(PATH_IMAGES)

# Just save the pictures after resizing them
# NOTE(review): the hard-coded start index 598+869 skips the first entries of
# the listing -- presumably to resume an interrupted run; confirm before
# running this cell on a fresh dataset.
for folder in tqdm_notebook(filenames[598+869:]):
    source_dir = PATH_IMAGES + folder
    target_dir = PATH_RESIZED + folder

    for file in os.listdir(source_dir):
        # The counter advances for every file (skipped ones included) and is
        # used as the name of the saved picture.
        label += 1

        # Read the image; arrays that are not 3-D are skipped
        image = sk.io.imread(source_dir + '/' + file)
        if len(image.shape) != 3:
            continue

        # Crop to a square, then resize to the network input size
        image_resized = sk.transform.resize(square_crop(image), SIZE)

        # Save image
        ## Create the output folder the first time it is needed. Checking the
        ## single path avoids re-listing PATH_RESIZED for every picture.
        if not os.path.isdir(target_dir):
            os.mkdir(target_dir)
        sk.io.imsave(target_dir + '/' + str(label) + '.jpg', image_resized)

  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "
  .format(dtypeobj_in, dtypeobj_out))


Load dataset into memory

In [4]:
# Keep only the folders that hold at least two pictures (unique examples are
# removed). Each folder is listed once and the listing is reused below.
# NOTE(review): os.listdir returns entries in arbitrary order, so the label
# given to a folder depends on the listing order; wrap in sorted() if a
# reproducible labelling is wanted.
filenames = []
class_files = []
for folder in os.listdir(PATH_RESIZED):
    files = os.listdir(PATH_RESIZED + folder)
    if len(files) > 1:
        filenames.append(folder)
        class_files.append(files)

# Compute the number of images
nbof_images = sum(len(files) for files in class_files)

print("Number of images: " + str(nbof_images))

w, h, c = SIZE

# One row per picture; labels[k] is the index of the folder images[k] came from
images = np.empty((nbof_images,w,h,c))
labels = np.empty(nbof_images)

index = 0

# Load images into numpy arrays, folder after folder, so that pictures
# sharing a label are stored contiguously
for label, (folder, files) in enumerate(zip(tqdm_notebook(filenames), class_files)):
    for file in files:
        labels[index] = label
        # Read the image and add it to the table
        images[index] = sk.io.imread(PATH_RESIZED + folder + '/' + file)

        index += 1

assert len(labels)==len(images)

Number of images: 5568





Divide the dataset into train, valid and test:

To create the validation dataset, we use the dogs for which there are more than 3 pictures and take one of these pictures.

For the testing dataset we simply split the classes. We will then use the network as a one-shot learner on this new dataset. With one picture the network will produce one embedding vector. For each embedding vector, compute the L2 distance to each face to find the most probable one.

In [5]:
w, h, c = SIZE

nbof_test = int(len(images)*TEST_SPLIT)

# The test set is the first `nbof_test` images.
# NOTE(review): the cut is made on an image count, not on a label boundary,
# so the label found at index `nbof_test` may appear on both sides of the cut.
images_test = images[:nbof_test]
labels_test = labels[:nbof_test]

# Everything after the test images is shared between validation and training
remaining_images = images[nbof_test:]
remaining_labels = labels[nbof_test:]

# Mark the validation images: within each run of identical labels, the image
# at position 3 (the fourth one) goes to validation, every other image goes
# to training. The mask is computed once and used for both counting and
# filling.
is_valid = np.zeros(len(remaining_labels), dtype=bool)

state = -1
count_image_class = 0

for offset, current_label in enumerate(remaining_labels):
    if state != current_label:
        # A new run of labels starts here
        state = current_label
        count_image_class = 0
    else:
        count_image_class += 1

    is_valid[offset] = count_image_class == 3

count_valid = int(is_valid.sum())
count_train = len(remaining_labels) - count_valid

print("Total number of images: " + str(len(labels)))
print("Number of test images: " + str(len(labels_test)))
print("Number of validation images: " + str(count_valid))
print("Number of training images: " + str(count_train))

# Boolean indexing copies the selected rows into new arrays
images_valid = remaining_images[is_valid]
labels_valid = remaining_labels[is_valid]

images_train = remaining_images[~is_valid]
labels_train = remaining_labels[~is_valid]

# print(labels)
# print(labels_test)
# print(labels_train)
# print(labels_valid)
print("Is the number of images coherent? " + str(len(labels)==(len(labels_test)+len(labels_train)+len(labels_valid))))

Total number of images: 5568
Number of test images: 278
Number of validation images: 543
Number of training images: 4747
Is the number of images coherent? True


### Define the model
- Define the ArcFace layer
- Try first with a dummy one
- Compile it with the ArcFace loss and an Adam optimizer

Then use transfer learning with a more complex model

Define the Arcface layer

In [18]:
# My custom layer for arcface
# It takes two inputs: one for the embedding, one for the label

from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer
import math

# Arcface should only be used for training
class Arcface(Layer):
    """
    Custom layer computing scaled cosine logits with an additive angular
    margin on the labelled class.

    The layer takes a list of two inputs, `[embedding, labels]`, and returns
    one tensor of shape (batch, out_num). It needs the labels, so it should
    only be used for training.

    Arguments:
        out_num: number of classes, i.e. number of columns of the weight
            matrix and of the output.
        s: scale applied to every logit.
        m: angular margin (radians) added to the angle of the labelled class.
        **kwargs: forwarded to the base `Layer` constructor.
    """

    def __init__(self, out_num, s = 64., m = 0.5, **kwargs):
        # Initialise the base layer first so attribute assignment below goes
        # through a fully set-up Layer.
        super(Arcface, self).__init__(**kwargs)
        # `out_num` is the name read by build/call/compute_output_shape.
        self.out_num = out_num
        # Alias kept because the value used to be stored under this name.
        self.output_dim = out_num
        self.s = s
        self.m = m

    def build(self, input_shape, initializer='uniform'):
        """
        Creates the (embedding_size, out_num) trainable weight matrix.

        Arguments:
            input_shape: list `[embedding_shape, labels_shape]`.
            initializer: initializer passed to `add_weight`.
        """
        assert isinstance(input_shape, list)

        # The last dimension of the embedding input must be known.
        emb_dim = int(input_shape[0][-1])

        # Create a trainable weight variable for this layer.
        # It is not stored as `self.weights`: that name is a read-only
        # property of Keras layers listing all their variables.
        self.kernel = self.add_weight(name='embedding_weights',
                                      shape=(emb_dim, self.out_num),
                                      initializer=initializer,
                                      dtype=tf.float32,
                                      trainable=True)
        super(Arcface, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        """
        Computes the margin logits.

        Arguments:
            x: list `[embedding, labels]`. `embedding` is (batch, emb_dim).
                `labels` holds one class index per sample; it is cast to
                int32 and flattened, so (batch,) and (batch, 1) both work.

        Returns:
            Tensor of shape (batch, out_num): `s * cos(theta)` for every
            class, except for the labelled class where the margin version is
            used.
        """
        assert isinstance(x, list)
        embedding, labels = x

        cos_m = math.cos(self.m)
        sin_m = math.sin(self.m)
        mm = sin_m * self.m  # issue 1
        threshold = math.cos(math.pi - self.m)

        # inputs and weights norm
        embedding_norm = tf.norm(embedding, axis=1, keepdims=True)
        embedding = tf.divide(embedding, embedding_norm, name='norm_embedding')

        # Normalise each column of the weights into a local tensor; the layer
        # variable itself must not be overwritten.
        kernel_norm = tf.norm(self.kernel, axis=0, keepdims=True)
        kernel = tf.divide(self.kernel, kernel_norm, name='norm_weights')

        # cos(theta+m) = cos(theta)cos(m) - sin(theta)sin(m), already scaled by s
        cos_t = tf.matmul(embedding, kernel, name='cos_t')
        cos_t2 = tf.square(cos_t, name='cos_2')
        sin_t2 = tf.subtract(1., cos_t2, name='sin_2')
        sin_t = tf.sqrt(sin_t2, name='sin_t')
        cos_mt = self.s * tf.subtract(tf.multiply(cos_t, cos_m), tf.multiply(sin_t, sin_m), name='cos_mt')

        # this condition controls the theta+m should be in range [0, pi]
        #      0<=theta+m<=pi
        #     -m<=theta<=pi-m
        # relu(...) is non-zero exactly where cos_t > threshold
        cond_v = cos_t - threshold
        cond = tf.cast(tf.nn.relu(cond_v, name='if_else'), dtype=tf.bool)

        # Fallback used where the condition does not hold
        keep_val = self.s*(cos_t - mm)
        cos_mt_temp = tf.where(cond, cos_mt, keep_val)

        # One-hot mask of the labelled class. tf.one_hot needs integer
        # indices, and a rank-1 index tensor to produce a (batch, out_num) mask.
        labels = tf.reshape(tf.cast(labels, tf.int32), [-1])
        mask = tf.one_hot(labels, depth=self.out_num, name='one_hot_mask')
        inv_mask = tf.subtract(1., mask, name='inverse_mask')

        s_cos_t = tf.multiply(self.s, cos_t, name='scalar_cos_t')

        # Margin logit on the labelled class, plain scaled cosine elsewhere
        output = tf.add(tf.multiply(s_cos_t, inv_mask), tf.multiply(cos_mt_temp, mask), name='arcface_loss_output')

        return output

    def compute_output_shape(self, input_shape):
        """
        Returns the shape of the single output tensor: (batch, out_num).
        """
        assert isinstance(input_shape, list)
        shape_emb, shape_lab = input_shape
        return (shape_emb[0], self.out_num)

In [None]:
class Dummy(tf.keras.Model):
    """
    Small convolutional model mapping an image batch to L2-normalised
    embeddings.

    Four convolutions (the first three each followed by max pooling), a
    global average pooling and a dense layer produce the embedding. An
    `Arcface` layer is instantiated as `self.arcface` but is not applied in
    `call`, which returns the embedding only.

    Arguments:
        out_num: number of classes, passed to the `Arcface` layer.
        emb_size: size of the embedding vector.
            NOTE(review): the original code read an undefined `emb_size`
            name; the default of 32 is a placeholder -- confirm the intended
            value.
    """

    def __init__(self, out_num, emb_size=32):
        super(Dummy, self).__init__(name='dummy')
        self.conv1 = tf.keras.layers.Conv2D(10,(3, 3))
        self.pool1 = tf.keras.layers.MaxPooling2D((2, 2))
        self.conv2 = tf.keras.layers.Conv2D(20,(3, 3))
        self.pool2 = tf.keras.layers.MaxPooling2D((2, 2))
        self.conv3 = tf.keras.layers.Conv2D(40,(3, 3))
        self.pool3 = tf.keras.layers.MaxPooling2D((2, 2))
        self.conv4 = tf.keras.layers.Conv2D(80,(3, 3))
        self.avg_pool = tf.keras.layers.GlobalAveragePooling2D()
        # Same Keras layer namespace as the layers above
        self.dense = tf.keras.layers.Dense(emb_size)

        self.arcface = Arcface(out_num)

    def call(self, input_tensor, training=False):
        """
        Computes the embeddings of a batch of images.

        Implemented as `call` (not `__call__`) so that invoking the model
        goes through the Keras `Model.__call__` wrapper.

        Arguments:
            input_tensor: batch of images.
            training: accepted for Keras compatibility; no layer here
                behaves differently in training mode.

        Returns:
            Tensor of shape (batch, emb_size) whose rows have unit L2 norm.
        """
        x = self.conv1(input_tensor)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.conv3(x)
        x = self.pool3(x)
        # conv4 was created in __init__ but was never applied
        x = self.conv4(x)
        x = self.avg_pool(x)
        x = self.dense(x)

        # Normalise each embedding on its own; without an axis the norm
        # would be computed over the whole batch.
        return tf.nn.l2_normalize(x, axis=-1)

### Train it

### Test it on the training/validation dataset to spot the worst examples

### Evaluate it on the test dataset: one shot learning