# DogFaceNet version 2

### Imports

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

import os
import numpy as np
import skimage as sk
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook

In [2]:
PATH = '../data/dogfacenet/'
PATH_IMAGES = PATH + 'images/'
PATH_RESIZED = PATH + 'resized/'

# Size of the input image into the network
SIZE = (100,100,3)

TEST_SPLIT = 0.05
VALID_SPLIT = 0.1

BATCH_SIZE = 64

### Dataset preprocessing
- Get the dataset from folders
- Associate the corresponding classes
- Resized the dataset
- Shuffle the dataset?
- Divide the dataset into validation, training and testing?

In [3]:
def square_crop(image):
    """
    Takes the largest between height and width of the image and crops it into a square.
    This square is located in the middle of the image.
    """
    
    h,w,c = image.shape
    
    if w > h:
        margin = w - h
        margin = margin // 2
        image = image[:,margin:margin+h,:]
    elif w < h:
        margin = h - w
        margin = margin // 2
        image = image[margin:margin+w,:,:]
    return image

Resize pictures

In [7]:
w, h, c = SIZE

label = 0

filenames = os.listdir(PATH_IMAGES)

# Just save the pictures after resizing them
for i in tqdm_notebook(range(598+869,len(filenames))):
    for file in os.listdir(PATH_IMAGES + filenames[i]):
        label += 1
        
        # Read and resized image
        image = sk.io.imread(PATH_IMAGES + filenames[i] + '/' + file)
        if len(image.shape) == 3:
            image_cropped = square_crop(image)
            image_resized = sk.transform.resize(image_cropped,SIZE)

            # Save image
            ## Check if the good folder exists
            if filenames[i] not in os.listdir(PATH_RESIZED):
                os.mkdir(PATH_RESIZED + filenames[i])
            sk.io.imsave(PATH_RESIZED + filenames[i] + '/' + str(label) + '.jpg', image_resized)

  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "
  .format(dtypeobj_in, dtypeobj_out))


Load dataset into memory

In [3]:
filenames = os.listdir(PATH_RESIZED)

i = 0

# Remove unique examples
while i<len(filenames):
    files = os.listdir(PATH_RESIZED + filenames[i])
    if len(files)<=1:
        filenames = filenames[:i] + filenames[i+1:]
    else:
        i += 1

# Compute the number of images
nbof_images = 0
for i in range(0,len(filenames)):
    files = os.listdir(PATH_RESIZED + filenames[i])
    nbof_images += len(files)

print("Number of images: " + str(nbof_images))
    
w, h, c = SIZE

images = np.empty((nbof_images,w,h,c))
labels = np.empty(nbof_images, dtype=int)

label = 0

index = 0

# Load images into numpy arrays
for i in tqdm_notebook(range(len(filenames))):
    files = os.listdir(PATH_RESIZED + filenames[i])
    for file in files:
        labels[index] = label
        # Read image
        image = sk.io.imread(PATH_RESIZED + filenames[i] + '/' + file)

        # Add the image to the table
        images[index] = image
        
        index += 1
    label += 1
    
assert len(labels)==len(images)

Number of images: 5568





#### Old method!

Divide the dataset into train, valid and test:

To create the validation dataset we use pictures of dogs were there is more than 3 pictures and took one of this picture.

For the testing dataset we simply split the classes. We will then use the network as a one shot learner on this new dataset. With one picture the network will produce one embedding vector. For each embedding vector compute the L2 distance with each face to compute the most propable one

In [77]:
w, h, c = SIZE

nbof_test = int(len(images)*TEST_SPLIT)

images_test = images[:nbof_test]
labels_test = labels[:nbof_test]


# Count valid images:
state = -1
count_valid = 0
count_train = 0
count_image_class = 0

for i in range(nbof_test,len(labels)):
    if state != labels[i]:
        state = labels[i]
        count_image_class = 0
    else:
        count_image_class += 1
    
    if count_image_class == 3:
        count_valid += 1
    else:
        count_train += 1

print("Total number of images: " + str(len(labels)))
print("Number of test images: " + str(len(labels_test)))
print("Number of validation images: " + str(count_valid))
print("Number of training images: " + str(count_train))

images_valid = np.empty((count_valid,w,h,c))
labels_valid = np.empty(count_valid)

images_train = np.empty((count_train,w,h,c))
labels_train = np.empty(count_train)

state = -1
count_valid = 0
count_train = 0
count_image_class = 0

for i in range(nbof_test,len(labels)):
    if state != labels[i]:
        state = labels[i]
        count_image_class = 0
    else:
        count_image_class += 1
    
    if count_image_class == 3:
        # Add the validation image in the validation array
        images_valid[count_valid] = images[i]
        labels_valid[count_valid] = labels[i]
        
        count_valid += 1
    else:
        images_train[count_train] = images[i]
        labels_train[count_train] = labels[i]
        
        count_train += 1
# print(labels)
# print(labels_test)
# print(labels_train)
# print(labels_valid)
print("Is the number of images coherent? " + str(len(labels)==(len(labels_test)+len(labels_train)+len(labels_valid))))

Total number of images: 5568
Number of test images: 278
Number of validation images: 543
Number of training images: 4747


TypeError: only size-1 arrays can be converted to Python scalars

#### New method
- We divide the validation and test set from the training set with the classic division method: 85 percent training, 10 validating, 5 testing.
- We then computes pairs of images in the validation set and testing set:
 - Some of these pairs are images of the same dog and some are picture of different dogs
 - We create a two lists:
  - A list a images containing the pairs: two successive images are a pair of images. For example, image 0 and is a pair, image 2 and 3 is another pair, etc...
  - A list of boolean called 'issame' indicating if a pair is a pair of images of the same dog or a pair of different dogs. For example, if image 0 and image 1 are showing the same dog value 0 and 1 in the list will be True. On the other hand if the image 2 and 3 represent two different dogs the value 2 and 3 in the list will be at False

In [25]:
w, h, c = SIZE

nbof_test = int(len(images)*TEST_SPLIT)

images_test = images[-nbof_test:]
labels_test = labels[-nbof_test:]

nbof_valid = int(len(images)*VALID_SPLIT)

images_valid = images[-nbof_test-nbof_valid:-nbof_test]
labels_valid = labels[-nbof_test-nbof_valid:-nbof_test]

images_train = images[:-nbof_test-nbof_valid]
labels_train = labels[:-nbof_test-nbof_valid]

print("Total number of images: " + str(len(labels)))
print("Number of test images: " + str(len(labels_test)))
print("Number of validation images: " + str(len(labels_valid)))
print("Number of training images: " + str(len(labels_train)))
print("Number of classes in the training set: " + str(labels_train[-1] - labels_train[0]))


# Creates the pairs

nbof_pairs = (len(images_valid)//2)*2 # it has to be multiple of 2

print("Number of pairs: " + str(nbof_pairs))

pairs = np.empty((nbof_pairs,w,h,c))
issame_in = np.empty(nbof_pairs, dtype=int)
issame_out = np.empty((nbof_pairs,2))

nbof_same = 0

for i in range(0,nbof_pairs,2):
    ## alea_issame will decide if the new pair will be a pair of same dog images or a pair of different
    alea_issame = np.random.rand()

    if alea_issame < 0.5: # Then it will be a pair of same dogs
        # we randomly choose a dog
        choice = np.random.randint(len(labels_valid))
        
        # we extract the images of this class
        chosen_images = list(images_valid[np.equal(labels_valid,labels_valid[choice])])
        
        while len(labels_valid[np.equal(labels_valid,labels_valid[choice])]) < 2:
            choice = np.random.randint(len(labels_valid))
            chosen_images = list(images_valid[np.equal(labels_valid,labels_valid[choice])])
            
        # we then randomly choose two pictures of this class
        choice1 = np.random.randint(len(chosen_images))
        pairs[i] = chosen_images[choice1]
        save = np.copy(chosen_images)
        chosen_images = chosen_images[:choice1] + chosen_images[choice1+1:]
        if len(chosen_images) == 0:
            print("Bug!")
            print(save)
        choice2 = np.random.randint(len(chosen_images))
        pairs[i+1] = chosen_images[choice2]

        issame_out[i] = issame_out[i+1] = [1,0]
        issame_in[i] = issame_in[i+1] = 1
        
        nbof_same += 1
        
    else: # Then it will be a pair of different dogs
        # we randomly choose two dogs
        choice1 = np.random.randint(len(labels_valid))
        
        # we extract the images of the class
        chosen_images = list(images_valid[np.equal(labels_valid,labels_valid[choice1])])
        
        # we choose an image of this class
        choice = np.random.randint(len(chosen_images))
        #print(choice)
        pairs[i] = images_valid[choice]
        
        choice2 = np.random.randint(len(labels_valid))
        
        # check if we have two different classes
        while labels_valid[choice2] == labels_valid[choice1]:
            choice2 = np.random.randint(len(labels_valid))
        
        chosen_images = list(images_valid[np.equal(labels_valid,labels_valid[choice2])])
        
        # we choose an image of this class
        choice = np.random.randint(len(chosen_images))
        
        pairs[i+1] = images_valid[choice]
        
        issame_out[i] = issame_out[i+1] = [0,1]
        issame_in[i] = issame_in[i+1] = 0

print("Number of same images: " + str(nbof_same))
print("Number of validation images: " + str(len(labels_valid)))

Total number of images: 5568
Number of test images: 278
Number of validation images: 556
Number of training images: 4734
Number of classes in the training set: 1300
Number of pairs: 556
Number of same images: 133
Number of validation images: 556


In [26]:
NBOF_CLASSES = max(labels_train)+1
labels_train = tf.keras.utils.to_categorical(labels_train,NBOF_CLASSES)
labels_valid = tf.keras.utils.to_categorical(labels_valid-NBOF_CLASSES,NBOF_CLASSES)

### Define the model
- Define the ArcFace layer
- Define the dummy model first
- Compile it with the softmax loss and and Adam optimizer
- Then use transfer learning with a more complex model

Define the Arcface layer

In [6]:
# My custom layer for arcface
# It takes two inputs: one for the embedding, one for the label

from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer
import math

# Arcface should only be used for training
class Arcface(Layer):

    def __init__(self, out_num, s = 64., m = 0.5, **kwargs):
        self.out_num = out_num
        self.s = s
        self.m = m
        super(Arcface, self).__init__(**kwargs)

    def build(self, input_shape, initializer='uniform'):
        assert isinstance(input_shape, list)
        
        shape = tf.TensorShape((input_shape[0][-1],self.out_num))
        print(shape)
        
        # Create a trainable weight variable for this layer.
        self.kernel = self.add_weight(name='kernel',
                                                 shape=shape,
                                                 initializer=initializer,
                                                 dtype=tf.float32,
                                                 trainable=True)
        super(Arcface, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        assert isinstance(x, list)
        embedding, labels = x
        
        cos_m = math.cos(self.m)
        sin_m = math.sin(self.m)
        mm = sin_m * self.m  # issue 1
        threshold = math.cos(math.pi - self.m)
        
        # inputs and weights norm
        embedding_norm = tf.norm(embedding, axis=1, keepdims=True)
        embedding = tf.div(embedding, embedding_norm, name='norm_embedding')
        
        weights_norm = tf.norm(self.kernel, axis=0, keepdims=True)
        weights = tf.div(self.kernel, weights_norm, name='norm_weights')
        # cos(theta+m)
        cos_t = tf.matmul(embedding, weights, name='cos_t')
        cos_t2 = tf.square(cos_t, name='cos_2')
        sin_t2 = tf.subtract(1., cos_t2, name='sin_2')
        sin_t = tf.sqrt(sin_t2, name='sin_t')
        cos_mt = self.s * tf.subtract(tf.multiply(cos_t, cos_m), tf.multiply(sin_t, sin_m), name='cos_mt')
        
        # this condition controls the theta+m should be in range [0, pi]
        #      0<=theta+m<=pi
        #     -m<=theta<=pi-m
        cond_v = cos_t - threshold
        cond = tf.cast(tf.nn.relu(cond_v, name='if_else'), dtype=tf.bool)

        keep_val = self.s*(cos_t - mm)
        cos_mt_temp = tf.where(cond, cos_mt, keep_val)

        mask = tf.one_hot(labels, depth=self.out_num, name='one_hot_mask')
        # mask = tf.squeeze(mask, 1)
        inv_mask = tf.subtract(1., mask, name='inverse_mask')

        s_cos_t = tf.multiply(self.s, cos_t, name='scalar_cos_t')

        output = tf.add(tf.multiply(s_cos_t, inv_mask), tf.multiply(cos_mt_temp, mask), name='arcface_loss_output')
        
        return output

    def compute_output_shape(self, input_shape):
        assert isinstance(input_shape, list)
        shape_emb, shape_lab = input_shape
        shape_emb[-1] = self.out_num
        return tf.TensorShape(shape_emb)
    
#         shape = tf.TensorShape(input_shape).as_list()
#         shape[-1] = self.num_classes
#         return tf.TensorShape(shape)


In [7]:
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer
import math

# Arcface should only be used for training
class Arcface(Layer):

    def __init__(self, out_num, s = 64., m = 0.5, **kwargs):
        self.out_num = out_num
        self.s = s
        self.m = m
        super(Arcface, self).__init__(**kwargs)

    def build(self, input_shape, initializer='uniform'):
        assert isinstance(input_shape, list)
        
        shape = tf.TensorShape((input_shape[0][-1],self.out_num))
        print(shape)
        
        # Create a trainable weight variable for this layer.
        self.kernel = self.add_weight(name='kernel',
                                                 shape=shape,
                                                 initializer=initializer,
                                                 dtype=tf.float32,
                                                 trainable=True)
        super(Arcface, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        assert isinstance(x, list)
        embedding, labels = x
        
        cos_m = math.cos(self.m)
        sin_m = math.sin(self.m)
        mm = sin_m * self.m  # issue 1
        threshold = math.cos(math.pi - self.m)
        
        # inputs and weights norm
        embedding_norm = tf.norm(embedding, axis=1, keepdims=True)
        embedding = tf.div(embedding, embedding_norm, name='norm_embedding')
        
        weights_norm = tf.norm(self.kernel, axis=0, keepdims=True)
        weights = tf.div(self.kernel, weights_norm, name='norm_weights')
        # cos(theta+m)
        cos_t = tf.matmul(embedding, weights, name='cos_t')
        cos_t2 = tf.square(cos_t, name='cos_2')
        sin_t2 = tf.subtract(1., cos_t2, name='sin_2')
        sin_t = tf.sqrt(sin_t2, name='sin_t')
        cos_mt = self.s * tf.subtract(tf.multiply(cos_t, cos_m), tf.multiply(sin_t, sin_m), name='cos_mt')
        
        # this condition controls the theta+m should be in range [0, pi]
        #      0<=theta+m<=pi
        #     -m<=theta<=pi-m
        cond_v = cos_t - threshold
        cond = tf.cast(tf.nn.relu(cond_v, name='if_else'), dtype=tf.bool)

        keep_val = self.s*(cos_t - mm)
        cos_mt_temp = tf.where(cond, cos_mt, keep_val)
        
        labels_int = tf.cast(labels,tf.int32, name='labels_int')
        mask = tf.one_hot(labels_int, depth=self.out_num, name='one_hot_mask')
        # mask = tf.squeeze(mask, 1)
        inv_mask = tf.subtract(1., mask, name='inverse_mask')

        s_cos_t = tf.multiply(self.s, cos_t, name='scalar_cos_t')
        mul1 = tf.multiply(s_cos_t, inv_mask)
        print(mul1.shape)
        mul2 = tf.multiply(cos_mt_temp, mask)
        print(mul2.shape)
        output = tf.add(mul1, mul2, name='arcface_loss_output')
        print(output.shape)
        print(cos_mt_temp.shape)
        return output

    def compute_output_shape(self, input_shape):
        assert isinstance(input_shape, list)
        shape_emb, shape_lab = input_shape
        shape_emb[-1] = self.out_num
        return tf.TensorShape(shape_emb)

In [70]:

class MyLayer(Layer):

    def __init__(self, output_dim, s = 64., m = 0.5, **kwargs):
        self.out_num = output_dim
        self.m = m
        self.s = s
        
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # Create a trainable weight variable for this layer.
        shape = (input_shape[1].value, self.out_num)
        print(shape)
        self.kernel = self.add_weight(name='kernel', 
                                      shape=shape,
                                      initializer='uniform',
                                      trainable=True)
        super(MyLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        #assert isinstance(x, list)
        embedding = x
        
        cos_m = math.cos(self.m)
        sin_m = math.sin(self.m)
        mm = sin_m * self.m  # issue 1
        threshold = math.cos(math.pi - self.m)
        
        # inputs and weights norm
        embedding_norm = tf.norm(embedding, axis=1, keepdims=True)
        embedding = tf.div(embedding, embedding_norm, name='norm_embedding')
        
        weights_norm = tf.norm(self.kernel, axis=0, keepdims=True)
        weights = tf.div(self.kernel, weights_norm, name='norm_weights')
        #print(self.weights)
        # cos(theta+m)
        cos_t = tf.matmul(embedding, weights, name='cos_t')
        cos_t2 = tf.square(cos_t, name='cos_2')
        sin_t2 = tf.subtract(1., cos_t2, name='sin_2')
        sin_t = tf.sqrt(sin_t2, name='sin_t')
        cos_mt = self.s * tf.subtract(tf.multiply(cos_t, cos_m), tf.multiply(sin_t, sin_m), name='cos_mt')
        
        # this condition controls the theta+m should be in range [0, pi]
        #      0<=theta+m<=pi
        #     -m<=theta<=pi-m
        cond_v = cos_t - threshold
        cond = tf.cast(tf.nn.relu(cond_v, name='if_else'), dtype=tf.bool)

        keep_val = self.s*(cos_t - mm)
        cos_mt_temp = tf.where(cond, cos_mt, keep_val)

        #mask = tf.one_hot(labels, depth=self.out_num, name='one_hot_mask')
        # mask = tf.squeeze(mask, 1)
        #inv_mask = tf.subtract(1., mask, name='inverse_mask')

        
        s_cos_t = tf.multiply(self.s, cos_t, name='scalar_cos_t')
        
        return s_cos_t

    def compute_output_shape(self, input_shape):
        return (input_shape[0].value, self.output_dim)

Define the validation layer:
- The bigger the validation batch the better it is (no less than 64 pictures -> 32 pairs)
- It computes the ROC curve
- Finds the best threshold
- Returns a list of 2D vectors [1,0] if the pair was the same dog, [0,1] if it was a different dog

In [47]:
import time
# tests unitaires

tf.reset_default_graph()
emb_raw = tf.constant([[12.,2],[8,4],[3,8],[2,10],[50,10],[10,30]])

actual_issame = tf.constant([1.,1,1,1,0,0])
emb_ = tf.math.l2_normalize(emb_raw,0)

# Normalizes
emb = tf.math.l2_normalize(emb_,0)

# Separates the pairs
emb1 = emb[0::2]
emb2 = emb[1::2]

# Computes distance between pairs
diff = tf.squared_difference(emb1,emb2)
dist = tf.reduce_sum(diff,1)

dist = tf.reshape(tf.stack([dist,dist], axis=-1), [-1])

best_threshold = 0
#for t in np.arange(0,1,0.001):
t = 0.01

def fn(t):
    less = tf.less(dist,t)

    actual_issame_bool = tf.cast(actual_issame,dtype=tf.bool)
    acc = tf.logical_not(tf.logical_xor(less,actual_issame_bool))
    acc = tf.cast(acc,tf.float32)
    out = tf.reshape(tf.reduce_sum(acc),[])
    return out


thresholds = tf.range(0,1,0.001)
apply_t = tf.map_fn(fn, thresholds)
best_t = tf.argmax(apply_t)

best = thresholds[best_t]

# Redo the manipulation with the best threshold
less = tf.less(dist,best)
less = tf.cast(less,tf.int32)
less = tf.map_fn(lambda x : (1-x) * [0,1] + x * [1,0], less)


# # Creates different threshold in order to find the best one
# np_threshold = np.vstack([np.arange(0,1,0.01)]*3).T

# # Reshapes the distance
# dist2 = tf.stack([dist]*len(np_threshold))

# # Reshapes the true values
# actual_issame = tf.constant([[True,True,False]]*len(np_threshold),dtype=bool)

# threshold = tf.constant(np_threshold, dtype=tf.float32)

# # Uses the created thresholds to compute the predictions
# predict_issame = tf.less(dist2,threshold)

# # Computes the accuracy
# truth = tf.logical_not(tf.logical_xor(predict_issame, actual_issame))

# r = tf.reduce_sum(tf.cast(truth,tf.float32),1)

# # Finds the best accuracy with respect to the threshold
# m = tf.argmax(r)

# # best_threshold = threshold[m]
# # accuracy = r[m]/3

# # Ouputs the best output and reshapes the output in a softmax way
# bool_output = predict_issame[m]
# int_output = tf.cast(bool_output,tf.int32)
# output = tf.map_fn(lambda x : (1-x) * [0,1] + x * [1,0], int_output)



    
with tf.Session() as sess:
    t1 = time.time()
    dist_ = sess.run([less])
    t2 = time.time()
    print(t2-t1)
    print(dist_)

0.8407671451568604
[array([[1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1]])]


In [61]:
import time
# tests unitaires

tf.reset_default_graph()
emb_raw = tf.constant([[12.,2],[8,4],[3,8],[2,10],[50,10],[10,30]])
emb_ = tf.math.l2_normalize(emb_raw,0)

# Normalizes
emb = tf.math.l2_normalize(emb_,0)

# Separates the pairs
emb1 = emb[0::2]
emb2 = emb[1::2]

# Computes distance between pairs
diff = tf.squared_difference(emb1,emb2)
dist = tf.reduce_sum(diff,1)



# Creates different threshold in order to find the best one
np_threshold = np.vstack([np.arange(0,1,0.01)]*3).T

# Reshapes the distance
dist2 = tf.stack([dist]*len(np_threshold))

# Reshapes the true values
actual_issame = tf.constant([[True,True,False]]*len(np_threshold),dtype=bool)

threshold = tf.constant(np_threshold, dtype=tf.float32)

# Uses the created thresholds to compute the predictions
predict_issame = tf.less(dist2,threshold)

# Computes the accuracy
truth = tf.logical_not(tf.logical_xor(predict_issame, actual_issame))

r = tf.reduce_sum(tf.cast(truth,tf.float32),1)

# Finds the best accuracy with respect to the threshold
m = tf.argmax(r)

# best_threshold = threshold[m]
# accuracy = r[m]/3

# Ouputs the best output and reshapes the output in a softmax way
bool_output = predict_issame[m]
int_output = tf.cast(bool_output,tf.int32)
output = tf.map_fn(lambda x : (1-x) * [0,1] + x * [1,0], int_output)



    
with tf.Session() as sess:
    t1 = time.time()
    act,pred,a_ = sess.run([actual_issame,truth,output])
    t2 = time.time()
    print(t2-t1)
    print(a_)

9.258671522140503
[[ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True False]
 [ True  True Fa

In [34]:
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer

# Should only be used for validating
class Validation(Layer):

    def __init__(self, **kwargs):
        super(Validation, self).__init__(**kwargs)

    def build(self, input_shape):
        assert isinstance(input_shape, list)
        self.emb_shape = input_shape[0]
        super(Validation, self).build(input_shape)

    def call(self, x):
        """
        Inputs: a tuple containing the embeddings and the issame list
        - embeddings: shape=(batch_size, embedding_size), type=float
        - issame: shape=(batch_size), type=bool
        
        Outputs: a tensor of shape=(batch_size,2), the ouput is either [1,0] (is same) or [0,1] (is different)
        """
        assert isinstance(x, list)
        
        embeddings, issame = x
        self.emb_shape = embeddings.shape
        print(self.emb_shape)

        emb = tf.math.l2_normalize(embeddings,0)
        # emb contains a list of pictures
        # pictures with an even index are first pictures of the pairs
        # pictures with an odd index are second pictures of the pairs
        emb1 = emb[0::2]
        emb2 = emb[1::2]
        
        # Compute the distance for each pair of vector
        dist = tf.reduce_sum(tf.squared_difference(emb1,emb2),1)
        
        dist = tf.reshape(tf.stack([dist,dist], axis=-1), [-1])


        def fn(t):
            less = tf.less(dist,t)

            actual_issame_bool = tf.cast(issame,dtype=tf.bool)
            acc = tf.logical_not(tf.logical_xor(less,actual_issame_bool))
            acc = tf.cast(acc,tf.float32)
            out = tf.reshape(tf.reduce_sum(acc),[])
            return out


        thresholds = tf.range(0,1,0.001)
        apply_t = tf.map_fn(fn, thresholds)
        best_t = tf.argmax(apply_t)

        best = thresholds[best_t]

        # Redo the manipulation with the best threshold
        less = tf.less(dist,best)
        less = tf.cast(less,tf.int32)
        output = tf.map_fn(lambda x : (1-x) * [0,1] + x * [1,0], less)
        
        return output

    def compute_output_shape(self, input_shape):
        assert isinstance(input_shape, list)
        emb_shape, _ = input_shape
        return (BATCH_SIZE, 2)

Define the model

In [115]:
class Dummy(tf.keras.Model):
    def __init__(self, out_num, emb_size = 32):
        """
        -emb_size: size of the embedding
        -out_num: number of identities in the 
        """
        super(Dummy, self).__init__(name='dummy')
        self.conv1 = tf.keras.layers.Conv2D(10,(3, 3))
        self.pool1 = tf.keras.layers.MaxPooling2D((2, 2))
        self.conv2 = tf.keras.layers.Conv2D(20,(3, 3))
        self.pool2 = tf.keras.layers.MaxPooling2D((2, 2))
        self.conv3 = tf.keras.layers.Conv2D(40,(3, 3))
        self.pool3 = tf.keras.layers.MaxPooling2D((2, 2))
        self.conv4 = tf.keras.layers.Conv2D(80,(3, 3))
        self.avg_pool = tf.keras.layers.GlobalAveragePooling2D()
        self.dense = tf.layers.Dense(emb_size)
        
        self.arcface = Arcface(out_num)
        self.validation = Validation()
    
    def call(self, input_tensor, training):
        images, labels, issame = input_tensor
        x = self.conv1(images)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.conv3(x)
        x = self.pool3(x)
        x = self.avg_pool(x)
        x = self.dense(x)
        embeddings = tf.math.l2_normalize(x)
        
        if traning:
            output = self.arcface((embeddings,labels))
        else:
            output = self.validation((embeddings,issame))
        
        return output

In [143]:
class MoreDummy(tf.keras.Model):
    def __init__(self, out_num, emb_size = 32):
        """
        -emb_size: size of the embedding
        -out_num: number of identities in the 
        """
        super(MoreDummy, self).__init__(name='more_dummy')
        self.conv1 = tf.keras.layers.Conv2D(10,(3, 3))
        self.pool1 = tf.keras.layers.MaxPooling2D((2, 2))
        self.conv2 = tf.keras.layers.Conv2D(20,(3, 3))
        self.pool2 = tf.keras.layers.MaxPooling2D((2, 2))
        self.conv3 = tf.keras.layers.Conv2D(40,(3, 3))
        self.pool3 = tf.keras.layers.MaxPooling2D((2, 2))
        self.conv4 = tf.keras.layers.Conv2D(80,(3, 3))
        self.avg_pool = tf.keras.layers.GlobalAveragePooling2D()
        self.dense = tf.layers.Dense(emb_size)

        self.arcface = Arcface(out_num)
    
    def call(self, input_tensor):
        images, labels = input_tensor
        x = self.conv1(images)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.conv3(x)
        x = self.pool3(x)
        x = self.avg_pool(x)
        x = self.dense(x)
        embeddings = tf.math.l2_normalize(x)
        
        output = self.arcface([embeddings,labels])
        
        return output
    
#     def compute_output_shape(self, input_shape):
#         # You need to override this function if you want to use the subclassed model
#         # as part of a functional-style model.
#         # Otherwise, this method is optional.
#         shape = tf.TensorShape(self.out_num).as_list()
#         shape[-1] = self.num_classes
#         return tf.TensorShape(shape)


In [146]:
model = MoreDummy(1300)

#### tests unitaires

In [44]:
 #tf.reset_default_graph()
class EvenMoreDummy(tf.keras.Model):
    def __init__(self, emb_size = 32):
        """
        -emb_size: size of the embedding
        -out_num: number of identities in the 
        """
        super(EvenMoreDummy, self).__init__(name='even_more_dummy')
        self.conv1 = tf.keras.layers.Conv2D(10,(3, 3))
        self.pool1 = tf.keras.layers.MaxPooling2D((2, 2))
        self.conv2 = tf.keras.layers.Conv2D(20,(3, 3))
        self.pool2 = tf.keras.layers.MaxPooling2D((2, 2))
        self.conv3 = tf.keras.layers.Conv2D(40,(3, 3))
        self.pool3 = tf.keras.layers.MaxPooling2D((2, 2))
        #self.conv4 = tf.keras.layers.Conv2D(80,(3, 3))
        self.avg_pool = tf.keras.layers.GlobalAveragePooling2D()
        self.dense = tf.layers.Dense(emb_size)
        
        #self.arcface = Arcface(1300, name='output')
        #self.mylayer = MyLayer(1300, name='output')
        self.f = tf.layers.Dense(1301, name='out')
        #self.out = tf.layers.Dense(2, trainable=False, name='test')
        self.valid = Validation()
        #self.poopool = tf.keras.layers.MaxPooling1D(2)
    
    def call(self, input_tensor, training=True):
        images,labels,issame = input_tensor
        x = self.conv1(images)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.conv3(x)
        x = self.pool3(x)
        x = self.avg_pool(x)
        x = self.dense(x)
        emb = tf.math.l2_normalize(x)
        
        if not training:
            issame = self.valid([emb,issame])
        
        return [self.f(emb),issame]
#         if training:
#             #output = self.arcface([emb,labels])
#             #output = self.mylayer(emb)
#             #output = self.out(emb)
#             return self.f(emb)
#         else:
#             return self.out(emb)
#             output = self.valid([emb,issame])
            
        
    
#     def compute_output_shape(self, input_shape):
#         # You need to override this function if you want to use the subclassed model
#         # as part of a functional-style model.
#         # Otherwise, this method is optional.
#         shape = tf.TensorShape(self.out_num).as_list()
#         shape[-1] = self.num_classes
#         return tf.TensorShape(shape)

model = EvenMoreDummy(32)
model.compile(optimizer=tf.train.AdamOptimizer(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

### Train it

In [45]:
issame_train = np.zeros((len(labels_train),2))
issame_train[:,0] = 1

In [46]:
model.fit(
    [images_train,labels_train,issame_train],
    [labels_train,issame_train],
    epochs=20,
    batch_size=64
#     validation_data=([images_valid,labels_valid,issame_in],[labels_valid,issame_out])
)

(?, 32)


TypeError: Input 'y' of 'Mul' Op has type float64 that does not match type int32 of argument 'x'.

In [16]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            multiple                  280       
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 multiple                  0         
_________________________________________________________________
conv2d_7 (Conv2D)            multiple                  1820      
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 multiple                  0         
_________________________________________________________________
conv2d_8 (Conv2D)            multiple                  7240      
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 multiple                  0         
_________________________________________________________________
global_average_pooling2d_2 ( multiple                  0         
__________

### Test it on the training/validation dataset to stop the worst examples

In [97]:
predict = model.predict([images_test[0:1],labels_test[0:1],issame_in[0:1]])
predict

array([[0.6868594, 0.7891156]], dtype=float32)

### Evaluate it on the test dataset: one shot learning