## First, I'll load the required data and set up the Keras settings

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# (e.g. the project-local utils.* helpers) are reloaded before each cell runs.
%load_ext autoreload
%autoreload 2

In [8]:
import numpy as np
# use non standard flow_from_directory
from utils.image_preprocessing_ver2 import ImageDataGenerator
# it outputs y_batch that contains embeddings

from utils.mobilenet import get_mobilenet
import keras
from keras.applications.mobilenet import MobileNet
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.models import Model
from keras import optimizers
from keras import backend as K
from keras.layers import Lambda, concatenate, Activation
from keras.losses import categorical_crossentropy as logloss
from keras.metrics import categorical_accuracy, top_k_categorical_accuracy
import matplotlib.pyplot as plt
from keras.layers import Activation, GlobalAveragePooling2D, Dropout, Dense, Input
%matplotlib inline

In [9]:
# Directory prefix for the saved train_set.npy / test_set.npy files.
# Keeps its trailing slash because it is joined by plain string concatenation.
logit_dir = 'data/face_emb_logits/'

In [10]:
# Each .npy file is indexed with `[()]`, i.e. it holds one Python object wrapped
# in a 0-d array (presumably the "dicts of logits" passed to the generators).
# NumPy >= 1.16.3 refuses to unpickle object arrays unless allow_pickle=True is
# given explicitly, so pass it to keep the cell working on current NumPy.
# NOTE(review): unpickling can execute arbitrary code -- only load files you
# produced yourself or otherwise trust.
train_logits = np.load(logit_dir + 'train_set.npy', allow_pickle=True)[()]
val_logits = np.load(logit_dir + 'test_set.npy', allow_pickle=True)[()]
print(len(train_logits))

12020


In [11]:
# Root folder of the image dataset; 'train/' and 'test' are appended to it
# when the generators are created.
# NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
data_dir = "/home/prudhvi/Desktop/Datasets/lfw/lfw_mtcnn_160_split/"

In [16]:
# A single generator object (channels-last images) serves both splits.
data_generator = ImageDataGenerator(data_format='channels_last')

# Keyword options shared by the training and validation iterators.
flow_options = dict(
    target_size=(160, 160),
    batch_size=64,
    class_mode='embedding',
)

# Unlike the stock Keras API, this flow_from_directory also receives the
# loaded logits object as its second positional argument.
train_generator = data_generator.flow_from_directory(
    data_dir + 'train/', train_logits, **flow_options
)
val_generator = data_generator.flow_from_directory(
    data_dir + 'test', val_logits, **flow_options
)

Found 12020 images belonging to 1 classes.
Found 1213 images belonging to 1 classes.


## Create your MobileNet model

In [20]:
# Softmax temperature: knowledge_distillation_loss divides the logits slice of
# y_true by this value before applying the softmax.
temperature = 5.0

In [28]:
def get_mobilenet(dropout, input_size=160, alpha=1):
    """Build a frozen MobileNet backbone with a trainable embedding head.

    The head is: global average pooling -> optional dropout -> Dense(512)
    -> L2 normalisation along axis 1.

    Note: this definition shadows the ``get_mobilenet`` imported from
    ``utils.mobilenet`` at the top of the notebook.

    Args:
        dropout: Drop rate applied to the pooled features before the Dense
            layer. A falsy value (0 or None) skips the dropout layer.
        input_size: Height and width of the square 3-channel input image.
        alpha: Width multiplier forwarded to ``MobileNet``.

    Returns:
        A ``Model`` from the MobileNet input to the L2-normalised 512-unit
        output. All backbone layers are marked non-trainable.
    """
    input_shape = (input_size, input_size, 3)
    base_model = MobileNet(
        include_top=False, weights='imagenet',
        input_shape=input_shape, alpha=alpha
    )

    features = GlobalAveragePooling2D()(base_model.output)
    # Previously the `dropout` argument was accepted but silently ignored.
    if dropout:
        features = Dropout(dropout)(features)
    logits = Dense(512)(features)
    l2_norm = Lambda(lambda t: K.l2_normalize(t, axis=1))(logits)
    model = Model(base_model.input, l2_norm)

    # Freeze the pretrained backbone explicitly rather than slicing
    # model.layers, so the freeze does not depend on how many head layers exist.
    for layer in base_model.layers:
        layer.trainable = False
    return model

model = get_mobilenet(dropout=1e-3)
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the output.
model.summary()

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.6/mobilenet_1_0_160_tf_no_top.h5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 160, 160, 3)       0         
_________________________________________________________________
conv1_pad (ZeroPadding2D)    (None, 162, 162, 3)       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 80, 80, 32)        864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 80, 80, 32)        128       
_________________________________________________________________
conv1_relu (Activation)      (None, 80, 80, 32)        0         
_________________________________________________________________
conv_pad_1 (ZeroPadding2D)   (None, 82, 82, 32)        0         
_________________

In [29]:
def knowledge_distillation_loss(y_true, y_pred, lambda_const):
    """Combine a hard-target and a soft-target cross-entropy term.

    Both tensors are split along axis 1 at column 256:
      * y_true -> [hard targets | logits], the logits being turned into soft
        targets with a softmax at the module-level `temperature`;
      * y_pred -> [regular predictions | softened predictions].

    Returns `lambda_const * logloss(hard) + logloss(soft)`.
    """
    split_at = 256

    hard_targets = y_true[:, :split_at]
    target_logits = y_true[:, split_at:]

    hard_predictions = y_pred[:, :split_at]
    soft_predictions = y_pred[:, split_at:]

    # Temperature-scaled softmax turns the logits into soft targets.
    soft_targets = K.softmax(target_logits/temperature)

    hard_term = logloss(hard_targets, hard_predictions)
    soft_term = logloss(soft_targets, soft_predictions)
    return lambda_const*hard_term + soft_term

In [30]:
def accuracy(y_true, y_pred):
    """Metric wrapper that forwards both tensors unchanged to categorical_accuracy."""
    per_sample_match = categorical_accuracy(y_true, y_pred)
    return per_sample_match

In [31]:
# logloss with only soft probabilities and targets
def soft_logloss(y_true, y_pred):
    """Metric wrapper: categorical cross-entropy of y_pred against y_true, both passed through as-is."""
    cross_entropy = logloss(y_true, y_pred)
    return cross_entropy

In [32]:
# Not referenced by the compile call below; presumably meant as the
# `lambda_const` argument of knowledge_distillation_loss -- TODO confirm.
lambda_const = 0.07

# SGD with Nesterov momentum, starting from a learning rate of 0.1.
sgd = optimizers.SGD(lr=1e-1, momentum=0.9, nesterov=True)

# The model is fitted with mean squared error against the generator targets;
# the two wrapper metrics are only reported alongside the loss.
model.compile(
    optimizer=sgd,
    loss='mean_squared_error',
    metrics=[accuracy, soft_logloss],
)

In [33]:
# Multiply the learning rate by 0.1 once 'val_accuracy' has failed to improve
# (by more than epsilon) for 2 consecutive epochs.
# NOTE(review): the model is trained with MSE, so confirm that the
# categorical-accuracy metric is the intended signal for this schedule.
lr_schedule = ReduceLROnPlateau(
    monitor='val_accuracy', factor=0.1, patience=2, epsilon=0.007
)

model.fit_generator(
    train_generator,
    steps_per_epoch=400,
    epochs=30,
    verbose=1,
    callbacks=[lr_schedule],
    validation_data=val_generator,
    validation_steps=80,
    workers=4,
)



Epoch 1/30
Epoch 2/30
Epoch 3/30
 11/400 [..............................] - ETA: 33s - loss: 0.0035 - accuracy: 0.0057 - soft_logloss: 1.9738

KeyboardInterrupt: 