In [1]:
import tensorflow as tf
import numpy as np
import os
from models.resnet_v2_50 import model
from tqdm import tqdm
import cv2

In [2]:
LEARNING_RATE = 0.001
MOMENTUM = 0.8
TRAINING_STEPS = 2000
BATCH_SIZE = 8
NUM_SAMPLES = 3
EMBEDDINGS_DIM = 2048
DROPOUT_KEEP_PROB = 0.8

INPUT_IMG_SIZE = [224, 224]

train_meta = '/home/facialrec/notebooks/datasets/VGGFace2/train_list.txt'
train_root = '/home/facialrec/notebooks/datasets/VGGFace2/train/'
test_meta = '/home/facialrec/notebooks/datasets/VGGFace2/test_list.txt'
test_root = '/home/facialrec/notebooks/datasets/VGGFace2/test/'

In [3]:
def build_dict(meta_filename, root):
    with open(meta_filename, 'rt', encoding='utf-8') as f:
        meta = f.read().strip().split('\n')
        
    actual_files = []
    for m in meta:
        if os.path.exists(os.path.join(root, m)):
            actual_files.append(m)
        
    print(len(actual_files))
    meta = [m.split('/') for m in actual_files]
    
    data_dict = {}
    for id, img in meta:
        if id in data_dict:
            data_dict[id].append(img)
        else:
            data_dict[id] = [img]
            
    return data_dict

train_dict = build_dict(train_meta, train_root)
test_dict = build_dict(test_meta, test_root)

3141890
169396


In [4]:
data_dict = {**train_dict, **test_dict}

In [5]:
def random_samples(source, num_samples):
    assert num_samples > 0
    source_size = len(source)
    if num_samples < source_size:
        indices = []
        while len(indices) < num_samples:
            index = np.random.randint(source_size)
            if index not in indices:
                indices.append(index)
                
        indices = np.array(indices)
    else:
        indices = np.random.randint(source_size, size=num_samples)
        
    indices = indices.astype(np.int32)
    return [source[i] for i in indices]

def load_image(key, img_id):
    path = f'/home/facialrec/notebooks/datasets/VGGFace2/train/{key}/{img_id}'
#     print(path)
    img = cv2.imread(path)
    if img.ndim < 3:
        img = img[..., np.newaxis]
        img = np.tile(img, (1, 1, 3))
    else:
        img = img[..., ::-1]
    img = cv2.resize(img, tuple(INPUT_IMG_SIZE))
    return img

def py_map(key):
    key = key.decode()
    anchors = data_dict[key]
    samples = random_samples(anchors, 2 * NUM_SAMPLES)
    anchor = samples[:NUM_SAMPLES]
    anchor_imgs = [load_image(key, a) for a in anchor]
    positive = samples[NUM_SAMPLES:]
    positive_imgs = [load_image(key, p) for p in positive]
    
    negative_class = key
    while negative_class == key:
        negative_class = np.random.choice(list(data_dict.keys()))
        
    negatives = data_dict[negative_class]
    negative = random_samples(negatives, NUM_SAMPLES)
    negative_imgs = [load_image(negative_class, n) for n in negative]
    return anchor_imgs, positive_imgs, negative_imgs

def _map(i):
    anchor, positive, negative = tf.py_func(py_map, [i], [tf.uint8, tf.uint8, tf.uint8])
    anchor.set_shape((None, INPUT_IMG_SIZE[0], INPUT_IMG_SIZE[1], 3))
    positive.set_shape((None, INPUT_IMG_SIZE[0], INPUT_IMG_SIZE[1], 3))
    negative.set_shape((None, INPUT_IMG_SIZE[0], INPUT_IMG_SIZE[1], 3))
    
    return anchor, positive, negative

def _batch_map(anchor, positive, negative):
    anchor = tf.reshape(anchor, (-1, INPUT_IMG_SIZE[0], INPUT_IMG_SIZE[1], 3))
    positive = tf.reshape(positive, (-1, INPUT_IMG_SIZE[0], INPUT_IMG_SIZE[1], 3))
    negative = tf.reshape(negative, (-1, INPUT_IMG_SIZE[0], INPUT_IMG_SIZE[1], 3))
    
    return anchor, positive, negative

def triplet_loss(anchor, positive, negative, eps=1e-7):
    with tf.name_scope('triplet_loss'):
        pos_norm = tf.norm(anchor-positive, ord=2, axis=-1)
        neg_norm = tf.norm(anchor-negative, ord=2, axis=-1)
        loss = tf.maximum(tf.square(pos_norm) - tf.square(neg_norm) + eps, 0)
        return tf.reduce_mean(loss)
    
def nonlinear_triplet_loss(anchor, positive, negative, eps=1e-7, beta=32):
    with tf.name_scope('mse_triplet_loss'):
        pos = -tf.square(anchor - positive) / beta + 1 + eps
        neg = -(beta - tf.square(anchor - negative)) / beta + 1 + eps
        
        pos_log = tf.log(pos)
        neg_log = tf.log(neg)
        
        loss = tf.reduce_sum(pos_log - neg_log, axis=-1)
        return tf.reduce_mean(loss)  

In [6]:
with tf.device('/cpu:0'):
    idx_phr = tf.placeholder(tf.string, shape=[None], name='idx')

    dataset = tf.data.Dataset.from_tensor_slices(idx_phr)
    dataset = dataset.apply(tf.contrib.data.shuffle_and_repeat(buffer_size=500))
    dataset = dataset.map(_map, 6)
    dataset = dataset.apply(tf.contrib.data.ignore_errors())
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.map(_batch_map, 2)

    train_iterator = dataset.make_initializable_iterator()

    train_batch = train_iterator.get_next()
    anchor, positive, negative = train_batch

In [7]:
def get_model(inputs, is_training_mode, dropout_keep_pro):
    nnet = model(inputs, is_training_mode, dropout_keep_pro)
    nnet = tf.reduce_mean(nnet, [1, 2], keepdims=True)
    nnet = tf.identity(nnet, 'final_reduce_mean')

    nnet = tf.squeeze(nnet, [1, 2])
    nnet = tf.layers.dense(nnet, 2048, activation=tf.sigmoid)
    nnet = tf.identity(nnet, 'final_dense')

    with tf.variable_scope('visual_control'):
        alpha = tf.layers.Dense(1, activation=tf.sigmoid)(nnet)
        alpha = tf.reshape(alpha, (-1, NUM_SAMPLES, 1))
        v = tf.reshape(nnet, (-1, NUM_SAMPLES, EMBEDDINGS_DIM))
        vm = tf.reduce_sum(v * alpha, axis=1, keepdims=True) / tf.reduce_sum(alpha, axis=1, keepdims=True)
        vm = tf.tile(vm, [1, NUM_SAMPLES, 1])

    with tf.variable_scope('content_control'):
        conc_v = tf.concat([v, vm], axis=-1)
        conc_v = tf.reshape(conc_v, (-1, 2 * EMBEDDINGS_DIM))
        betta = tf.layers.Dense(1, activation=tf.sigmoid)(conc_v)
        betta = tf.reshape(betta, (-1, NUM_SAMPLES, 1))

    weights = alpha * betta

    embeddings = tf.reduce_sum(v * weights, axis=1) / tf.reduce_sum(weights, axis=1)  
    return embeddings

In [8]:
with tf.name_scope('anchor'):
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):    
        anchor_embeddings = model(anchor, True, DROPOUT_KEEP_PROB)
        
with tf.name_scope('positive'):
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE): 
        positive_embeddings = model(positive, True, DROPOUT_KEEP_PROB)
        
with tf.name_scope('negative'):
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE): 
        negative_embeddings = model(negative, True, DROPOUT_KEEP_PROB)

In [9]:
loss = nonlinear_triplet_loss(anchor_embeddings, positive_embeddings, negative_embeddings)

In [10]:
with tf.name_scope('training'):
    optimizer = tf.train.MomentumOptimizer(LEARNING_RATE, MOMENTUM)
    train_op = optimizer.minimize(loss)

In [11]:
sess = tf.Session()
summary_writer = tf.summary.FileWriter('logdir', sess.graph)

sess.run(tf.global_variables_initializer())

sess.run(train_iterator.initializer, feed_dict={idx_phr: list(train_dict.keys())})
    
losses = []
for i in tqdm(range(TRAINING_STEPS)):
    _loss, _ = sess.run([loss, train_op])
    losses.append(_loss)

  0%|          | 1/2000 [00:04<2:34:31,  4.64s/it]


UnknownError: AttributeError: 'NoneType' object has no attribute 'ndim'
Traceback (most recent call last):

  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/script_ops.py", line 158, in __call__
    ret = func(*args)

  File "<ipython-input-5-a9a0bf604193>", line 45, in py_map
    negative_imgs = [load_image(negative_class, n) for n in negative]

  File "<ipython-input-5-a9a0bf604193>", line 45, in <listcomp>
    negative_imgs = [load_image(negative_class, n) for n in negative]

  File "<ipython-input-5-a9a0bf604193>", line 22, in load_image
    if img.ndim < 3:

AttributeError: 'NoneType' object has no attribute 'ndim'


	 [[Node: PyFunc = PyFunc[Tin=[DT_STRING], Tout=[DT_UINT8, DT_UINT8, DT_UINT8], token="pyfunc_0", _device="/device:CPU:0"](arg0)]]
	 [[Node: IteratorGetNext = IteratorGetNext[output_shapes=[[?,224,224,3], [?,224,224,3], [?,224,224,3]], output_types=[DT_UINT8, DT_UINT8, DT_UINT8], _device="/job:localhost/replica:0/task:0/device:CPU:0"](Iterator)]]
	 [[Node: IteratorGetNext/_1 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_173_IteratorGetNext", tensor_type=DT_UINT8, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]