In [23]:
# ---- Input / output locations ----
# Training features (word2vec vectors and exemplar vectors, one .npy file each)
w2v_training_path = "./Data/w2v_train2014.out.npy"
exemplar_training_path = "./Data/exemplar_train2014.out.npy"

# Validation features, same layout as the training files
w2v_validation_path = "./Data/w2v_val2014.out.npy"
exemplar_validation_path = "./Data/exemplar_val2014.out.npy"

# Prefix handed to the saver when the session is stored
model_save_path = "./model/sess_stored_old_training"

# ---- Training constants ----
# Number of examples per batch
BatchSize = 256
# Number of negative blocks appended to every batch of positives
NbrNegatives = 100
# Device string passed to tf.device when the graph is built
Device = "/gpu:0"

# How much to scale the cosine similarity before applying softmax
SoftmaxScaling = 5

In [24]:
import numpy as np
import tensorflow as tf
import random
from datetime import datetime

In [25]:
# Load training data as numpy arrays.
# copy=False makes astype a no-op when the file is already float32, so the
# large arrays are not duplicated in memory; other dtypes (e.g. float64) are
# still converted.
w2v_train = np.load(w2v_training_path).astype(np.float32, copy=False)
exemplar_train = np.load(exemplar_training_path).astype(np.float32, copy=False)

# The two arrays are consumed pairwise (row i of one belongs to row i of the
# other), so a length mismatch is a data error worth failing on immediately.
assert len(w2v_train) == len(exemplar_train), "training w2v / exemplar row counts differ"

nbr_images = len(exemplar_train)
# Number of complete batches in one pass over the training data
nbr_batches = nbr_images // BatchSize

print("Total number of training examples: " + str(nbr_images))
print("Shape of exemplar data: " + str(exemplar_train.shape))
print("Shape of w2v data: " + str(w2v_train.shape))

# Load validation data (same conversion as for the training data)
w2v_val = np.load(w2v_validation_path).astype(np.float32, copy=False)
exemplar_val = np.load(exemplar_validation_path).astype(np.float32, copy=False)

assert len(w2v_val) == len(exemplar_val), "validation w2v / exemplar row counts differ"

nbr_images_val = len(exemplar_val)
print("Total number of validation examples: " + str(nbr_images_val))
print("Shape of exemplar validation data: " + str(exemplar_val.shape))
print("Shape of w2v validation data: " + str(w2v_val.shape))


Total number of training examples: 414073
Shape of exemplar data: (414073, 1024)
Shape of w2v data: (414073, 300)
Total number of validation examples: 202654
Shape of exemplar validation data: (202654, 1024)
Shape of w2v validation data: (202654, 300)


In [26]:
# Take a tensor and tile it vertically NbrNegatives times, rotating the rows by a random offset for each tiling
def create_negatives(y):
    """Stack `y` with NbrNegatives row-rotated copies of itself.

    The result is `y` followed by NbrNegatives blocks; block i is `y` with its
    rows rotated by an offset taken from the i-th of NbrNegatives equal
    sub-ranges of the batch. Pairing row b of the positives with row b of a
    rotated block therefore pairs it with a different example.

    The offsets are drawn with Python's `random` while the graph is being
    built, so they are fixed for the lifetime of the graph and are not
    re-sampled per batch.

    Args:
        y: 2-D tensor; the slicing below assumes it has exactly BatchSize rows.

    Returns:
        Tensor with (NbrNegatives + 1) * BatchSize rows and the same number of
        columns as `y`.
    """
    with tf.device(Device):
        pieces = [y]

        for i in range(NbrNegatives):
            # Offset in 1..BatchSize-1. An offset of 0 (or BatchSize) would
            # reproduce `y` unchanged and turn the positives into their own
            # "negatives", so both are excluded.
            shift = 1 + int((random.random() + i) * (BatchSize - 1) / NbrNegatives)
            shift = min(shift, BatchSize - 1)

            # Rotation = rows [shift:] followed by rows [:shift]
            pieces.append(tf.slice(y, [shift, 0], [BatchSize - shift, -1]))
            pieces.append(tf.slice(y, [0, 0], [shift, -1]))

        # One concat over all pieces instead of a growing chain of concats
        return tf.concat(pieces, axis=0)

In [27]:
# Cosine similarity between a batch of positives and the tiled batch of negatives
def create_cosine_similarity(query_y, doc_y, scaling):
    """Build the scaled cosine-similarity matrix between queries and documents.

    `query_y` is tiled NbrNegatives + 1 times so that it lines up row by row
    with `doc_y`; the row-wise cosine similarity is then computed and folded
    into a [BatchSize, NbrNegatives + 1] matrix in which entry (b, k) is the
    similarity between query row b and row k * BatchSize + b of `doc_y`.

    The intermediate tensors are printed while the graph is built (debug output).

    Args:
        query_y: 2-D tensor of query embeddings (BatchSize rows expected by the reshape).
        doc_y: 2-D tensor with (NbrNegatives + 1) times as many rows as `query_y`.
        scaling: factor the similarities are multiplied by.

    Returns:
        Tensor of shape [BatchSize, NbrNegatives + 1] holding cosine * scaling.
    """
    copies = NbrNegatives + 1

    with tf.device(Device):
        print(query_y)
        print(doc_y)
        print("norms:")

        # Euclidean norm of every row, kept as a column vector
        query_sq_sum = tf.reduce_sum(tf.square(query_y), 1, True)
        query_norm = tf.tile(tf.sqrt(query_sq_sum), [copies, 1])
        doc_sq_sum = tf.reduce_sum(tf.square(doc_y), 1, True)
        doc_norm = tf.sqrt(doc_sq_sum)
        print(query_norm)
        print(doc_norm)

        # Element-wise product of the tiled queries with the documents
        tiled_query = tf.tile(query_y, [copies, 1])
        elementwise = tf.multiply(tiled_query, doc_y)
        print("Prod:")
        print(elementwise)

        denominator = tf.multiply(query_norm, doc_norm)
        print("Norm prod:")
        print(denominator)

        # Row-wise dot product
        dot_product = tf.reduce_sum(elementwise, 1, True)
        print("Norm sum")
        print(dot_product)

        flat_cosine = tf.truediv(dot_product, denominator)
        print("cosine")
        print(flat_cosine)

        # Column vector -> one row per tiled copy -> one row per query
        per_copy = tf.reshape(tf.transpose(flat_cosine), [copies, BatchSize])
        cos_sim = tf.transpose(per_copy) * scaling
        print(cos_sim)

        return cos_sim

In [28]:
# Loss function
def create_loss(cos_sim, cos_sim_not_scaled):
    """Build the training loss and an unscaled monitoring value.

    Column 0 of both inputs is treated as the positive pair; the remaining
    columns are the negatives.

    Args:
        cos_sim: [BatchSize, NbrNegatives + 1] scaled similarities, used as
            softmax logits.
        cos_sim_not_scaled: tensor of the same layout holding the unscaled
            similarities.

    Returns:
        (loss, loss_raw) where
        loss     -- tensor named 'loss': negative log softmax probability of
                    the positive column, summed over rows and divided by BatchSize.
        loss_raw -- tensor named 'loss_raw': column 0 of `cos_sim_not_scaled`,
                    summed over rows and divided by BatchSize. Despite the
                    name this is a similarity, so higher is better.
    """
    # Use the shared Device constant like every other graph-building function
    with tf.device(Device):
        # log_softmax computes log(softmax(x)) in one numerically stable step
        log_prob = tf.nn.log_softmax(cos_sim)

        hit_log_prob = tf.slice(log_prob, [0, 0], [-1, 1])
        hit_cos_sim = tf.slice(cos_sim_not_scaled, [0, 0], [-1, 1])

        loss = tf.identity(-tf.reduce_sum(hit_log_prob) / BatchSize, name='loss')

        loss_raw = tf.identity(tf.reduce_sum(hit_cos_sim) / BatchSize, name='loss_raw')

        return loss, loss_raw

In [29]:
# Network that maps a Word2Vec vector into the shared similarity vector space
def create_w2v_network(input, is_training):
    """Build the Word2Vec branch: two ReLU+dropout layers, a linear layer, batch norm.

    Layers, in order:
        w2v_l1: dense 300, ReLU, dropout 0.3
        w2v_l2: dense 250, ReLU, dropout 0.3
        w2v_l3: dense 200, no activation
        batch normalization over axis 1

    All dense kernels use the Xavier initializer.

    Args:
        input: 2-D tensor of Word2Vec vectors.
        is_training: forwarded as `training` to dropout and batch normalization.

    Returns:
        The batch-normalized 200-unit output tensor.
    """
    # (layer name, number of units) for the hidden layers
    hidden_layers = (('w2v_l1', 300), ('w2v_l2', 250))
    dropout_rate = 0.3

    with tf.device(Device):
        activations = input
        for layer_name, width in hidden_layers:
            dense_out = tf.layers.dense(
                name=layer_name,
                inputs=activations,
                units=width,
                activation=tf.nn.relu,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            activations = tf.layers.dropout(dense_out, rate=dropout_rate, training=is_training)

        projection = tf.layers.dense(
            name='w2v_l3',
            inputs=activations,
            units=200,
            activation=None,
            kernel_initializer=tf.contrib.layers.xavier_initializer())

        return tf.layers.batch_normalization(projection, axis=1, training=is_training)

In [30]:
# Network that maps an Exemplar vector into the shared similarity vector space
def create_exemplar_network(input, is_training):
    """Build the Exemplar branch: three ReLU+dropout layers, a linear layer, batch norm.

    Layers, in order:
        exemplar_l1: dense 800, ReLU, dropout 0.4
        exemplar_l2: dense 500, ReLU, dropout 0.2
        exemplar_l3: dense 350, ReLU, dropout 0.2
        exemplar_l4: dense 200, no activation
        batch normalization over axis 1

    All dense kernels use the Xavier initializer.

    Args:
        input: 2-D tensor of Exemplar vectors (the graph cell feeds it
            1024-wide rows).
        is_training: forwarded as `training` to dropout and batch normalization.

    Returns:
        The batch-normalized 200-unit output tensor.
    """
    # (layer name, number of units, dropout rate) for the hidden layers
    hidden_layers = (
        ('exemplar_l1', 800, 0.4),
        ('exemplar_l2', 500, 0.2),
        ('exemplar_l3', 350, 0.2),
    )

    with tf.device(Device):
        activations = input
        for layer_name, width, drop_rate in hidden_layers:
            dense_out = tf.layers.dense(
                name=layer_name,
                inputs=activations,
                units=width,
                activation=tf.nn.relu,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            activations = tf.layers.dropout(dense_out, rate=drop_rate, training=is_training)

        projection = tf.layers.dense(
            name='exemplar_l4',
            inputs=activations,
            units=200,
            activation=None,
            kernel_initializer=tf.contrib.layers.xavier_initializer())

        return tf.layers.batch_normalization(projection, axis=1, training=is_training)

In [31]:
# Create the network graph (starts from an empty default graph so the cell can be re-run)
tf.reset_default_graph()

with tf.device(Device):
    # Placeholders through which the numpy arrays are fed into the datasets
    exemplar_placeholder = tf.placeholder(exemplar_train.dtype, [None, 1024], name='exemplar_placeholder')
    w2v_placeholder = tf.placeholder(w2v_train.dtype, [None, 300])

    # Training and validation pipelines: shuffle over the whole set, repeat forever, batch
    dataset = tf.data.Dataset.from_tensor_slices((exemplar_placeholder, w2v_placeholder)).shuffle(buffer_size=nbr_images).repeat().batch(BatchSize) #shuffle repeat
    dataset_val = tf.data.Dataset.from_tensor_slices((exemplar_placeholder, w2v_placeholder)).shuffle(buffer_size=nbr_images_val).repeat().batch(BatchSize) #shuffle repeat

    # Prediction pipeline: exemplar vectors only, in order, single pass
    dataset_pred = tf.data.Dataset.from_tensor_slices((exemplar_placeholder)).batch(BatchSize)

    # Feedable iterator: the string fed into `handle` selects which concrete
    # iterator (training or validation) supplies the next batch
    handle = tf.placeholder(tf.string, shape=[])
    iterator = tf.data.Iterator.from_string_handle(handle, dataset.output_types, dataset.output_shapes)

    # Concrete iterators for training and validation
    iter = dataset.make_initializable_iterator()
    iter_val = dataset_val.make_initializable_iterator()

    x_exemplar, x_w2v = iterator.get_next()

    # Input for prediction
    iter_pred = dataset_pred.make_initializable_iterator(shared_name='apa')
    x_exemplar_pred = iter_pred.get_next()

    # Output from the w2v network (training mode)
    y_w2v = create_w2v_network(x_w2v, is_training=True)

    # Output from the exemplar network (training mode); creates the variables
    with tf.variable_scope("model", reuse=False):
        y_exemplar = create_exemplar_network(x_exemplar, True)

    # Output from the exemplar network for prediction (i.e. no dropout applied).
    # Shares the variables created above and reads directly from exemplar_placeholder.
    with tf.variable_scope("model", reuse=True):
        y_exemplar_pred = tf.identity(create_exemplar_network(exemplar_placeholder, False), name='y_exemplar_pred')

    # w2v output followed by its rotated copies, used as negatives
    y_negatives = create_negatives(y_w2v)

    # Similarity matrices: scaled (softmax logits) and unscaled (for monitoring)
    cos_similarity = create_cosine_similarity(y_exemplar, y_negatives, SoftmaxScaling)
    cos_similarity_unscaled = create_cosine_similarity(y_exemplar, y_negatives, 1)

    # Loss tensors
    loss, loss_raw = create_loss(cos_similarity, cos_similarity_unscaled)

    # Optimizer.
    # tf.layers.batch_normalization(training=True) only *registers* the updates
    # of its moving mean / variance in the UPDATE_OPS collection. They have to
    # be attached to the train op explicitly; otherwise the moving statistics
    # keep their initial values and the training=False prediction output above
    # is normalized with the wrong statistics.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = tf.train.AdamOptimizer(name='train_step').minimize(loss)


Tensor("model/batch_normalization/batchnorm/add_1:0", shape=(?, 200), dtype=float32, device=/device:GPU:0)
Tensor("concat_99:0", shape=(?, 200), dtype=float32, device=/device:GPU:0)
norms:
Tensor("Tile_1:0", shape=(?, 1), dtype=float32, device=/device:GPU:0)
Tensor("Sqrt_1:0", shape=(?, 1), dtype=float32, device=/device:GPU:0)
Prod:
Tensor("Mul:0", shape=(?, 200), dtype=float32, device=/device:GPU:0)
Norm prod:
Tensor("Mul_1:0", shape=(?, 1), dtype=float32, device=/device:GPU:0)
Norm sum
Tensor("Sum_2:0", shape=(?, 1), dtype=float32, device=/device:GPU:0)
cosine
Tensor("truediv:0", shape=(?, 1), dtype=float32, device=/device:GPU:0)
Tensor("mul_2:0", shape=(256, 101), dtype=float32, device=/device:GPU:0)
Tensor("model/batch_normalization/batchnorm/add_1:0", shape=(?, 200), dtype=float32, device=/device:GPU:0)
Tensor("concat_99:0", shape=(?, 200), dtype=float32, device=/device:GPU:0)
norms:
Tensor("Tile_3:0", shape=(?, 1), dtype=float32, device=/device:GPU:0)
Tensor("Sqrt_3:0", shape=(?,

In [32]:
# Start a TensorFlow session and initialize all global variables
init = tf.global_variables_initializer()

# allow_soft_placement is turned on in the session configuration
session_config = tf.ConfigProto(allow_soft_placement=True)
sess = tf.Session(config=session_config)

sess.run(init)

In [33]:
# Create the saver that is used further down to write the session to
# model_save_path. No explicit variable list is passed to it here.
saver = tf.train.Saver()

In [34]:
# Bind the in-memory arrays to the training and validation iterators
train_feed = {exemplar_placeholder: exemplar_train, w2v_placeholder: w2v_train}
val_feed = {exemplar_placeholder: exemplar_val, w2v_placeholder: w2v_val}

sess.run(iter.initializer, feed_dict=train_feed)
sess.run(iter_val.initializer, feed_dict=val_feed)

# String handles that select one of the two iterators when fed into `handle`
training_handle = sess.run(iter.string_handle())
validation_handle = sess.run(iter_val.string_handle())


In [35]:
# Average the raw loss over a number of validation batches
def run_validation(count):
    """Evaluate `loss_raw` on `count` validation batches and print the mean.

    Each evaluation feeds `validation_handle` so that the batch is drawn from
    the validation iterator. Nothing is returned; the mean is printed,
    followed by an empty line.

    Args:
        count: number of validation batches to average over.
    """
    validation_feed = {handle: validation_handle}

    total = 0.0
    for _batch in range(count):
        total = total + sess.run(loss_raw, feed_dict=validation_feed)

    mean_value = total / count
    print("Validation loss over " + str(count) +  " batches: " + str(mean_value))
    print()
    
# Training loop

NbrEpochs = 2
PrintoutsPerEpoch = 10

batches_per_epoch = nbr_batches
# Number of batches between two progress printouts. Clamped to at least 1:
# with fewer batches than printouts the integer division yields 0 and the
# modulo in the loop below would raise ZeroDivisionError.
batches_to_print = max(1, batches_per_epoch // PrintoutsPerEpoch)

print("Training " + str(nbr_batches) + " batches\r\n")

# Baseline before any training step has been taken
run_validation(100)

trainingStartTime = datetime.now()

for epoch in range(NbrEpochs):
    print("Epoch " + str(epoch))

    epochStartTime = datetime.now()

    for j in range(batches_per_epoch):
        if j % batches_to_print == 0:
            # Progress step: train on one batch and fetch its raw loss in the
            # same run; only the second fetched value is needed
            batch_raw = sess.run([train_step, loss_raw],  feed_dict={handle: training_handle})[1]
            print("Raw loss: " + str(batch_raw))

            run_validation(5)
        else:
            sess.run(train_step,  feed_dict={handle: training_handle})

    print()
    print("Time for epoch:", datetime.now() - epochStartTime)
    print()
    # Longer validation run at the end of every epoch
    run_validation(100)

print("Time for training:", datetime.now() - trainingStartTime)


Training 1617 batches

Validation loss over 100 batches: -0.0008369434965425171

Epoch 0
Raw loss: 0.0009663295
Validation loss over 5 batches: 0.009247484849765897

Raw loss: 0.6915869
Validation loss over 5 batches: 0.6913704514503479

Raw loss: 0.73022103
Validation loss over 5 batches: 0.7242599368095398

Raw loss: 0.7536123
Validation loss over 5 batches: 0.7338937759399414

Raw loss: 0.76469576
Validation loss over 5 batches: 0.741386330127716

Raw loss: 0.7284039
Validation loss over 5 batches: 0.7557746171951294

Raw loss: 0.75618553
Validation loss over 5 batches: 0.747964596748352

Raw loss: 0.7446423
Validation loss over 5 batches: 0.7554044604301453

Raw loss: 0.7406895
Validation loss over 5 batches: 0.7523503065109253

Raw loss: 0.770108
Validation loss over 5 batches: 0.7534955501556396

Raw loss: 0.7443092
Validation loss over 5 batches: 0.7454350352287292


Time for epoch: 0:03:50.724578

Validation loss over 100 batches: 0.7466109448671341

Epoch 1
Raw loss: 0.7578566

In [36]:
# Store the current session under the model_save_path prefix; the value
# returned by save() is displayed as the cell output.
# NOTE(review): presumably the target directory has to exist beforehand — confirm.
saver.save(sess, model_save_path)

'./model/sess_stored_old_training'

In [30]:
# TODO: TensorBoard logging (placeholder cell, no code yet)

In [None]:
#rank metric