In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import pickle

In [2]:
BATCH_SIZE = 256
EPOCHS = 10
LEARNING_RATE = 0.0001
CAPTION_INPUT_SIZE = 300
FRAME_INPUT_SIZE = 500
CAPTION_LATENT_SIZE = 400
FRAME_LATENT_SIZE = 500

In [3]:
# Creating placeholders
caption_placeholder = tf.placeholder(tf.float32, shape = [None, None, CAPTION_INPUT_SIZE])
frame_placeholder = tf.placeholder(tf.float32, shape = [None, None, FRAME_INPUT_SIZE])
y_placeholder = tf.placeholder(tf.float32, shape = [None])

# Setting GPU config
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction = 0.8)
config = tf.ConfigProto(allow_soft_placement = True, gpu_options = gpu_options)
config.gpu_options.allow_growth = True

In [4]:
# Defining the Neural Network Graph for modified Siamese Network
def train_caption_embeddings(x_placeholder, latent_dim):
    cell = tf.nn.rnn_cell.GRUCell(latent_dim, kernel_initializer = tf.contrib.layers.variance_scaling_initializer(), name = 'caption_cells')
    cells = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob = 0.5)
    x, s = tf.nn.dynamic_rnn(cells, x_placeholder, dtype = tf.float32, swap_memory = True)
    x = tf.contrib.layers.batch_norm(x, is_training = True, updates_collections = None)
    x = tf.nn.dropout(x, rate = 0.2)
    print(x.shape)
    x = tf.reshape(x, shape = [-1, 50 * latent_dim])
    print(x.shape)
    x = tf.nn.dropout(x, rate = 0.2)
    x = tf.layers.dense(x, latent_dim, kernel_initializer = tf.contrib.layers.variance_scaling_initializer())
    out = tf.nn.relu(x)
    print(out.shape)
    return out

def train_frame_embeddings(x_placeholder, latent_dim):
    cell = tf.nn.rnn_cell.GRUCell(latent_dim, kernel_initializer = tf.contrib.layers.variance_scaling_initializer(), name = 'frame_cells')
    cells = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob = 0.5)
    x, s = tf.nn.dynamic_rnn(cells, x_placeholder, dtype = tf.float32, swap_memory = True)
    x = tf.contrib.layers.batch_norm(x, is_training = True, updates_collections = None)
    x = tf.nn.dropout(x, rate = 0.2)
    print(x.shape)
    x = tf.reshape(x, shape = [-1, 50 * latent_dim])
    print(x.shape)
    x = tf.nn.dropout(x, rate = 0.2)
    x = tf.layers.dense(x, latent_dim, kernel_initializer = tf.contrib.layers.variance_scaling_initializer())
    out = tf.nn.relu(x)
    print(out.shape)
    return out

In [5]:
caption_out = train_caption_embeddings(caption_placeholder, CAPTION_LATENT_SIZE) #Caption
frame_out_full = train_frame_embeddings(frame_placeholder, FRAME_LATENT_SIZE) #Frame

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
(?, ?, 400)
(?, 20000)
Instructions for updating:
Use keras.layers.Dense instead.
Instructions for upda

In [6]:
# Taking the first CAPTION_LATENT_SIZE from frame latent vector
frame_out = frame_out_full[:, :CAPTION_LATENT_SIZE]

In [7]:
print(frame_out.shape)

(?, 400)


In [8]:
# Taking cosine similarity
normalize_caption = tf.nn.l2_normalize(caption_out, 0)        
normalize_frame = tf.nn.l2_normalize(frame_out, 0)
cos_similarity = tf.reduce_mean(tf.multiply(normalize_caption, normalize_frame), axis = 1)
# cos_similarity = tf.reduce_mean(tf.multiply(caption_out, frame_out), axis = 1)

In [9]:
print(cos_similarity.shape)

(?,)


In [10]:
final_out = tf.cast(tf.math.greater(tf.nn.sigmoid(cos_similarity), 0.5), tf.int16)
accuracy = tf.metrics.accuracy(labels = y_placeholder, predictions = final_out)

loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels = y_placeholder, logits = cos_similarity))
optimizer = tf.train.AdamOptimizer(learning_rate = LEARNING_RATE)
train = optimizer.minimize(loss)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [11]:
# Reading triples list for training and validation
with open(r'/home/login/Paired/train_triples_list.pickle', 'rb') as f:
    train_triples_list = pickle.load(f)
    
with open(r'/home/login/Paired/val_triples_list.pickle', 'rb') as f:
    val_triples_list = pickle.load(f)

In [12]:
train_anchor = np.stack([each[0] for each in train_triples_list], axis = 0)
train_positive = np.stack([each[1] for each in train_triples_list], axis = 0)
train_negative = np.stack([each[2] for each in train_triples_list], axis = 0)

In [13]:
print(train_anchor.shape)
print(train_positive.shape)
print(train_negative.shape)

(461750, 50, 300)
(461750, 50, 500)
(461750, 50, 500)


In [14]:
## Creating pairs from triples

train_triples_list, val_triples_list = [], []
train_frame =  np.concatenate((train_positive, train_negative))

train_positive, train_negative = [], []
train_caption =  np.concatenate((train_anchor, train_anchor))
train_anchor = []

In [24]:
## Creating y list, it will be 1 for positive pairs 0 for negative pairs
positive_y = [1] * int(train_frame.shape[0]/2)
negative_y = [0] * int(train_frame.shape[0]/2)
y = []
y.extend(positive_y)
y.extend(negative_y)
# y = np.array(y)

In [25]:
print(train_caption.shape)
print(train_frame.shape)
print(len(y))

(923500, 50, 300)
(923500, 50, 500)
923500


In [26]:
train_triples_list, val_triples_list = [], []
train_anchor, train_positive, train_negative = [], [], []

In [27]:
init = tf.global_variables_initializer()
sess = tf.Session(config = config)

In [32]:
# Training the model
sess.run(init)
for i in range(EPOCHS):
    for idx in range(0, train_caption.shape[0], BATCH_SIZE):
        train_caption_batch, train_frame_batch, y_batch = train_caption[idx : idx + BATCH_SIZE], train_frame[idx : idx + BATCH_SIZE], y[idx : idx + BATCH_SIZE]
        sess.run(train, feed_dict = {caption_placeholder : train_caption_batch, \
                                     frame_placeholder : train_frame_batch, \
                                     y_placeholder : y_batch})
    
    if i % 1 == 0:
#         print("HELLLOOOO")
        tot_loss, val_tot_loss = 0, 0
        for idx in range(0, train_caption.shape[0], BATCH_SIZE):
            train_caption_batch, train_frame_batch, y_batch = train_caption[idx : idx + BATCH_SIZE], train_frame[idx : idx + BATCH_SIZE], y[idx : idx + BATCH_SIZE]
            loss_ = sess.run(loss, feed_dict = {caption_placeholder : train_caption_batch, \
                                            frame_placeholder : train_frame_batch, \
                                            y_placeholder : y_batch})
            tot_loss += loss_
            
#             val_anchor_batch, val_positive_batch, val_negative_batch = val_anchor[idx : idx + BATCH_SIZE], val_positive[idx : idx + BATCH_SIZE], val_negative[idx : idx + BATCH_SIZE]
#             val_loss_ = sess.run(loss, feed_dict = {caption_placeholder : val_anchor_batch, \
#                                             frame_1_placeholder : val_positive_batch, \
#                                             frame_2_placeholder : val_negative_batch})
#             val_tot_loss += val_loss_
#         print("After epoch {} train loss is {:.4f} valid loss is {:.4f}".format(i, tot_loss, val_tot_loss))
        print("After epoch {} train loss is {:.2f}".format(i, tot_loss))


After epoch 0 train loss is 2500.11
After epoch 1 train loss is 2500.04
After epoch 2 train loss is 2499.56
After epoch 3 train loss is 2498.09
After epoch 4 train loss is 2494.21
After epoch 5 train loss is 2484.74
After epoch 6 train loss is 2462.46
After epoch 7 train loss is 2411.39
After epoch 8 train loss is 2496.07
After epoch 9 train loss is 2340.89
