In [14]:
%tensorflow_version 2.x
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [1]:
import matplotlib.pyplot as plt
import numpy as np
from keras.models import Model
from keras.datasets import mnist
from keras.layers.core import  Activation, Dense, Reshape
from keras.layers import Input, Flatten, Dense, Dropout, Lambda
from keras import backend as K
from keras import layers
from keras.engine.topology import Layer
from keras.optimizers import RMSprop, Adam,SGD
import tensorflow as tf
tf.config.run_functions_eagerly(True)

In [2]:
(train_X, train_y), (test_X, test_y) = mnist.load_data()
print('X_train: ' + str(train_X.shape))
print('Y_train: ' + str(train_y.shape))
print('X_test:  '  + str(test_X.shape))
print('Y_test:  '  + str(test_y.shape))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
X_train: (60000, 28, 28)
Y_train: (60000,)
X_test:  (10000, 28, 28)
Y_test:  (10000,)


## For each digit making 3000 anchor , positive and negative samples

In [3]:
import random
anchor_data_set = []
positive_data_set = []
negative_data_set = []
#new_label = []
for i in range(0,10):
  x_train_same = train_X[train_y == i]
  x_train_diff = train_X[train_y != i]
  #select 800 positive samples for each number
  for k in range(3000):
    ind_anchor = random.randint(0,len(x_train_same)-1)
    ind_pos = random.randint(0,len(x_train_same)-1)
    ind_neg = random.randint(0,len(x_train_diff)-1)
    anchor_data_set.append(x_train_same[ind_anchor])
    positive_data_set.append(x_train_same[ind_pos])                   
    negative_data_set.append(x_train_diff[ind_neg])

In [4]:
positive_data_set = np.stack(positive_data_set,axis=0).astype('float64')
negative_data_set = np.stack(negative_data_set,axis=0).astype('float64')
anchor_data_set = np.stack(anchor_data_set,axis=0).astype('float64')

In [5]:
positive_data_set = positive_data_set.reshape(-1,28*28)
negative_data_set = negative_data_set.reshape(-1,28*28)
anchor_data_set = anchor_data_set.reshape(-1,28*28)

In [6]:
print(positive_data_set.shape)
print(negative_data_set.shape)
print(anchor_data_set.shape)

(30000, 784)
(30000, 784)
(30000, 784)


In [7]:
def buildBranchModel():
  inpx = Input(shape=(784,))
  x = Dense(128,activation='relu')(inpx)
  x = Dropout(0.1)(x)
  x = Dense(128,activation='relu')(x)
  x = Dropout(0.1)(x)
  x = Dense(128,activation='relu')(x)
  return Model([inpx],[x])

In [8]:
class TripletLossLayer(Layer):
    def __init__(self, alpha, **kwargs):
        self.alpha = alpha
        super(TripletLossLayer, self).__init__(**kwargs)
    
    def triplet_loss(self, inputs):
        anchor, positive, negative = inputs
        p_dist = K.sum(K.square(anchor-positive), axis=1)
        n_dist = K.sum(K.square(anchor-negative), axis=1)
        return K.sum(K.maximum(p_dist - n_dist + self.alpha, 0), axis=0)
    
    def call(self, inputs):
        loss = self.triplet_loss(inputs)
        self.add_loss(loss)
        return loss

In [9]:
def dummy_loss(y_true, y_pred):
    #print y_true.type,y_pred.type
    #return K.zeros_like(y_pred)
    return y_pred

In [10]:
test_images = test_X.reshape(-1,28*28)

# RMS Optimizer

In [15]:
branchModel = buildBranchModel()
input_anchor = Input(shape=(784,))
input_positive = Input(shape=(784,))
input_negative = Input(shape=(784,))

output_anchor = branchModel(input_anchor)
output_positive = branchModel(input_positive)
output_negative = branchModel(input_negative)

loss_layer = TripletLossLayer(alpha=500)([output_anchor,output_positive,output_negative])

model = Model([input_anchor,input_positive,input_negative],loss_layer)

rms = RMSprop(lr=0.0001)

model.compile(loss=dummy_loss,optimizer=rms)

In [16]:
model.fit([anchor_data_set,positive_data_set,negative_data_set],None,epochs=25,batch_size=40)

Epoch 1/25
  5/750 [..............................] - ETA: 20s - loss: 702333.4250

  "Even though the tf.config.experimental_run_functions_eagerly "


Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<tensorflow.python.keras.callbacks.History at 0x7f6a84153d10>

In [17]:
# create an image pool and save the output of the model here
image_pool = []
for i in range(0,10):
  image_pool.append(train_X[train_y == i][0])
image_pool = np.stack(image_pool,axis=0).astype('float64').reshape(-1,28*28)
image_pool_output = []
for img in image_pool:
  image_pool_output.append(branchModel(img.reshape(1,784)))
image_pool_output = np.stack(image_pool_output,axis=0).astype('float64')

In [18]:
def predict(test_image):
  pred_1 = branchModel(test_image.reshape(1,784))
  dis = K.mean((image_pool_output-pred_1)**2,axis=2).numpy()
  return np.argmin(dis)

In [19]:
predictions = []
for img in test_images:
  predictions.append(predict(img))
correct_predictions_rms = (predictions == test_y).sum()

In [20]:
print(correct_predictions_rms," out of ",test_images.shape[0],"accuracy is : ",correct_predictions_rms/test_images.shape[0])

9177  out of  10000 accuracy is :  0.9177


# Adam optimizer

In [21]:
branchModel_adam = buildBranchModel()
input_anchor_adam = Input(shape=(784,))
input_positive_adam = Input(shape=(784,))
input_negative_adam = Input(shape=(784,))

output_anchor_adam = branchModel_adam(input_anchor_adam)
output_positive_adam = branchModel_adam(input_positive_adam)
output_negative_adam = branchModel_adam(input_negative_adam)

loss_layer_adam = TripletLossLayer(alpha=500)([output_anchor_adam,output_positive_adam,output_negative_adam])

model_adam = Model([input_anchor_adam,input_positive_adam,input_negative_adam],loss_layer_adam)

adam = Adam(lr=0.0001)

model_adam.compile(loss=dummy_loss,optimizer=adam)

In [23]:
model_adam.fit([anchor_data_set,positive_data_set,negative_data_set],None,epochs=20,batch_size=40)

Epoch 1/20
  4/750 [..............................] - ETA: 15s - loss: 916896.1875 

  "Even though the tf.config.experimental_run_functions_eagerly "


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f6a6b1c0990>

In [24]:
# create an image pool and save the output of the model here
image_pool_adam = []
for i in range(0,10):
  image_pool_adam.append(train_X[train_y == i][0])
image_pool_adam = np.stack(image_pool_adam,axis=0).astype('float64').reshape(-1,28*28)
image_pool_output_adam = []
for img in image_pool_adam:
  image_pool_output_adam.append(branchModel_adam(img.reshape(1,784)))
image_pool_output_adam = np.stack(image_pool_output_adam,axis=0).astype('float64')

def predict_adam(test_image):
  pred_1 = branchModel_adam(test_image.reshape(1,784))
  dis = K.mean((image_pool_output_adam-pred_1)**2,axis=2).numpy()
  return np.argmin(dis)

predictions_adam = []
for img in test_images:
  predictions_adam.append(predict_adam(img))
correct_predictions_adam = (predictions_adam == test_y).sum()

print(correct_predictions_adam," out of ",test_images.shape[0],"accuracy is : ",correct_predictions_adam/test_images.shape[0])

9016  out of  10000 accuracy is :  0.9016


# SGD optimizer

In [32]:
branchModel_sgd = buildBranchModel()
input_anchor_sgd = Input(shape=(784,))
input_positive_sgd = Input(shape=(784,))
input_negative_sgd = Input(shape=(784,))

output_anchor_sgd = branchModel_sgd(input_anchor_sgd)
output_positive_sgd = branchModel_sgd(input_positive_sgd)
output_negative_sgd = branchModel_sgd(input_negative_sgd)

loss_layer_sgd = TripletLossLayer(alpha=500)([output_anchor_sgd,output_positive_sgd,output_negative_sgd])

model_sgd = Model([input_anchor_sgd,input_positive_sgd,input_negative_sgd],loss_layer_sgd)

sgd = SGD(lr=0.0000001, momentum=0.2)

model_sgd.compile(loss=dummy_loss,optimizer=sgd)

In [33]:
model_sgd.fit([anchor_data_set,positive_data_set,negative_data_set],None,epochs=20,batch_size=40)

Epoch 1/20
  7/750 [..............................] - ETA: 14s - loss: 1067518.8661

  "Even though the tf.config.experimental_run_functions_eagerly "


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f6a65e82d90>

In [35]:
# create an image pool and save the output of the model here
image_pool_sgd = []
for i in range(0,10):
  image_pool_sgd.append(train_X[train_y == i][0])
image_pool_sgd = np.stack(image_pool_sgd,axis=0).astype('float64').reshape(-1,28*28)
image_pool_output_sgd = []
for img in image_pool_sgd:
  image_pool_output_sgd.append(branchModel_sgd(img.reshape(1,784)))
image_pool_output_sgd = np.stack(image_pool_output_sgd,axis=0).astype('float64')

def predict_sgd(test_image):
  pred_1 = branchModel_sgd(test_image.reshape(1,784))
  dis = K.mean((image_pool_output_sgd-pred_1)**2,axis=2).numpy()
  return np.argmin(dis)

predictions_sgd = []
for img in test_images:
  predictions_sgd.append(predict(img))
correct_predictions_sgd = (predictions_sgd == test_y).sum()

print(correct_predictions_sgd," out of ",test_images.shape[0],"accuracy is : ",correct_predictions_sgd/test_images.shape[0])

9177  out of  10000 accuracy is :  0.9177


In [37]:
from prettytable import PrettyTable
t = PrettyTable(['','Adam ', 'RMSProp ','SGD'])
t.add_row(['Accuracy',correct_predictions_adam/test_images.shape[0],correct_predictions_rms/test_images.shape[0],correct_predictions_sgd/test_images.shape[0]])
print(t)

+----------+--------+----------+--------+
|          | Adam   | RMSProp  |  SGD   |
+----------+--------+----------+--------+
| Accuracy | 0.9016 |  0.9177  | 0.9177 |
+----------+--------+----------+--------+


# Hyper parameter - margin

In [40]:
branchModel_200 = buildBranchModel()
input_anchor_200 = Input(shape=(784,))
input_positive_200 = Input(shape=(784,))
input_negative_200 = Input(shape=(784,))

output_anchor_200 = branchModel_adam(input_anchor_200)
output_positive_200 = branchModel_adam(input_positive_200)
output_negative_200 = branchModel_adam(input_negative_200)

loss_layer_200 = TripletLossLayer(alpha=200)([output_anchor_200,output_positive_200,output_negative_200])

model_200 = Model([input_anchor_200,input_positive_200,input_negative_200],loss_layer_200)

adam = Adam(lr=0.00001)

model_200.compile(loss=dummy_loss,optimizer=adam)

In [41]:
model_200.fit([anchor_data_set,positive_data_set,negative_data_set],None,epochs=20,batch_size=40)

Epoch 1/20
  7/750 [..............................] - ETA: 15s - loss: 812.0749

  "Even though the tf.config.experimental_run_functions_eagerly "


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f6a63ee3090>

In [44]:
# create an image pool and save the output of the model here
image_pool_200 = []
for i in range(0,10):
  image_pool_200.append(train_X[train_y == i][0])
image_pool_200 = np.stack(image_pool_200,axis=0).astype('float64').reshape(-1,28*28)
image_pool_output_200 = []
for img in image_pool_200:
  image_pool_output_200.append(branchModel_200(img.reshape(1,784)))
image_pool_output_200 = np.stack(image_pool_output_200,axis=0).astype('float64')

def predict_200(test_image):
  pred_1 = branchModel_200(test_image.reshape(1,784))
  dis = K.mean((image_pool_output_200-pred_1)**2,axis=2).numpy()
  return np.argmin(dis)

predictions_200 = []
for img in test_images:
  predictions_200.append(predict_200(img))
correct_predictions_200 = (predictions_200 == test_y).sum()

print(correct_predictions_200," out of ",test_images.shape[0],"accuracy is : ",correct_predictions_200/test_images.shape[0])

3341  out of  10000 accuracy is :  0.3341


## As we can see accuracy dropped from 91% to 33% on changing margin from 500 to 200