In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
from keras import backend as K
import numpy as np
from tqdm import tqdm
from knn import kNN, convert_embeddings_to_tf
from load_mini_imagenet import load_mini_imagenet_data, split_for_n_shot
# Request eager execution: later cells call `.numpy()` directly on layer outputs.
# NOTE(review): presumably redundant on TF 2.x where eager is the default — confirm the targeted TF version.
tf.compat.v1.enable_eager_execution()

In [2]:
def initialize_weights(shape, name=None, dtype=None):
    """
        Kernel initializer drawing `shape` samples from a normal distribution
        with mean 0.0 and standard deviation 0.01, as suggested in
        http://www.cs.utoronto.ca/~gkoch/files/msc-thesis.pdf

        `name` and `dtype` are accepted for initializer-signature compatibility
        but are not used; the NumPy array from np.random.normal is returned as is.
    """
    mean, std = 0.0, 1e-2
    return np.random.normal(mean, std, shape)

def initialize_bias(shape, name=None, dtype=None):
    """
        Bias initializer drawing `shape` samples from a normal distribution
        with mean 0.5 and standard deviation 0.01, as suggested in
        http://www.cs.utoronto.ca/~gkoch/files/msc-thesis.pdf

        `name` and `dtype` are accepted for initializer-signature compatibility
        but are not used; the NumPy array from np.random.normal is returned as is.
    """
    mean, std = 0.5, 1e-2
    return np.random.normal(mean, std, shape)

def L2_Norm(vectors):
    """
        Euclidean (L2) distance between two batches of feature vectors.

        vectors: a pair (featsA, featsB) of tensors; their squared difference
                 is summed over axis 1.
        Returns the distance tensor with axis 1 kept as size 1 (keepdims=True).

        TensorFlow ops are used directly instead of the standalone `keras`
        backend so this function runs on the same backend as the
        `tensorflow.keras` layers the model is built from.
    """
    # unpack the two feature batches
    (featsA, featsB) = vectors
    # sum of squared element-wise differences along axis 1
    sumSquared = tf.reduce_sum(tf.square(featsA - featsB), axis=1, keepdims=True)
    # clamp to at least epsilon so sqrt never receives an exact zero
    return tf.sqrt(tf.maximum(sumSquared, tf.keras.backend.epsilon()))

In [3]:
def get_siamese_model(input_shape):
    """
        Build a siamese network whose architecture is based on the one in
        http://www.cs.utoronto.ca/~gkoch/files/msc-thesis.pdf

        input_shape: shape of a single input image (without the batch axis).
        Returns a Model taking [left_input, right_input] and producing the
        L2 distance (via L2_Norm) between the two encodings.
    """

    # One input tensor per branch
    left_input = layers.Input(input_shape)
    right_input = layers.Input(input_shape)

    # Shared convolutional encoder; both branches reuse these weights.
    # NOTE(review): the first Conv2D keeps the default bias initializer while
    # the later layers use initialize_bias — confirm this is intended.
    encoder = models.Sequential([
        layers.Conv2D(128, (10,10), activation='relu', input_shape=input_shape,
                      kernel_initializer=initialize_weights, kernel_regularizer=l2(2e-4)),
        layers.MaxPooling2D(),
        layers.Conv2D(256, (7,7), activation='relu',
                      kernel_initializer=initialize_weights,
                      bias_initializer=initialize_bias, kernel_regularizer=l2(2e-4)),
        layers.MaxPooling2D(),
        layers.Conv2D(256, (4,4), activation='relu',
                      kernel_initializer=initialize_weights,
                      bias_initializer=initialize_bias, kernel_regularizer=l2(2e-4)),
        layers.MaxPooling2D(),
        layers.Conv2D(512, (4,4), activation='relu',
                      kernel_initializer=initialize_weights,
                      bias_initializer=initialize_bias, kernel_regularizer=l2(2e-4)),
        layers.Flatten(),
        layers.Dense(4096, activation='sigmoid',
                     kernel_regularizer=l2(1e-3),
                     kernel_initializer=initialize_weights,
                     bias_initializer=initialize_bias),
    ])

    # Encode each image with the same encoder instance
    encoded_l = encoder(left_input)
    encoded_r = encoder(right_input)

    # Euclidean distance between the two encodings is the network output
    distance = layers.Lambda(L2_Norm)([encoded_l, encoded_r])

    # Tie the two inputs to the distance output and hand the model back
    return Model(inputs=[left_input,right_input],outputs=distance)

In [4]:
# Build the siamese network for inputs of shape (84, 84, 3) and print its layer summary.
model = get_siamese_model((84, 84, 3))
model.summary()
# Load saved weights from an HDF5 file; the path is relative to the working directory.
# NOTE(review): the file name suggests contrastive-loss training — confirm provenance.
model.load_weights("weights/weights_contrastive_loss_2.h5")

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 84, 84, 3)]  0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 84, 84, 3)]  0                                            
__________________________________________________________________________________________________
sequential (Sequential)         (None, 4096)         23669376    input_1[0][0]                    
                                                                 input_2[0][0]                    
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 1)            0           sequential[1][0]             

In [5]:
# Load the mini-ImageNet test cache, then split it into a support set and a test set.
# n=5 is forwarded to split_for_n_shot — presumably 5 support images per class; verify against load_mini_imagenet.
X_test, y_test = load_mini_imagenet_data("mini-imagenet-cache-test.pkl")
X_support, y_support, X_test, y_test = split_for_n_shot(X_test, y_test, n=5)
# Statistics are reduced over axes (0, 1, 2), leaving one value per entry of the
# remaining axis, and are computed from the support set only.
mean = np.mean(X_support, axis=(0, 1, 2))
stddev = np.std(X_support, axis=(0, 1, 2))
# Standardize both splits with the support-set statistics and cast to float32.
X_support_norm = np.float32((X_support - mean) / stddev)
X_test_norm = np.float32((X_test - mean) / stddev)

In [7]:
# model.layers[2] is the shared Sequential encoder (third entry in the model summary);
# calling it directly yields per-image embeddings rather than pair distances.
support_features = model.layers[2](X_support_norm).numpy()

In [8]:
# Sanity check of the embedding matrix dimensions (rows follow the first axis of X_support_norm).
support_features.shape

(100, 4096)

In [11]:
# 1-nearest-neighbour evaluation: embed each test image with the shared encoder
# (model.layers[2]) and count how often the kNN prediction matches the label.
correct = 0
# enumerate() hides the length of y_test from tqdm, which then shows only a
# running count; passing total= restores the percentage bar and ETA.
for i, label in tqdm(enumerate(y_test), total=len(y_test)):
    # Wrap the single image in a batch of one, then drop the batch axis again.
    feat = model.layers[2](np.array([X_test_norm[i]])).numpy().squeeze()
    predicted = kNN(support_features, y_support, feat, k=1)
    if predicted == label:
        correct += 1

11900it [05:27, 36.32it/s]


In [12]:
# Fraction of test samples whose 1-NN prediction matched the label.
print(correct / len(y_test))

0.1354621848739496


In [None]:
# Switch eager execution off before exporting the support embeddings and labels under logs/contrastive/.
# NOTE(review): TensorFlow documents disable_eager_execution as something to call before any
# graphs, ops, or tensors are created — confirm it behaves as intended after the eager cells above.
tf.compat.v1.disable_eager_execution()
convert_embeddings_to_tf(support_features, y_support, log_dir="logs/contrastive/")