In [None]:
# Taken from
# https://stackoverflow.com/questions/48750199/google-colaboratory-misleading-information-about-its-gpu-only-5-ram-available
# memory footprint support libraries/code
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize
import psutil
import humanize
import os
import GPUtil as GPU
GPUs = GPU.getGPUs()
# Colab only provides one GPU and it is not always guaranteed, so fall back
# to None instead of raising IndexError when the list is empty.
gpu = GPUs[0] if GPUs else None
def printm():
    """Print host RAM usage and, if a GPU was detected, its memory statistics.

    Reads the module-level ``gpu`` object (a GPUtil GPU entry or ``None``).
    Returns nothing; all output goes to stdout.
    """
    process = psutil.Process(os.getpid())
    print("RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
    if gpu is None:
        # No GPU was assigned to this runtime; there is nothing to report.
        print("GPU RAM: no GPU detected")
        return
    print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))

In [None]:
# Report current host RAM and GPU memory usage before any data is loaded.
printm()

In [None]:
# Clone repo
!git clone https://github.com/MatchLab-Imperial/keras_triplet_descriptor

In [None]:
# Change directory
%cd keras_triplet_descriptor    


In [None]:
# Download data
!wget -O hpatches_data.zip https://imperialcollegelondon.box.com/shared/static/ah40eq7cxpwq4a6l4f62efzdyt8rm3ha.zip


In [None]:
# Extract data
!unzip -q ./hpatches_data.zip
!rm ./hpatches_data.zip

## Importing Necessary Modules

We now import the modules we will use in this baseline code. 

In [None]:
import sys
import json
import os
import glob
import time
import tensorflow as tf
import numpy as np
import cv2
import random

import keras
from keras import backend as K
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten, Input, Lambda, Reshape
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Conv2DTranspose
from keras.layers import Input, UpSampling2D, concatenate  

from read_data import HPatches, DataGeneratorDesc, hpatches_sequence_folder, DenoiseHPatches, tps
from utils import generate_desc_csv, plot_denoise, plot_triplet

In [None]:
# Seed the Python, NumPy and TensorFlow (TF1-style API) random generators
# with the same fixed value.
random.seed(1234)
np.random.seed(1234)
tf.set_random_seed(1234)

In [None]:
# Location of the extracted dataset and of the JSON file describing the splits.
hpatches_dir = './hpatches'
splits_path = 'splits.json'

# Read the split definitions; the context manager guarantees the file handle
# is closed even if parsing fails.
with open(splits_path, 'rb') as splits_file:
    splits_json = json.load(splits_file)
# Only split 'a' is used in this notebook.
split = splits_json['a']

train_fnames = split['train']
test_fnames = split['test']

# Absolute paths of every entry found directly under the dataset directory.
seqs = [os.path.abspath(p) for p in glob.glob(hpatches_dir+'/*')]

In [None]:
# Keep the sequence folders whose final path component is listed in the split.
# os.path.basename works with the separator of the current OS; splitting on a
# backslash only matched on Windows and left both lists empty on Linux/Colab.
seqs_train = [p for p in seqs if os.path.basename(p) in train_fnames]
seqs_test = [p for p in seqs if os.path.basename(p) in test_fnames]

In [None]:
# Number of entries found under hpatches_dir.
len(seqs)

In [None]:
# Number of entries selected for training.
len(seqs_train)

In [None]:
# Number of entries selected for testing.
len(seqs_test)

## Models and loss

In [None]:
def get_denoise_model(shape, do = 0, activate = 'selu'):
    """Assemble the convolutional encoder/decoder used for patch denoising.

    The encoder has four stages (32, 64, 128 and 256 filters), each made of two
    3x3 convolutions followed by 2x2 max pooling. A 512-filter bottleneck
    follows. The decoder up-samples four times and, before each decoder
    convolution, concatenates the feature map with the encoder output of the
    matching stage along axis 3. A final 1x1 convolution yields one channel.

    NOTE(review): four 2x2 poolings are undone by four 2x up-samplings, so the
    input height/width presumably need to be multiples of 16 for the
    concatenations to line up - confirm against the patch size in use.

    Args:
        shape: shape handed to the keras ``Input`` layer.
        do: rate passed to every ``Dropout`` layer.
        activate: activation used by every 3x3 convolution.

    Returns:
        An uncompiled keras ``Model`` mapping the input to the 1x1 conv output.
    """

    def conv_then_dropout(tensor, filters):
        # 3x3 same-padded convolution whose output goes through dropout.
        dropout = Dropout(do)
        convolved = Conv2D(filters, (3, 3), activation=activate, padding='same')(tensor)
        return dropout(convolved)

    def upsample_then_conv(tensor, filters):
        # Double the spatial resolution, then apply a 3x3 same-padded convolution.
        conv = Conv2D(filters, (3, 3), activation=activate, padding='same')
        return conv(UpSampling2D(size=(2, 2))(tensor))

    inputs = Input(shape)

    # Encoder: remember each stage's pre-pooling output for the skip links.
    encoder_outputs = []
    features = inputs
    for filters in (32, 64, 128, 256):
        features = conv_then_dropout(features, filters)
        features = conv_then_dropout(features, filters)
        encoder_outputs.append(features)
        features = MaxPooling2D(pool_size=(2, 2))(features)

    # Bottleneck.
    features = conv_then_dropout(features, 512)
    features = upsample_then_conv(features, 512)

    # Decoder: merge with the encoder stages from deepest to second-shallowest.
    for filters, skip in zip((256, 128, 64), reversed(encoder_outputs[1:])):
        merged = concatenate([features, skip], axis=3)
        features = conv_then_dropout(merged, filters)
        features = upsample_then_conv(features, filters)

    # Last decoder stage works at full resolution with the first encoder stage.
    merged = concatenate([features, encoder_outputs[0]], axis=3)
    features = conv_then_dropout(merged, 32)
    features = conv_then_dropout(features, 32)

    # 1x1 convolution down to a single channel (no activation argument given).
    output = Dropout(do)(Conv2D(1, (1, 1))(features))

    return Model(inputs=[inputs], outputs=[output])



def get_descriptor_model(shape, activate= 'elu'):
    """Build the sequential descriptor network (layout follows HardNet).

    Six 3x3 same-padded convolutions (32, 32, 64, 64, 128, 128 filters; the
    third and fifth use stride 2), each followed by batch normalisation and the
    chosen activation. Then dropout (0.5), an 8x8 valid convolution with 128
    filters, and a reshape to a flat vector of 128 values.

    NOTE(review): the final ``Reshape((128,))`` only works when the 8x8 valid
    convolution produces a 1x1 map, which is the case for 32x32 inputs.

    Args:
        shape: input shape given to the first convolution.
        activate: activation applied after every batch normalisation.

    Returns:
        An uncompiled keras ``Sequential`` model.
    """
    init_weights = keras.initializers.he_normal()

    # (filters, stride) for each conv -> batch-norm -> activation stage.
    stages = ((32, 1), (32, 1), (64, 2), (64, 1), (128, 2), (128, 1))

    descriptor_model = Sequential()
    for index, (filters, stride) in enumerate(stages):
        conv_kwargs = dict(padding='same', strides=stride, use_bias=True,
                           kernel_initializer=init_weights)
        if index == 0:
            # Only the first layer of a Sequential model declares the input shape.
            conv_kwargs['input_shape'] = shape
        descriptor_model.add(Conv2D(filters, 3, **conv_kwargs))
        descriptor_model.add(BatchNormalization(axis=-1))
        descriptor_model.add(Activation(activate))

    descriptor_model.add(Dropout(0.5))
    descriptor_model.add(Conv2D(128, 8, padding='valid', use_bias=True,
                                kernel_initializer=init_weights))

    # Final descriptor reshape
    descriptor_model.add(Reshape((128,)))

    return descriptor_model
  

def triplet_loss(x):
    """Margin-based triplet loss, computed per sample.

    Args:
        x: sequence ``(a, p, n)`` holding the anchor, positive and negative
            descriptor tensors.

    Returns:
        Tensor ``max(0, d(a, p) - d(a, n) + margin)`` with an extra axis
        inserted at position 1, where ``d`` is the mean squared difference over
        the last axis and the margin is fixed at 1.0.
    """
    a, p, n = x
    # Margin by which the negative must be farther from the anchor than the positive.
    _alpha = 1.0
    positive_distance = K.mean(K.square(a - p), axis=-1)
    negative_distance = K.mean(K.square(a - n), axis=-1)

    return K.expand_dims(K.maximum(0.0, positive_distance - negative_distance + _alpha), axis = 1)

## Denoising Image Patches


In [None]:
# NOTE(review): LeakyReLU is not referenced in the visible cells; presumably it
# was imported for the layers of the saved model - confirm it is still needed.
from keras.layers import LeakyReLU
# Patch shape; presumably 32x32 single-channel patches - TODO confirm.
shape = (32, 32, 1)
# Load a previously saved denoising model from disk rather than building one
# with get_denoise_model.
# NOTE(review): no visible cell downloads or creates denoise_modified.h5 -
# confirm it ships with the cloned repository or must be uploaded first.
denoise_model = keras.models.load_model('./denoise_modified.h5')

## Training a Descriptor Network
In the last section we trained a model that, given a noisy patch, outputs a denoised version of it. We hoped that doing so would improve the performance of the second part, which is training a network that outputs the descriptor. As we mentioned, a descriptor is a numerical vector that represents the small images we have. The dataset consists of a large number of small images, which are cropped patches from other larger images. Hence, they represent some local part of a scene. That is why there are no objects represented, only corners or textures. Each of these patches is related to a subset of other patches of the dataset by some kind of geometric transformation (e.g. rotation).  For a given patch, we want the network to output a vector that is close to the vectors of the patches that represent the same local part of a scene, while being far from the vectors of patches that do not represent that local part of a scene.

To do so, we will build a convolutional neural network that takes an input patch of size $32\times32$ and outputs a descriptor of size $128$. For the loss, we use the triplet loss, which takes an anchor patch, a negative patch and a positive patch. The idea is to train the network so that the descriptors of the anchor and positive patches have a low distance between them, while the descriptors of the negative and anchor patches have a large distance between them. 

In this cell we generate a triplet network, which is a network formed by three copies of the same network. That means that the descriptor model will compute the descriptor for the input `'a'` (anchor), the same descriptor model (with the same weights) will compute the descriptor for the input `'p'` (positive), and again the same model will compute the descriptor for the input `'n'` (negative). 

**Updated explanation**: Due to the way Keras handles the compile method, it needs a loss as an argument in that compile method. However, our loss is computed in the Lambda layer, so we want to minimize the output of that layer. As we want to minimize the output of the Lambda function (in this case the triplet loss), we output a vector of zeros as the label in the training_generator and compute the mean absolute error between the triplet loss and this vector of zeros. To give you an intuition, what we aim to minimize is
$$  |\text{triplet_loss} - 0| =  |\text{triplet_loss}| = \text{triplet_loss} $$



In [None]:
from keras.layers import Lambda
shape = (32, 32, 1)
# The three inputs of the triplet network: anchor, positive and negative patch.
xa = Input(shape=shape, name='a')
xp = Input(shape=shape, name='p')
xn = Input(shape=shape, name='n')

# Reference descriptor network. The same instance (hence the same weights) is
# applied to all three inputs. `descriptor_model_relu` is kept as an alias
# because the name existed before; `descriptor_model` was previously used
# without ever being defined, which raised NameError on a fresh kernel.
descriptor_model = get_descriptor_model( shape)
descriptor_model_relu = descriptor_model
ea = descriptor_model(xa)
ep = descriptor_model(xp)
en = descriptor_model(xn)
loss = Lambda(triplet_loss)([ea, ep, en])

# Snapshot of the untrained weights so every optimizer starts from the same point.
initial_descriptor_weights = descriptor_model.get_weights()


def build_triplet_model(optimizer, descriptor=None, triplet_output=None):
    """Create and compile a triplet network trained with `optimizer`.

    Args:
        optimizer: keras optimizer instance handed to ``compile``.
        descriptor: descriptor network to embed the three inputs with. When
            omitted, a new network is built and initialised with
            ``initial_descriptor_weights`` so that runs do not share weights.
        triplet_output: already-built triplet-loss tensor to use as the model
            output; when given, `descriptor` is not used.

    Returns:
        A compiled keras ``Model`` taking ``[xa, xp, xn]`` and outputting the
        triplet loss. The compile loss is mean absolute error; the markdown
        above states the generator supplies zeros as labels, so that the
        minimised quantity is the triplet loss itself.
    """
    if triplet_output is None:
        if descriptor is None:
            descriptor = get_descriptor_model(shape)
            descriptor.set_weights(initial_descriptor_weights)
        embeddings = [descriptor(patch) for patch in (xa, xp, xn)]
        triplet_output = Lambda(triplet_loss)(embeddings)
    trip_model = Model(inputs=[xa, xp, xn], outputs=triplet_output)
    trip_model.compile(loss='mean_absolute_error', optimizer=optimizer)
    return trip_model


adam_amsgrad = keras.optimizers.Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=True)
sgd = keras.optimizers.SGD(lr=0.00001, momentum=0.9, nesterov=True)
adadelta =  keras.optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)
nadam = keras.optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)
adam = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)

# One independent network per optimizer, each compiled with the optimizer its
# name refers to. Sharing a single network would make every later run continue
# from weights already trained by the previous optimizers, and mismatched
# optimizer/model pairs would mislabel the curves in the comparison plots.
descriptor_model_trip_adam_amsgrad = build_triplet_model(adam_amsgrad)
descriptor_model_trip_sgd = build_triplet_model(sgd)
descriptor_model_trip_adadelta = build_triplet_model(adadelta)
descriptor_model_trip_nadam = build_triplet_model(nadam)
# The Adam run uses the reference network and the module-level `loss` tensor.
descriptor_model_trip_adam = build_triplet_model(adam, triplet_output=loss)


In [None]:
### Descriptor loading and training
# Dataset wrapper; patches are read with use_clean=False and the loaded
# denoising model is handed over to it.
hPatches = HPatches(
    train_fnames=train_fnames,
    test_fnames=test_fnames,
    denoise_model=denoise_model,
    use_clean=False,
)


def _make_triplet_generator(train_flag, num_triplets):
    """Read the image files for one split and wrap them in a triplet generator.

    `train_flag` is forwarded as ``train=`` to ``read_image_file``; presumably
    1 selects the training sequences and 0 the test sequences - see read_data.
    """
    return DataGeneratorDesc(*hPatches.read_image_file(hpatches_dir, train=train_flag),
                             num_triplets=num_triplets)


# Training generator with 100000 triplets, validation generator with 10000.
training_generator = _make_triplet_generator(1, 100000)
val_generator = _make_triplet_generator(0, 10000)

In [None]:
# Visual sanity check of the training generator (plot_triplet comes from utils;
# presumably it displays one anchor/positive/negative example - confirm).
plot_triplet(training_generator)

In [None]:
# Train every triplet network with identical settings (5 epochs, same training
# and validation generators) and keep each History object for plotting.
#
# To resume from a model saved in a previous session, load its weights (and
# optimizer state) into the relevant model before this cell, e.g.
#   saved = keras.models.load_model('./descriptor.h5')
#   model.set_weights(saved.get_weights()); model.optimizer = saved.optimizer


def _train(trip_model):
    """Fit one triplet model and return the History that keras produces."""
    return trip_model.fit_generator(
        generator=training_generator,
        epochs=5,
        verbose=1,
        validation_data=val_generator,
    )


descriptor_history_adam_amsgrad = _train(descriptor_model_trip_adam_amsgrad)
descriptor_history_sgd = _train(descriptor_model_trip_sgd)
descriptor_history_adadelta = _train(descriptor_model_trip_adadelta)
descriptor_history_nadam = _train(descriptor_model_trip_nadam)
descriptor_history_adam = _train(descriptor_model_trip_adam)


In [None]:
import matplotlib.pyplot as plt

In [None]:
def plot_history(history, history2, history3, history4, history5, metric = None):
  """Plot training-loss curves from keras History objects.

  Args:
    history, history2, history3, history4, history5: objects exposing a
      ``history`` dict with a ``'loss'`` list. In comparison mode they are
      labelled, in order, Adam, Adam_Amsgrad, Adadelta, SGD and Nadam.
    metric: acts only as a mode switch. When not None, the training loss of all
      five runs is drawn on one axes. When None, only `history` is drawn
      (training loss, plus validation loss if it was recorded) and the other
      four arguments are ignored.

  Returns:
    None; the figure is drawn through matplotlib.
  """
  if metric is not None:
    labels = ['Adam', 'Adam_Amsgrad', 'Adadelta', 'SGD', 'Nadam']
    runs = (history, history2, history3, history4, history5)
    fig, ax = plt.subplots(figsize=(8, 5))
    for label, run in zip(labels, runs):
      ax.plot(run.history['loss'], label=label)
    ax.legend(loc='upper right')
    ax.set_title('MAE Vs. No of Epochs for Various Optimizers')
    ax.set_ylabel('Mean Absolute Error')
    ax.set_xlabel('Epoch')
  else:
    plt.plot(history.history['loss'])
    # Validation loss exists only if validation data was supplied to fit.
    if 'val_loss' in history.history:
      plt.plot(history.history['val_loss'])
      plt.legend(['Train', 'Val'])
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
  
# Argument order must follow the legend order used inside plot_history:
# Adam, Adam_Amsgrad, Adadelta, SGD, Nadam.
plot_history(descriptor_history_adam, descriptor_history_adam_amsgrad, descriptor_history_adadelta, descriptor_history_sgd, descriptor_history_nadam, 'mean_absolute_error')

In [None]:
def plot_val_history(history, history2, history3, history4, history5, metric = None):
  """Plot validation-loss curves from keras History objects.

  Args:
    history, history2, history3, history4, history5: objects exposing a
      ``history`` dict. In comparison mode they are labelled, in order, Adam,
      Adam_Amsgrad, Adadelta, SGD and Nadam.
    metric: acts only as a mode switch. When not None, the ``'val_loss'`` curve
      of each run that recorded one is drawn on one axes; runs without
      validation data are skipped. When None, only `history` is drawn (training
      loss, plus validation loss if it was recorded) and the other four
      arguments are ignored.

  Returns:
    None; the figure is drawn through matplotlib.
  """
  if metric is not None:
    labels = ['Adam', 'Adam_Amsgrad', 'Adadelta', 'SGD', 'Nadam']
    runs = (history, history2, history3, history4, history5)
    fig, ax = plt.subplots(figsize=(8, 5))
    drawn_any = False
    for label, run in zip(labels, runs):
      val_loss = run.history.get('val_loss')
      if val_loss is None:
        # This run was fitted without validation data; nothing to draw.
        continue
      ax.plot(val_loss, label=label)
      drawn_any = True
    if drawn_any:
      ax.legend(loc='upper right')
    ax.set_title('Validation Loss Vs. No of Epochs for Various Optimizers')
    ax.set_ylabel('Validation Loss')
    ax.set_xlabel('Epoch')
  else:
    plt.plot(history.history['loss'])
    # Validation loss exists only if validation data was supplied to fit.
    if 'val_loss' in history.history:
      plt.plot(history.history['val_loss'])
      plt.legend(['Train', 'Val'])
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
  
# Argument order must follow the legend order used inside plot_val_history:
# Adam, Adam_Amsgrad, Adadelta, SGD, Nadam.
plot_val_history(descriptor_history_adam, descriptor_history_adam_amsgrad, descriptor_history_adadelta, descriptor_history_sgd, descriptor_history_nadam, 'mean_absolute_error')