In [4]:
import warnings
warnings.simplefilter('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import random, gc, keras, os

from keras import backend as K
from keras.preprocessing.image import load_img, img_to_array
from keras.models import Sequential, load_model, Model
from keras.layers import Dense, Dropout, Activation, Flatten, Input, Lambda
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, ReduceLROnPlateau
from keras.applications.vgg16 import VGG16

%matplotlib inline

Using TensorFlow backend.


# Load Data

In [5]:
# Read the train / validation / test splits of the triplet data set
train_df, val_df, test_df = [
    pd.read_csv(csv_path)
    for csv_path in (
        './data/triplet/train.csv',
        './data/triplet/validation.csv',
        './data/triplet/test.csv',
    )
]

# Report the (rows, columns) shape of each split
print('Train:\t\t', train_df.shape)
print('Validation:\t', val_df.shape)
print('Test:\t\t', test_df.shape)

# Report how many distinct landmark ids each split contains
print('\nTrain Landmarks:\t', train_df['landmark_id'].nunique(dropna=False))
print('Validation Landmarks:\t', val_df['landmark_id'].nunique(dropna=False))
print('Test Landmarks:\t\t', test_df['landmark_id'].nunique(dropna=False))

Train:		 (113783, 4)
Validation:	 (22255, 4)
Test:		 (22391, 4)

Train Landmarks:	 14943
Validation Landmarks:	 7674
Test Landmarks:		 14436


In [6]:
# Preview the first five rows of the training table
train_df.head(n=5)

Unnamed: 0,image_id,id,url,landmark_id
0,465272,a2ccf8ed2e969f6a,https://lh4.googleusercontent.com/-TPHkS5gzvm4...,0
1,64516,e205ca7c8dd7c027,https://lh3.googleusercontent.com/-V3RjsZtGpxE...,0
2,928409,4e8ab93c1620e8a3,http://mw2.google.com/mw-panoramio/photos/medi...,0
3,88809,896bf928214d1ca4,http://lh5.ggpht.com/-Cy0l41uUaGA/R--yB8vy41I/...,0
4,1001133,375d2a153bdca926,http://lh6.ggpht.com/-UqzFpnqE9bU/S_0u1RovfdI/...,0


# Helper Functions

In [7]:
# read all images
def get_all_images(df, img_size=(224, 224), prefix='./data/triplet/train'):
    """Load every image listed in ``df`` into a single scaled array.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``image_id`` column. Each id is resolved to the file
        ``<prefix>/<image_id>.jpg``.
    img_size : tuple
        Forwarded to ``load_img`` as ``target_size``.
    prefix : str
        Directory holding the images. It may be given with or without a
        trailing path separator; the path is built with ``os.path.join`` so
        the default (which has no trailing slash) resolves to a file inside
        the directory instead of ``./data/triplet/train<id>.jpg``.

    Returns
    -------
    numpy.ndarray
        The images stacked along a new first axis, stored with dtype
        ``K.floatx()`` and divided by 255.
    """
    # Only `image_id` is needed; the frame is not required to carry labels.
    images = [
        img_to_array(load_img(os.path.join(prefix, str(image_id) + '.jpg'),
                              target_size=img_size))
        for image_id in df['image_id'].values
    ]

    # transform list to array and rescale pixel values
    return np.array(images, dtype=K.floatx()) / 255.0

In [8]:
# training set triplet generator
def train_triplet_generator(df, batch_size=42, img_size=(224, 224), seed=42, 
                            prefix='./data/triplet/train/', n_triplets=7400):
    """Endlessly yield batches of (anchor, positive, negative) training images.

    Every pass of the outer loop ("epoch") re-samples ``n_triplets`` triplets:

    * positive landmarks are drawn from landmarks with more than one image;
      the anchor is that landmark's first image and the positive its second;
    * negative landmarks are the landmarks with exactly one image, topped up
      with further multi-image landmarks that were not picked as positives;
      the negative is that landmark's first image.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``landmark_id`` and ``image_id`` columns.
    batch_size : int
        Triplets per yielded batch. Triplets that do not fill a whole batch
        at the end of an epoch are dropped.
    img_size : tuple
        Forwarded to ``load_img`` as ``target_size``.
    seed : int
        Seed of the generator's private ``numpy.random.RandomState``. The
        global NumPy RNG is left untouched.
    prefix : str
        Directory holding ``<image_id>.jpg`` files.
    n_triplets : int
        Number of triplets sampled per epoch (default 7400).

    Yields
    ------
    ([anchors, positives, negatives], [zeros, zeros, zeros])
        Three image arrays scaled by 1/255 in ``K.floatx()`` precision and
        three dummy ``(batch_size, 1)`` zero targets.

    Raises
    ------
    ValueError
        On the first ``next()`` call, if ``batch_size`` is not in
        ``[1, n_triplets]`` (which would otherwise loop forever without
        yielding) or if ``df`` has too few multi-image landmarks.
    """
    if batch_size <= 0 or batch_size > n_triplets:
        raise ValueError('batch_size must be between 1 and n_triplets, got '
                         'batch_size={} and n_triplets={}'.format(batch_size, n_triplets))

    # private RNG: reproducible sampling that other code reseeding
    # np.random cannot disturb, and that does not disturb other code either
    rng = np.random.RandomState(seed)

    # landmark id -> image ids in DataFrame order, built once instead of
    # scanning the whole frame for every landmark on every epoch
    image_ids_by_landmark = {landmark_id: group.values
                             for landmark_id, group in df.groupby('landmark_id')['image_id']}

    # landmarks with a single image can only act as negatives
    counts = df.groupby('landmark_id')['image_id'].count()
    unique_neg_ids = list(counts[counts == 1].index.values)
    rest_ids = list(counts[counts > 1].index.values)

    # n_triplets positives plus however many negatives the single-image
    # landmarks cannot cover
    n_sampled = n_triplets + max(n_triplets - len(unique_neg_ids), 0)
    if n_sampled > len(rest_ids):
        raise ValueError('need {} landmarks with more than one image to build {} '
                         'triplets, but only {} are available'
                         .format(n_sampled, n_triplets, len(rest_ids)))

    def _load_images(image_ids):
        """Read the given image ids from `prefix` into one scaled array."""
        images = [
            img_to_array(load_img(os.path.join(prefix, str(image_id) + '.jpg'),
                                  target_size=img_size))
            for image_id in image_ids
        ]
        return np.array(images, dtype=K.floatx()) / 255.0

    while True:
        # get positive and negative image landmark ids
        # (the shuffle is kept so the draw sequence for a given seed is unchanged)
        rng.shuffle(rest_ids)
        candidate_ids = list(rng.choice(rest_ids, size=n_sampled, replace=False))
        pos_landmark_ids = candidate_ids[:n_triplets]
        neg_landmark_ids = candidate_ids[n_triplets:] + unique_neg_ids
        rng.shuffle(neg_landmark_ids)

        # transform landmark id into image id
        anc_img_ids = [image_ids_by_landmark[lid][0] for lid in pos_landmark_ids]
        pos_img_ids = [image_ids_by_landmark[lid][1] for lid in pos_landmark_ids]
        neg_img_ids = [image_ids_by_landmark[lid][0] for lid in neg_landmark_ids[:n_triplets]]

        # iterate over full batches only
        for start in range(0, n_triplets - batch_size + 1, batch_size):
            stop = start + batch_size

            anc_imgs = _load_images(anc_img_ids[start:stop])
            pos_imgs = _load_images(pos_img_ids[start:stop])
            neg_imgs = _load_images(neg_img_ids[start:stop])

            # dummy targets, one per model output
            zeros = np.zeros((batch_size, 1), dtype=K.floatx())

            yield [anc_imgs, pos_imgs, neg_imgs], [zeros, zeros, zeros]

In [9]:
# validation set triplet generator
def val_triplet_generator(df, batch_size=128, img_size=(224, 224), 
                          seed=42, prefix='./data/triplet/validation', n_triplets=3072):
    """Endlessly yield the same fixed set of validation triplets, batch by batch.

    The triplets are chosen once from a private ``numpy.random.RandomState``
    seeded with ``seed`` and then replayed on every pass, so each validation
    run sees identical data. Positive landmarks come from landmarks with more
    than one image (anchor = first image, positive = second image); negative
    landmarks are the single-image landmarks topped up with multi-image
    landmarks not used as positives (negative = first image).

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``landmark_id`` and ``image_id`` columns.
    batch_size : int
        Triplets per yielded batch; a trailing partial batch is dropped.
    img_size : tuple
        Forwarded to ``load_img`` as ``target_size``.
    seed : int
        Seed for the triplet selection. The global NumPy RNG is left
        untouched, so validation no longer resets the randomness of other
        code (e.g. a training generator) on every pass.
    prefix : str
        Directory holding ``<image_id>.jpg`` files, with or without a
        trailing path separator.
    n_triplets : int
        Number of validation triplets (default 3072).

    Yields
    ------
    ([anchors, positives, negatives], [zeros, zeros, zeros])
        Three image arrays scaled by 1/255 in ``K.floatx()`` precision and
        three dummy ``(batch_size, 1)`` zero targets.

    Raises
    ------
    ValueError
        On the first ``next()`` call, if ``batch_size`` is not in
        ``[1, n_triplets]`` or if ``df`` has too few multi-image landmarks.
    """
    if batch_size <= 0 or batch_size > n_triplets:
        raise ValueError('batch_size must be between 1 and n_triplets, got '
                         'batch_size={} and n_triplets={}'.format(batch_size, n_triplets))

    rng = np.random.RandomState(seed)

    # landmark id -> image ids in DataFrame order, built once
    image_ids_by_landmark = {landmark_id: group.values
                             for landmark_id, group in df.groupby('landmark_id')['image_id']}

    # landmarks with a single image can only act as negatives
    counts = df.groupby('landmark_id')['image_id'].count()
    unique_neg_ids = list(counts[counts == 1].index.values)
    rest_ids = list(counts[counts > 1].index.values)

    n_sampled = n_triplets + max(n_triplets - len(unique_neg_ids), 0)
    if n_sampled > len(rest_ids):
        raise ValueError('need {} landmarks with more than one image to build {} '
                         'triplets, but only {} are available'
                         .format(n_sampled, n_triplets, len(rest_ids)))

    # the selection is deterministic for a given seed, so do it once up front
    candidate_ids = list(rng.choice(rest_ids, size=n_sampled, replace=False))
    pos_landmark_ids = candidate_ids[:n_triplets]
    neg_landmark_ids = candidate_ids[n_triplets:] + unique_neg_ids
    rng.shuffle(neg_landmark_ids)

    # transform landmark id into image id
    anc_img_ids = [image_ids_by_landmark[lid][0] for lid in pos_landmark_ids]
    pos_img_ids = [image_ids_by_landmark[lid][1] for lid in pos_landmark_ids]
    neg_img_ids = [image_ids_by_landmark[lid][0] for lid in neg_landmark_ids[:n_triplets]]

    def _load_images(image_ids):
        """Read the given image ids from `prefix` into one scaled array."""
        images = [
            img_to_array(load_img(os.path.join(prefix, str(image_id) + '.jpg'),
                                  target_size=img_size))
            for image_id in image_ids
        ]
        return np.array(images, dtype=K.floatx()) / 255.0

    while True:
        # iterate over full batches only
        for start in range(0, n_triplets - batch_size + 1, batch_size):
            stop = start + batch_size

            anc_imgs = _load_images(anc_img_ids[start:stop])
            pos_imgs = _load_images(pos_img_ids[start:stop])
            neg_imgs = _load_images(neg_img_ids[start:stop])

            # dummy targets, one per model output
            zeros = np.zeros((batch_size, 1), dtype=K.floatx())

            yield [anc_imgs, pos_imgs, neg_imgs], [zeros, zeros, zeros]

# Define Triplet Loss Model

In [10]:
# Define base network for triplet network
def base_net(input_shape=(150, 150, 3), trainable=False):
    """Build the embedding network shared by the three triplet branches.

    An ImageNet-pretrained VGG16 (without its classifier head) is followed by
    a flatten layer, a 512-unit ReLU layer, a linear 128-unit layer and an
    L2 normalisation along axis 1.

    `input_shape` is handed to VGG16; `trainable` is assigned to the VGG16
    sub-model's `trainable` attribute. Returns the `Sequential` model named
    'base_net'.
    """
    # pre-trained convolutional backbone
    backbone = VGG16(include_top=False, weights='imagenet', input_shape=input_shape)
    backbone.trainable = trainable

    # backbone followed by the embedding head, in forward order
    embedding_layers = [
        backbone,
        Flatten(name='flatten'),
        Dense(512, activation='relu', name='fc1'),
        Dense(128, activation=None, name='fc2'),
        Lambda(lambda x: K.l2_normalize(x, axis=1), name='l2_norm'),
    ]

    return Sequential(embedding_layers, name='base_net')

In [11]:
# Define triplet network
def triplet_net(base_model, input_shape=(150, 150, 3)):
    """Wrap `base_model` into a three-input / three-output triplet model.

    Three inputs named 'anchor_input', 'positive_input' and 'negative_input'
    (each of shape `input_shape`) are passed through the same `base_model`
    instance. The returned model, named 'triplet_net', outputs the three
    results in that same order.
    """
    # one input tensor per branch: anchor, positive, negative
    branch_names = ('anchor_input', 'positive_input', 'negative_input')
    branch_inputs = [Input(shape=input_shape, name=branch) for branch in branch_names]

    # the same base model is applied to every branch
    branch_vectors = [base_model(tensor) for tensor in branch_inputs]

    # assemble the triplet model
    return Model(inputs=branch_inputs, outputs=branch_vectors, name='triplet_net')

In [12]:
# Define triplet loss
def triplet_loss(y_true, y_pred, margin=0.2):
    """Hinge-style triplet loss on squared Euclidean distances.

    Computes ``sum(max(d(anchor, positive) - d(anchor, negative) + margin, 0))``
    where ``d`` is the squared L2 distance over the last axis.

    Parameters
    ----------
    y_true :
        Ignored; present only because Keras loss functions take two arguments.
    y_pred :
        Indexed along its first axis: ``y_pred[0]`` is used as the anchor,
        ``y_pred[1]`` as the positive and ``y_pred[2]`` as the negative.
    margin : float
        Required gap between the positive and negative distances
        (default 0.2, the value that used to be hard-coded).

    Returns
    -------
    The summed loss (a scalar tensor).

    NOTE(review): when a single loss callable is passed to ``compile`` for a
    model with several outputs, Keras appears to call it once per output
    (the training log reports a separate ``base_net_loss``). In that case
    ``y_pred`` here would be ONE branch's ``(batch, embedding)`` tensor and
    indices 0/1/2 would select three batch rows rather than the
    anchor/positive/negative embeddings. Confirm, and if so feed this
    function a single tensor that stacks the three embeddings.
    """
    # get the prediction vectors
    anchor, positive, negative = y_pred[0], y_pred[1], y_pred[2]

    # squared distances anchor<->positive and anchor<->negative
    pos_distance = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), axis=-1)
    neg_distance = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), axis=-1)

    # hinge: only triplets violating the margin contribute
    partial_loss = tf.subtract(pos_distance, neg_distance) + margin
    full_loss = tf.reduce_sum(tf.maximum(partial_loss, 0.0))

    return full_loss

# Model Validation

In [13]:
# img_size = (224, 224, 3)  # target image size

# #Summary of pre-trained VGG16 model
# vgg16 = VGG16(include_top=False, weights='imagenet', input_shape=img_size)
# vgg16.summary()

In [14]:
# # model test
# base_model = base_net(input_shape=img_size, trainable=False)
# base_model.summary()

In [15]:
# # model test
# triplet_model = triplet_net(base_model=base_model, input_shape=img_size)
# triplet_model.summary()

# Model Training

In [16]:
# For reproducibility: seed every RNG used below and run TensorFlow
# single-threaded (both parallelism settings are pinned to 1)
seed = 42
# reset the Keras backend before a fresh session is installed below
K.clear_session()
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up;
# confirm that setting it from inside the running kernel has the intended effect
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(seed)
random.seed(seed)
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
tf.set_random_seed(seed)
# build the session on the current default graph and hand it to Keras
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

# Define Parameters
img_size = (224, 224, 3)  # target image size

# triplet image generators; they receive only (height, width), i.e. img_size[:2],
# and both prefixes are passed explicitly with a trailing slash
train_generator = train_triplet_generator(train_df, batch_size=128, img_size=img_size[:2], 
                                          seed=42, prefix='./data/triplet/train/')

val_generator = val_triplet_generator(val_df, batch_size=128, img_size=img_size[:2], 
                                      seed=42, prefix='./data/triplet/validation/')

In [25]:
# Define triplet network model (VGG16 backbone kept frozen)
base_model = base_net(input_shape=img_size, trainable=False)
triplet_model = triplet_net(base_model=base_model, input_shape=img_size)

# define optimizer
opt = keras.optimizers.Adam()

# compile the model
# NOTE(review): a single loss callable is passed for a three-output model; the
# training log shows a per-output `base_net_loss`, so confirm triplet_loss
# receives the tensors it expects
triplet_model.compile(optimizer=opt, loss=triplet_loss)

# Create callbacks
# make sure the output directory exists before anything tries to write an
# HDF5 file into it
os.makedirs('./models', exist_ok=True)
checkpoint = ModelCheckpoint(filepath='./models/triplet-initial-ckpt.h5')
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=5, min_lr=0.5e-6)
callbacks = [checkpoint, lr_reducer]

In [26]:
# fit the model: one training batch per epoch for two epochs, validating on
# 24 batches from the validation generator after each epoch
history = triplet_model.fit_generator(
    train_generator,
    steps_per_epoch=1,
    epochs=2,
    verbose=2,
    callbacks=callbacks,
    validation_data=val_generator,
    validation_steps=24,
)

# persist the trained triplet network, then run a garbage-collection pass
triplet_model.save('./models/triplet-initial-model.h5')
_ = gc.collect()

Epoch 1/2
 - 20s - loss: 0.7900 - base_net_loss: 0.3000 - val_loss: 0.5793 - val_base_net_loss: 0.2042
Epoch 2/2
 - 19s - loss: 0.6188 - base_net_loss: 0.2222 - val_loss: 0.5738 - val_base_net_loss: 0.2036


# Make Prediction