# Google Landmark Recognition 

In [1]:
from tqdm import tqdm_notebook as tqdm
import os
import pandas as pd
import numpy
import pickle
from keras.applications.xception import Xception
from keras.layers import Activation,Flatten, Dense, AveragePooling2D, Dropout, GlobalAveragePooling2D,Conv2D,BatchNormalization
from keras.models import Model
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard
from keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint, Callback
from keras.preprocessing.image import ImageDataGenerator
import math
from hadamard import *
import keras.backend as K
from keras.metrics import top_k_categorical_accuracy
import tensorflow as tf

# MultiGPU model build on top of
# https://github.com/sallamander/multi-gpu-keras-tf/
import numpy as np
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16
from keras.applications.resnet50 import ResNet50
from keras.applications.inception_resnet_v2 import InceptionResNetV2

from keras.applications import xception
from keras.applications import inception_v3
from sklearn import preprocessing
import shutil
from random import randint
from sklearn.metrics import average_precision_score
from keras.models import load_model
from keras.preprocessing import image
import numpy as np

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Prepare data

In [2]:
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

In [3]:
df_train.head()

Unnamed: 0,id,url,landmark_id
0,cacf8152e2d2ae60,http://static.panoramio.com/photos/original/70...,4676
1,0a58358a2afd3e4e,http://lh6.ggpht.com/-igpT6wu0mIA/ROV8HnUuABI/...,6651
2,6b2bb500b6a38aa0,http://lh6.ggpht.com/-vKr5G5MEusk/SR6r6SJi6mI/...,11284
3,b399f09dee9c3c67,https://lh3.googleusercontent.com/-LOW2cjAqubA...,8429
4,19ace29d77a5be66,https://lh5.googleusercontent.com/-tnmSXwQcWL8...,6231


In [4]:
df_test.head()

Unnamed: 0,id,url
0,000088da12d664db,https://lh3.googleusercontent.com/-k45wfamuhT8...
1,0001623c6d808702,https://lh3.googleusercontent.com/-OQ0ywv8KVIA...
2,0001bbb682d45002,https://lh3.googleusercontent.com/-kloLenz1xZk...
3,0002362830cfe3a3,https://lh3.googleusercontent.com/-N6z79jNZYTg...
4,000270c9100de789,https://lh3.googleusercontent.com/-keriHaVOq1U...


In [5]:
# Map the raw landmark ids onto contiguous integer class labels and keep the
# fitted encoder around so the class list can be reused later.
le = preprocessing.LabelEncoder()
labels = le.fit_transform(df_train['landmark_id'].values)
print(le.classes_)
df_train["label"] = labels
classlength = len(le.classes_)

[    0     1     2 ... 14948 14949 14950]


In [6]:
classlength

14951

#### Set actual path to the image file

In [7]:
# Derive the relative file path and the bare file name of every image from its id.
df_train['Path'] = '256sized/' + df_train['id'].map(str) + '.jpg'
df_train['imgName'] = df_train['id'].map(str) + '.jpg'

df_test['Path'] = 'test/' + df_test['id'].map(str) + '.jpg'
df_test['imgName'] = df_test['id'].map(str) + '.jpg'



In [8]:
df_train = df_train.rename(columns={'Path': 'imgpath', 'label':'target'})

df_test = df_test.rename(columns={'Path': 'imgpath'})

In [9]:
df_train.head()

Unnamed: 0,id,url,landmark_id,target,imgpath,imgName
0,cacf8152e2d2ae60,http://static.panoramio.com/photos/original/70...,4676,4676,256sized/cacf8152e2d2ae60.jpg,cacf8152e2d2ae60.jpg
1,0a58358a2afd3e4e,http://lh6.ggpht.com/-igpT6wu0mIA/ROV8HnUuABI/...,6651,6651,256sized/0a58358a2afd3e4e.jpg,0a58358a2afd3e4e.jpg
2,6b2bb500b6a38aa0,http://lh6.ggpht.com/-vKr5G5MEusk/SR6r6SJi6mI/...,11284,11284,256sized/6b2bb500b6a38aa0.jpg,6b2bb500b6a38aa0.jpg
3,b399f09dee9c3c67,https://lh3.googleusercontent.com/-LOW2cjAqubA...,8429,8429,256sized/b399f09dee9c3c67.jpg,b399f09dee9c3c67.jpg
4,19ace29d77a5be66,https://lh5.googleusercontent.com/-tnmSXwQcWL8...,6231,6231,256sized/19ace29d77a5be66.jpg,19ace29d77a5be66.jpg


### Format Data for Keras
1. Create one sub-folder per class label inside an overall `train` folder
2. Move each image into the sub-folder of its class
3. Create a `val` folder with the same layout and move a tenth of each class's images from `train` into it

In [10]:
# Create train/ plus one sub-directory per encoded class (skipping any that exist).
if not os.path.exists("train"):
    os.mkdir("train")
for cls in le.classes_:
    class_dir = "train/" + str(cls)
    if not os.path.exists(class_dir):
        os.mkdir(class_dir)

In [11]:
# Move every image that is present on disk into train/<target>/<imgName>;
# rows whose file is missing are silently skipped.
for _, c_row in tqdm(df_train.iterrows(), total=df_train.shape[0]):
    source_path = c_row['imgpath']
    if not os.path.exists(source_path):
        continue
    shutil.move(source_path, "train/" + str(c_row["target"]) + "/" + c_row["imgName"])


In [12]:
# Hold out a tenth of every class as validation data by moving files from
# train/<class>/ to val/<class>/.
# NOTE: the cell is not idempotent - running it again moves a further tenth of
# whatever is still left in train/.
if not os.path.exists("val"):
    os.mkdir("val")
for cls in le.classes_:
    train_dir = "train/" + str(cls)
    val_dir = "val/" + str(cls)
    if not os.path.exists(val_dir):
        os.mkdir(val_dir)
    if not os.path.exists(train_dir):
        continue
    # os.listdir returns entries in arbitrary order; sort them so the same
    # files are selected whenever the split is rebuilt from scratch.
    images = sorted(os.listdir(train_dir))
    for impath in images[:len(images) // 10]:
        shutil.move(train_dir + "/" + impath, val_dir + "/" + impath)

In [14]:
# Split the frame positionally: the first tenth of the rows becomes df_valid,
# the remainder stays in df_train.
# NOTE(review): this row-based split is independent of the per-class file
# split done on disk above - confirm which one downstream code relies on.
train_length = len(df_train)
valid_cutoff = int(train_length / 10)
df_valid = df_train.iloc[:valid_cutoff]
df_train = df_train.iloc[valid_cutoff:]

# Start from here modelling

Multi-GPU model definition for Keras; the original built-in function does not support saving the model.

In [9]:
from keras.layers import Lambda, concatenate
from keras import Model

import tensorflow as tf

def multi_gpu_models(model, gpus):
    """Wrap `model` so that each input batch is split across several GPUs.

    For every target GPU the inputs are sliced along the batch axis, the same
    `model` object is called on that slice inside a `/gpu:<id>` device scope,
    and the per-GPU outputs are concatenated back together along axis 0 on
    the CPU.

    Args:
        model: Keras model to replicate; its `inputs`, `outputs` and
            `output_names` are read.
        gpus: either an integer number of GPUs (ids `0 .. gpus-1` are used) or
            a list/tuple of GPU ids.

    Returns:
        A new `Model` with the same inputs as `model` whose outputs are the
        concatenated per-GPU outputs, named after `model.output_names`.
    """
    if isinstance(gpus, (list, tuple)):
        num_gpus = len(gpus)
        target_gpu_ids = gpus
    else:
        num_gpus = gpus
        target_gpu_ids = range(num_gpus)

    def get_slice(data, i, parts):
        # Return the i-th of `parts` contiguous chunks of `data` along axis 0.
        shape = tf.shape(data)
        batch_size = shape[:1]
        input_shape = shape[1:]
        step = batch_size // parts
        # The last replica also takes the remainder of the integer division.
        # Note the check uses the enclosing `num_gpus`, not `parts`; both are
        # the same value for every call made below.
        if i == num_gpus - 1:
            size = batch_size - step * i
        else:
            size = step
        size = tf.concat([size, input_shape], axis=0)
        # `input_shape * 0` makes the start offset zero on all non-batch axes.
        stride = tf.concat([step, input_shape * 0], axis=0)
        start = stride * i
        return tf.slice(data, start, size)

    # One list of per-GPU tensors for each model output.
    all_outputs = []
    for i in range(len(model.outputs)):
        all_outputs.append([])

    # Place a copy of the model on each GPU,
    # each getting a slice of the inputs.
    for i, gpu_id in enumerate(target_gpu_ids):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('replica_%d' % gpu_id):
                inputs = []
                # Retrieve a slice of the input.
                for x in model.inputs:
                    input_shape = tuple(x.get_shape().as_list())[1:]
                    slice_i = Lambda(get_slice,
                                     output_shape=input_shape,
                                     arguments={'i': i,
                                                'parts': num_gpus})(x)
                    inputs.append(slice_i)

                # Apply model on slice
                # (creating a model replica on the target device).
                outputs = model(inputs)
                if not isinstance(outputs, list):
                    outputs = [outputs]

                # Save the outputs for merging back together later.
                for o in range(len(outputs)):
                    all_outputs[o].append(outputs[o])

    # Merge outputs on CPU.
    with tf.device('/cpu:0'):
        merged = []
        for name, outputs in zip(model.output_names, all_outputs):
            merged.append(concatenate(outputs,
                                       axis=0, name=name))
        return Model(model.inputs, merged)

Evaluation metric initialization

In [16]:
class RocAucEvaluation(Callback):
    """Keras callback that prints the validation ROC-AUC every `interval` epochs.

    Args:
        validation_data: `(X_val, y_val)` pair; it is unpacked in `__init__`,
            so the empty default raises `ValueError` and a real pair must be
            supplied.
        interval: evaluate on epochs whose zero-based index is a multiple of
            this value.
    """

    def __init__(self, validation_data=(), interval=1):
        # super(Callback, self) would start the lookup *after* Callback and so
        # skip Callback.__init__; name this class instead.
        super(RocAucEvaluation, self).__init__()

        self.interval = interval
        self.X_val, self.y_val = validation_data

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the validation data and print the ROC-AUC score."""
        # Imported here because the notebook's import cell only brings in
        # average_precision_score from sklearn.metrics.
        from sklearn.metrics import roc_auc_score

        if epoch % self.interval == 0:
            y_pred = self.model.predict(self.X_val, verbose=0)
            score = roc_auc_score(self.y_val, y_pred)
            print("\n ROC-AUC - epoch: %d - score: %.6f \n" % (epoch+1, score))


def GAP_vector(pred, conf, true, return_x=False):
    """Global Average Precision (micro AP) over a set of single-label predictions.

    Predictions are ranked by descending confidence (missing confidences go
    last). Each correct prediction contributes the precision reached at its
    rank, and the total is divided by the number of non-missing ground-truth
    labels. Use None/np.nan for "no label" in `pred` and `true`.

    Args:
        pred: vector of integer-coded predictions
        conf: vector of probability or confidence scores for `pred`
        true: vector of integer-coded ground-truth labels
        return_x: when True, also return the data frame used in the calculation

    Returns:
        The GAP score, or the tuple `(GAP score, data frame)` when `return_x`
        is True.
    """
    ranked = pd.DataFrame({'pred': pred, 'conf': conf, 'true': true})
    ranked = ranked.sort_values('conf', ascending=False, na_position='last')

    hits = (ranked['true'] == ranked['pred']).astype(int)
    ranked['correct'] = hits
    # precision after the k-th ranked prediction, k = 1..len
    ranked['prec_k'] = hits.cumsum() / np.arange(1, len(ranked) + 1)
    ranked['term'] = ranked['prec_k'] * ranked['correct']

    gap = ranked['term'].sum() / ranked['true'].count()
    return (gap, ranked) if return_x else gap

In [7]:
model_name = "INCEPTION_RESNET_V2_real_HADAMARD_unknown"
models_savename = "./models/" + model_name


batch_size = 300  # 258
img_width = 224
img_height = 224

Center Loss Layer Definition

In [31]:
class CenterLossLayer(Layer):
    """Layer that keeps one center per class and outputs squared distances to it.

    The layer is called on a two-element list `[features, onehot_labels]`.
    It outputs, per sample, the squared distance between the sample's feature
    vector and the center selected by its one-hot label, and registers an
    update that moves the centers by `alpha` times the averaged difference.

    NOTE(review): `Layer` is not imported explicitly in the notebook's import
    cell; presumably it arrives through `from hadamard import *` - confirm.
    """

    def __init__(self, alpha=0.5, **kwargs):
        """Store `alpha`, the step size used when updating the centers."""
        super().__init__(**kwargs)
        self.alpha = alpha

    def build(self, input_shape):
        """Create the non-trainable `centers` weight.

        The shape is hard-coded to (14952, 14952) and does not depend on
        `input_shape`; as wired in `resnet_model`, both the one-hot label
        width and the feature width are 14952.
        """
        self.centers = self.add_weight(name='centers',
                                       shape=(14952, 14952),
                                       initializer='uniform',
                                       trainable=False)
        # self.counter = self.add_weight(name='counter',
        #                                shape=(1,),
        #                                initializer='zeros',
        #                                trainable=False)  # just for debugging
        super().build(input_shape)

    def call(self, x, mask=None):
        """Register the center update and return per-sample squared distances."""

        # x[0]: (N, feature_dim) features, x[1]: (N, num_classes) one-hot labels,
        # self.centers: (num_classes, feature_dim)
        delta_centers = K.dot(K.transpose(x[1]), (K.dot(x[1], self.centers) - x[0]))  # (num_classes, feature_dim)
        center_counts = K.sum(K.transpose(x[1]), axis=1, keepdims=True) + 1  # (num_classes, 1); +1 avoids dividing by zero
        delta_centers /= center_counts
        new_centers = self.centers - self.alpha * delta_centers
        self.add_update((self.centers, new_centers), x)

        # self.add_update((self.counter, self.counter + 1), x)

        # The distance is computed against self.centers, not new_centers.
        self.result = x[0] - K.dot(x[1], self.centers)
        self.result = K.sum(self.result ** 2, axis=1, keepdims=True) #/ K.dot(x[1], center_counts)
        return self.result # (N, 1)

    def compute_output_shape(self, input_shape):
        """Return the static shape of the tensor produced by the last `call`.

        Relies on `self.result`, which only exists after `call` has run.
        """
        return K.int_shape(self.result)


In [14]:
def zero_loss(y_true, y_pred):
    """Return half the sum of `y_pred` over axis 0; `y_true` is ignored.

    It is compiled as the loss for the center-loss output, whose values are
    already the quantity to minimise.
    """
    summed_over_batch = K.sum(y_pred, axis=0)
    return 0.5 * summed_over_batch

In [10]:
from keras.layers import Input, Dense, Flatten, BatchNormalization
def resnet_model(labels):
    """Build three models that share one ImageNet-initialised ResNet50 backbone.

    Args:
        labels: input tensor carrying the one-hot labels that the center-loss
            layer needs.

    Returns:
        A tuple `(model, model_features, center_loss_model)`:
        `model` maps images to the softmax output of the Hadamard classifier,
        `model_features` maps images to the 14952-wide dense features, and
        `center_loss_model` maps `[images, labels]` to
        `[softmax output, center-loss output]`.
    """
    backbone = ResNet50(include_top=False, weights='imagenet',
                        input_tensor=None, input_shape=(img_width, img_height, 3))

    # Fine-tune the whole backbone: every layer is marked trainable.
    for layer in backbone.layers:
        layer.trainable = True

    features = GlobalAveragePooling2D(name='avg_pool_head')(backbone.output)
    features = Dense(14952, activation='relu')(features)
    softmax_out = HadamardClassifier(14952, activation='softmax')(features)  # number of classes
    center_out = CenterLossLayer(alpha=0.5, name='centerlosslayer')([features, labels])

    image_input = backbone.input
    model = Model(image_input, softmax_out)
    model_features = Model(image_input, features)
    center_loss_model = Model(inputs=[image_input, labels],
                              outputs=[softmax_out, center_out])
    return model, model_features, center_loss_model

Create multiple models that expose different parts of the same network

In [11]:
aux_input = Input((14952,))
normal_model, feature_model, center_loss_model = resnet_model(aux_input)

In [12]:
model_GPU = multi_gpu_models(normal_model,gpus=4)

In [12]:
model_GPU_center = multi_gpu_models(center_loss_model,gpus=4)

In [13]:
model_GPU_center.load_weights('models/INCEPTION_RESNET_V2_real_HADAMARD_unknown_centerloss_016-0.5823538.hdf5')

Because only one underlying network is created, its weights are shared between all three forms of the model. Verify below that the layer weights change after loading the weights through the multi-GPU model.

In [17]:
center_loss_model.get_layer('dense_1').get_weights()

[array([[ 0.0096401 ,  0.00702382, -0.00371774, ...,  0.01623657,
         -0.01004059, -0.03414487],
        [ 0.00435385,  0.00690936, -0.02781754, ...,  0.00286781,
          0.00811644, -0.00835009],
        [ 0.00028313, -0.02433074, -0.03405078, ...,  0.00901906,
         -0.00256233, -0.00651655],
        ...,
        [ 0.00046729, -0.02588187, -0.0647682 , ...,  0.04849853,
          0.00852132,  0.01085433],
        [-0.01744856, -0.01838162,  0.05228666, ...,  0.02140591,
          0.01926779,  0.01351724],
        [-0.00786625, -0.03124017, -0.02701088, ..., -0.00764332,
          0.00399612,  0.01563823]], dtype=float32),
 array([-0.02850822, -0.00847463, -0.00387306, ..., -0.00280991,
        -0.00339114,  0.00287837], dtype=float32)]

Initialize the lambda weight (`lambda_centerloss`) applied to the center loss

In [15]:
from keras import losses
lambda_centerloss = 0.1
optim = Adam(lr=0.00002)
model_GPU_center.compile(optimizer=optim,
                  loss=[losses.categorical_crossentropy, zero_loss],
                  loss_weights=[1, lambda_centerloss],
                  metrics=['accuracy'])

In [15]:
model_GPU.load_weights('models/saved_models/RESNET50_HADAMARD_unknown_014-0.1729437.h5')

In [10]:
# NOTE(review): `model` is not assigned anywhere above this cell in the visible
# notebook, so on a fresh kernel this line presumably raises NameError; it
# looks like it relies on a model built in a cell that was removed - confirm
# which model is meant to be wrapped here.
model = multi_gpu_models(model,gpus=4)

In [35]:
model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.00003), metrics=[top_k_categorical_accuracy, 'accuracy'])


### An extra class was added for some models to tackle the unknown-class problem

In [12]:
# Directory names handed to flow_from_directory: the encoded classes as strings.
class_names = [str(cls) for cls in le.classes_]

In [17]:
class_names.append(str(14951))

In [18]:
len(class_names)

14952

## Keras flow from directory for centerloss model

In [13]:
# Data generator
batch_size=200
train_data_dir = "training"
val_data_dir = "validation"
train_datagen = ImageDataGenerator(
        rescale=1./255,
        zoom_range=0.1,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True)

train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size = (img_width, img_height),
        batch_size = batch_size,
        shuffle = True,
        classes = class_names,
        class_mode = 'sparse')

val_datagen = ImageDataGenerator(rescale=1./255)

validation_generator = val_datagen.flow_from_directory(
        val_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        shuffle = True,
        classes = class_names,
        class_mode = 'sparse')

Found 1102624 images belonging to 14951 classes.
Found 114799 images belonging to 14951 classes.


## Custom generator to feed in dummy centers

In [35]:
from keras.utils import to_categorical
def my_generator(generator):
    """Adapt an (images, sparse labels) batch iterator to the center-loss model.

    Yields, forever, `([images, onehot], [onehot, dummy])`: the one-hot labels
    (14952 classes) are fed both as the second model input and as the target
    of the softmax output, while `dummy` is an all-zero `(batch, 1)` target
    for the center-loss output.
    """
    while True:
        batch = next(generator)
        images = batch[0]
        sparse_labels = [int(label) for label in batch[1]]
        onehot = to_categorical(sparse_labels, 14952)
        center_target = np.zeros((images.shape[0], 1))

        yield [images, onehot], [onehot, center_target]



In [13]:
model_name = "XCEPTION"
models_savename = "./models/" + model_name

In [14]:
os.makedirs("./models", exist_ok=True)

# Stop once val_loss has not improved for 5 epochs, and halve the learning
# rate (down to 1e-6) after 2 epochs without improvement.
early = EarlyStopping(monitor="val_loss", mode="min", patience=5)
rlrop = ReduceLROnPlateau(monitor='val_loss',mode='auto',patience=2,verbose=1,factor=0.5,cooldown=0,min_lr=1e-6)


# A checkpoint with the full model is written after every epoch
# (save_best_only=False). The monitored quantity is a loss, so "better" means
# lower: mode must be 'min' (it was 'max'), otherwise switching on
# save_best_only would keep the worst model instead of the best one.
callbacks = [ModelCheckpoint(monitor='val_loss',
                             filepath= models_savename + '_{epoch:03d}-{val_loss:.7f}.hdf5',
                             save_best_only=False,
                             save_weights_only=False,
                             mode='min'),
             TensorBoard(log_dir='logs/{}'.format(model_name)),
             rlrop,
             early]



In [25]:
# Resume the center-loss model for one more epoch (epoch 17).
# Steps are derived from the directory iterators themselves: the previously
# hard-coded training sample count (1104367) did not match the 1102624 images
# the iterator reported, and the global `batch_size` is reassigned by several
# other cells.
# NOTE(review): my_generator wraps the iterators in plain Python generators;
# Keras warns that plain generators combined with use_multiprocessing=True and
# several workers may duplicate data - confirm this is acceptable or wrap the
# logic in a keras.utils.Sequence.
model_GPU_center.fit_generator(generator=my_generator(train_generator),
                    steps_per_epoch=math.ceil(train_generator.samples / train_generator.batch_size),
                    verbose=1,
                    callbacks=callbacks,
                    validation_data=my_generator(validation_generator),
                    initial_epoch=16,
                    epochs=17,
                    use_multiprocessing=True,
                    max_queue_size=10,
                    workers = 20,
                    validation_steps=math.ceil(validation_generator.samples / validation_generator.batch_size))



Epoch 17/17





<keras.callbacks.History at 0x7f9032796780>

In [26]:
optim = Adam(lr=0.00001)
model_GPU_center.compile(optimizer=optim,
                  loss=[losses.categorical_crossentropy, zero_loss],
                  loss_weights=[1, lambda_centerloss],
                  metrics=['accuracy'])

## Keras flow from directory for normal softmax model

In [15]:
# Data generator
batch_size=120
train_data_dir = "training"
val_data_dir = "validation"
train_datagen = ImageDataGenerator(
        rescale=1./255,
        zoom_range=0.1,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True)

train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size = (img_width, img_height),
        batch_size = batch_size,
        shuffle = True,
        classes = class_names,
        class_mode = 'categorical')

val_datagen = ImageDataGenerator(rescale=1./255)

validation_generator = val_datagen.flow_from_directory(
        val_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        shuffle = True,
        classes = class_names,
        class_mode = 'categorical')

Found 1102624 images belonging to 14951 classes.
Found 114799 images belonging to 14951 classes.


In [18]:
model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.00005), metrics=[top_k_categorical_accuracy, 'accuracy'])


In [36]:
# Continue training the plain softmax model from epoch 10.
# Steps are derived from the directory iterators: the previously hard-coded
# counts (1104394 training / 115033 validation) did not match the
# 1102624 / 114799 images the iterators reported.
model.fit_generator(generator=train_generator,
                    steps_per_epoch=math.ceil(train_generator.samples / train_generator.batch_size),
                    verbose=1,
                    callbacks=callbacks,
                    validation_data=validation_generator,
                    initial_epoch=10,
                    epochs=1000,
                    use_multiprocessing=True,
                    max_queue_size=10,
                    workers = 20,
                    validation_steps=math.ceil(validation_generator.samples / validation_generator.batch_size))


Epoch 11/1000
Epoch 12/1000
Epoch 13/1000

Process ForkPoolWorker-738:
Process ForkPoolWorker-734:
Process ForkPoolWorker-732:
Process ForkPoolWorker-731:
Process ForkPoolWorker-727:
Process ForkPoolWorker-735:
Process ForkPoolWorker-723:
Process ForkPoolWorker-736:
Process ForkPoolWorker-729:
Process ForkPoolWorker-730:
Process ForkPoolWorker-724:
Process ForkPoolWorker-733:
Process ForkPoolWorker-721:
Process ForkPoolWorker-739:
Process ForkPoolWorker-737:
Process ForkPoolWorker-722:
Traceback (most recent call last):
Traceback (most recent call last):
Process ForkPoolWorker-726:
Traceback (most recent call last):
Process ForkPoolWorker-725:
Traceback (most recent call last):
Process ForkPoolWorker-728:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/user/anaconda3/lib/python3.6

  File "/home/user/anaconda3/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/home/user/anaconda3/lib/python3.6/site-packages/keras/utils/data_utils.py", line 401, in get_

KeyboardInterrupt: 

In [19]:
init_epochs = 25  # epochs already completed during earlier training

# Train in ten rounds of two epochs each; before every round the optimizer's
# learning rate is multiplied by 0.95. Validation only runs over enough
# batches to cover 10000 samples.
for i in range(10):
    current_lr = K.get_value(model.optimizer.lr)
    K.set_value(model.optimizer.lr, 0.95 * current_lr)
    start_epoch = 2 * i
    epochs = 2 * (i + 1)
    model.fit_generator(generator=train_generator,
                        validation_data=validation_generator,
                        steps_per_epoch=math.ceil(1102624 / batch_size),
                        validation_steps=math.ceil(10000 / batch_size),
                        initial_epoch=start_epoch + init_epochs,
                        epochs=epochs + init_epochs,
                        callbacks=callbacks,
                        verbose=1,
                        use_multiprocessing=True,
                        max_queue_size=10,
                        workers=20)

Epoch 26/27
Epoch 27/27
Epoch 28/29
Epoch 29/29
Epoch 30/31
Epoch 31/31
Epoch 32/33
Epoch 33/33
Epoch 34/35

Process ForkPoolWorker-1535:
Process ForkPoolWorker-1534:
Process ForkPoolWorker-1540:
Process ForkPoolWorker-1536:
Process ForkPoolWorker-1537:
Process ForkPoolWorker-1539:
Process ForkPoolWorker-1525:
Process ForkPoolWorker-1538:
Process ForkPoolWorker-1528:
Process ForkPoolWorker-1524:
Process ForkPoolWorker-1523:
Process ForkPoolWorker-1521:
Process ForkPoolWorker-1522:
Process ForkPoolWorker-1526:
Process ForkPoolWorker-1533:
Process ForkPoolWorker-1527:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/user/anaconda3/lib/p

  File "/home/user/anaconda3/lib/python3.6/site-packages/keras/utils/data_utils.py", line 401, in get_index
    return _SHARED_SEQUENCES[uid][i]
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "/home/user/anaconda3/lib/python3.6/site-packages/keras/preprocessing/image.py", line 1143, in __getitem__
    return self._get_batches_of_transformed_samples(index_array)
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/home/user/anaconda3/lib/python3.6/site-packages/keras/utils/data_utils.py", line 401, in get_index
    return _SHARED_SEQUENCES[uid][i]
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get

  File "/home/user/anaconda3/lib/python3.6/site-packages/keras/preprocessing/image.py", line 1143, in __getitem__
    return self._get_batches_of_transformed_samples(index_array)
  File "/home/user/anaconda3/lib/python3.6/site-packages/keras/utils/data_utils.py", line 401, in get_index
    return _SHARED_SEQUENCES[uid][i]
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/user/anaconda3/lib/python3.6/site-packages/keras/preprocessing/image.py", line 1615, in _get_batches_of_transformed_samples
    x = self.image_data_generator.random_transform(x)
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/home/user/anaconda3/lib/python3.6/site-packages/keras/preprocessing/image.py", line 1009, in random_transform
    fill_mode=self.fill_mode, cval=self.cval)
  File "/home/user/anaconda3/lib/python3.6/site-packages/keras/preprocessing/image.py", line 1143, in _

KeyboardInterrupt: 

Process ForkPoolWorker-1559:
Process ForkPoolWorker-1552:
Process ForkPoolWorker-1541:
Process ForkPoolWorker-1554:
Process ForkPoolWorker-1560:
Process ForkPoolWorker-1558:
Process ForkPoolWorker-1551:
Process ForkPoolWorker-1553:
Process ForkPoolWorker-1555:
Process ForkPoolWorker-1556:
Process ForkPoolWorker-1542:
Process ForkPoolWorker-1550:
Process ForkPoolWorker-1543:
Process ForkPoolWorker-1557:
Process ForkPoolWorker-1544:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/process.py

  File "/home/user/anaconda3/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
KeyboardInterrupt
KeyboardInterrupt
  File "/home/user/anaconda3/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/home/user/anaconda3/lib/

## Predict on test set

In [17]:
# dimensions of our images
img_width, img_height = 224, 224
batch_size = 120  # 258
# load the model we saved
#model = load_model('models/ResNet50_003-7.4781873.hdf5')
#model.compile(loss='categorical_crossentropy',
#              optimizer='adam',
#              metrics=['accuracy'])


df_test = pd.read_csv("test.csv")
df_test['Path'] = df_test['id'].map(lambda x: 'test/' + str(x) + '.jpg')
df_test['imgName'] = df_test['id'].map(lambda x: str(x) + '.jpg')
df_test = df_test.rename(columns={'Path': 'imgpath'})


In [6]:
batch_size = 40 
df_test = pd.read_csv("test.csv")
df_test['Path'] = df_test['id'].map(lambda x: 'test/' + str(x) + '.jpg')
df_test['imgName'] = df_test['id'].map(lambda x: str(x) + '.jpg')
df_test = df_test.rename(columns={'Path': 'imgpath'})

In [18]:
test_images = []
test_ids = []
prediction_total = []

# Load every test image that exists on disk, resized to the network input
# size and scaled to [0, 1]. Each entry of pred_images keeps a leading batch
# axis of length 1 so the list can be stacked with np.vstack afterwards;
# test_ids records the matching image ids in the same order.
pred_images = []
for _, c_row in tqdm(df_test.iterrows(), total=df_test.shape[0]):
    if not os.path.exists(c_row['imgpath']):
        continue
    img = image.load_img(c_row['imgpath'], target_size=(img_width, img_height))
    x = np.expand_dims(image.img_to_array(img), axis=0)
    x = x / 255.
    pred_images.append(x)
    test_ids.append(c_row['id'])






In [19]:
pred_images = np.vstack(pred_images)

In [37]:
batch_size = 120
predictions_xception = model.predict(pred_images, batch_size=batch_size, verbose=1)



In [74]:
batch_size = 400
predictions_inception_resnetv2 = model.predict(pred_images, batch_size=batch_size)

In [11]:
batch_size = 300
predictions = model.predict(pred_images, batch_size=batch_size)#resnet50hadamard

In [23]:
batch_size = 400
predictions_resnet50_unknown = model.predict(pred_images, batch_size=batch_size)

In [61]:
batch_size = 500
predictions_vgg = model.predict(pred_images, batch_size=batch_size)

In [12]:
np.save("predictions_resnet50.npy",predictions)

In [85]:
np.save("predictions_inception_resnetv2.npy",predictions_inception_resnetv2)

In [24]:
np.save("predictions_resnet50_unknown.npy",predictions_resnet50_unknown)

In [29]:
predictions_center_loss = np.load("predictions_center_loss.npy")

In [25]:
predictions_inception_resnetv2 = np.load("predictions_inception_resnetv2.npy")

In [27]:
predictions_resnet50_unknown = np.load("predictions_resnet50_unknown.npy")

In [26]:
predictions = np.load("predictions_resnet50.npy")

In [28]:
# Drop the last score of every row (the extra class 14951 that was appended to
# class_names) so the array lines up column-for-column with the models that
# were trained without it. A single slice replaces the per-row Python loop;
# the result is a view onto the loaded array rather than a copy.
predictions_resnet50_unknowns = np.asarray(predictions_resnet50_unknown)[:, :-1]




In [30]:
# Drop the last score of every row (the extra class 14951 that was appended to
# class_names) so the array lines up column-for-column with the models that
# were trained without it. A single slice replaces the per-row Python loop;
# the result is a view onto the loaded array rather than a copy.
predictions_center_losss = np.asarray(predictions_center_loss)[:, :-1]




## Ensemble Weighted Average

#predictions_inception_resnetv2  0.076
#predictions_resnet50_unknowns  0.101
#predictions                  0.089
#predictions_resnet50_unknown_center_loss

In [38]:
final_predictions = 0.2 * predictions_inception_resnetv2 + 0.3 * predictions + 0.15 * predictions_resnet50_unknowns + 0.15 * predictions_center_losss + 0.2 *predictions_xception

In [12]:
final_predictions.shape

(115430, 14951)

In [46]:

#max_predictions = np.max(predictions_resnet50_unknown, axis=-1)
class_predictions_unknown = np.argmax(predictions_resnet50_unknown, axis=-1)

In [23]:


max_predictions = np.max(final_predictions, axis=-1)
class_predictions = np.argmax(final_predictions, axis=-1)



In [32]:
class_predictions

array([13231,  8246, 11755, ..., 12226,  5656,  2144])

In [None]:

# Five-crop test-time augmentation: every test image is loaded at 256x256,
# five new_width x new_height patches (center and the four corners) are cut
# out, each patch is resized to the network input size, and the five
# predictions are averaged into one score vector per image.
new_width = 200
new_height = 200
test_images = []
test_ids = []
prediction_total = []


def _five_crop_boxes(width, height, crop_width, crop_height):
    """Return (left, top, right, bottom) boxes for the center and four corner crops."""
    center_left = (width - crop_width) // 2
    center_top = (height - crop_height) // 2
    return [
        (center_left, center_top, center_left + crop_width, center_top + crop_height),  # center
        (0, 0, crop_width, crop_height),                                  # top left
        (width - crop_width, 0, width, crop_height),                      # top right
        (0, height - crop_height, crop_width, height),                    # bottom left
        (width - crop_width, height - crop_height, width, height),        # bottom right
    ]


for _, c_row in tqdm(df_test.iterrows(), total=df_test.shape[0]):
    if not os.path.exists(c_row['imgpath']):
        continue
    full_img = image.load_img(c_row['imgpath'], target_size=(256, 256))
    width, height = full_img.size   # PIL reports (width, height)

    # The networks in this notebook are built and trained for
    # img_width x img_height inputs, so the 200x200 patches are resized before
    # prediction instead of being fed at their native size.
    crops = [
        image.img_to_array(full_img.crop(box).resize((img_width, img_height)))
        for box in _five_crop_boxes(width, height, new_width, new_height)
    ]
    crop_batch = np.stack(crops) / 255.

    # Cell-local names are used on purpose: the globals `pred_images` and
    # `predictions` hold the full-test-set arrays that other cells rely on.
    crop_predictions = model.predict(crop_batch, batch_size=batch_size)
    prediction_total.append(np.mean(crop_predictions, axis=0))
    test_ids.append(c_row['id'])

In [34]:
preddf = pd.DataFrame(predictions)
preddf['id']= test_ids


In [54]:
### UNKNOWN
# Submission variant: leave the prediction empty whenever the ResNet50 model
# trained with the extra class picks that class (index 14951); otherwise emit
# "<ensemble class> <ensemble confidence>". `count` is the number of images
# left empty.
# `final_class_pred` was not defined anywhere in the notebook; the ensemble
# arg-max `class_predictions` is used instead, as in the unfiltered
# submission cell.
count = 0
submit_prep = []
for idx, pp in enumerate(max_predictions):
    if class_predictions_unknown[idx] != 14951:
        submit_prep.append(str(class_predictions[idx]) + " " + str(pp))
    else:
        submit_prep.append("")
        count += 1
print(count)

52282


In [51]:
# Submission variant: only keep predictions whose ensemble confidence exceeds
# 0.3; all other images get an empty prediction.
# `final_class_pred` was not defined anywhere in the notebook; the ensemble
# arg-max `class_predictions` is used instead, as in the unfiltered
# submission cell.
submit_prep = []
for idx, pp in enumerate(max_predictions):
    if pp > 0.3:
        submit_prep.append(str(class_predictions[idx]) + " " + str(pp))
    else:
        submit_prep.append("")

In [47]:

# For every row of `hi`, copy the 'landmarks' value of the same-positioned row
# of `sample_pred` when hi's own 'landmarks' value is not NaN, else store "".
# NOTE(review): `hi` is not defined anywhere in the visible notebook and
# `sample_pred` is only assigned in a later cell, so this cell depends on
# kernel state from out-of-order execution; `filtered_landmarks` is also not
# read by any visible cell - confirm whether this cell is still needed.
filtered_landmarks = []
for idx, c_row in tqdm(hi.iterrows(), total=hi.shape[0]):
    if(str(c_row['landmarks']) != 'nan'):
        filtered_landmarks.append(sample_pred.iloc[idx]['landmarks'])
    else:
        filtered_landmarks.append("")



In [40]:
# One "<class> <confidence>" string per test image, taken from the ensemble.
submit_prep = [
    str(class_predictions[idx]) + " " + str(pp)
    for idx, pp in enumerate(max_predictions)
]

In [41]:
# Pair every predicted image id with its prediction string, then left-join
# onto the ids of the sample submission so that images without a prediction
# still get a row, and write the result out.
submit = pd.DataFrame({"id": test_ids, "landmarks": submit_prep})
sample_pred = (
    pd.read_csv("sample_submission.csv")
    .drop("landmarks", axis=1)
    .merge(submit, on="id", how='left')
)
sample_pred.to_csv("landmark_pred_twofinale.csv", index=False)