# **Import necessary libraries**

In [None]:
import shutil
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt 

from PIL import Image
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm

# Seed for making reproducible experiments.
# NOTE(review): in the visible cells only `train_data.sample(...)` uses this
# value; the train/test split passes its own `random_state=42`.
seed = 612

In [None]:
import cv2
import matplotlib.image as mpimg

# EDA

In [None]:
# Load the training index; later cells read its 'id' and 'landmark_id' columns.
train_data = pd.read_csv("../input/landmark-recognition-2020/train.csv")
# Preview five random rows (seeded, so the preview is repeatable).
train_data.sample(5, random_state=seed)

In [None]:
# Number of distinct landmark classes in the training index.
print('Unique labels:', train_data['landmark_id'].nunique())

In [None]:
# Mean number of rows per landmark. `.count().mean()` returns a Series (one
# entry per non-key column), so that Series is what gets printed.
print('Images per label:',train_data.groupby('landmark_id').count().mean() )

In [None]:
# Per-landmark row counts (kept in the 'id' column), most frequent first,
# with 'landmark_id' restored as a regular column.
landmarks_count = (
    train_data
    .groupby('landmark_id')
    .count()
    .sort_values(by='id', ascending=False)
    .reset_index()
)

In [None]:
# Cumulative share of all images covered by the landmarks listed so far.
# The denominator is computed from the counts themselves rather than a
# hard-coded total, so the ratio stays correct if the input CSV changes.
landmarks_count['percent_data'] = landmarks_count['id'].cumsum() / landmarks_count['id'].sum()

In [None]:
# Rows whose cumulative share exceeds 0.71. With the descending sort applied
# when landmarks_count was built, these are the less frequent landmarks.
landmarks_count[landmarks_count['percent_data'] > 0.71]

In [None]:
# Everything after the first 50 rows (i.e. after the 50 largest counts).
landmarks_count[50:]

In [None]:
# Images per landmark, leaving out the 40 most frequent landmarks.
plt.figure(figsize=(25, 12))
plt.scatter(
    landmarks_count['landmark_id'].iloc[40:],
    landmarks_count['id'].iloc[40:],
)
plt.show()

In [None]:
# Bar chart of the 20 landmarks with the most images (title typo fixed).
plt.figure(figsize=(15, 7))
plt.title("Top 20 most frequent landmarks")
sns.barplot(x='landmark_id', y='id', data=landmarks_count.head(20), palette="mako")
plt.show()

In [None]:
# Bar chart of the 20 landmarks with the fewest images. The data is
# `tail(20)` of a descending sort, so the title must say "least frequent";
# the original reused the "most frequent" title.
plt.figure(figsize=(15, 7))
plt.title("20 least frequent landmarks")
sns.barplot(x='landmark_id', y='id', data=landmarks_count.tail(20), palette="mako")
plt.show()

In [None]:
# Images per landmark for rows 4000 onward of the frequency-sorted table.
plt.figure(figsize=(25, 12))
plt.scatter(
    landmarks_count['landmark_id'].iloc[4000:],
    landmarks_count['id'].iloc[4000:],
)
plt.show()

In [None]:
# Images per landmark for rows 5..3999 of the frequency-sorted table.
plt.figure(figsize=(25, 12))
plt.scatter(
    landmarks_count['landmark_id'].iloc[5:4000],
    landmarks_count['id'].iloc[5:4000],
)
plt.show()

# Data subset for model

In [None]:
# Landmarks selected for modelling, listed in a separate CSV.
data_subset = pd.read_csv('../input/sampled-data-1000/Samples 1000.csv')
# Keep only the images of those landmarks. `.copy()` makes this an independent
# frame: a later cell adds a column to it, which on a slice of `train_data`
# raises SettingWithCopyWarning, and `train_data` itself is deleted later.
data_subset_images = train_data[train_data['landmark_id'].isin(data_subset['landmark_id'])].copy()

In [None]:
 #Check data distribution
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.pyplot import subplots

# Bar colours handed to the plotting call inside plot_distribution.
colors = np.array(['#4285f4','#34a853','#fbbc05','#ea4335'])
# Order in which the images-per-class buckets are displayed on the x axis.
order = ['1-5','5-10','10-50','50-100','100-200','200-500','>=500']
# One row with two side-by-side axes (ax1, ax2), one per dataset plotted below.
f, (ax1, ax2) = plt.subplots(1, 2,figsize=(15,5))


def plot_distribution(data_f, data_k, axis):
    """Plot how many classes fall into each images-per-class bucket.

    Args:
        data_f: DataFrame with a ``landmark_id`` column; each row is counted
            as one image of that landmark.
        data_k: Title placed above the subplot.
        axis: Matplotlib axes the bar chart is drawn on.

    Reads the module-level ``order`` (bucket display order) and ``colors``.
    """
    # Images per class as a plain array. Working on the raw counts avoids
    # relying on the column name produced by value_counts(), which differs
    # between pandas versions.
    images_per_class = data_f['landmark_id'].value_counts().to_numpy()

    # Half-open buckets: [0, 5), [5, 10), [10, 50), ..., [200, 500), [500, inf).
    upper_edges = [5, 10, 50, 100, 200, 500]
    bucket_names = np.array(
        ['1-5', '5-10', '10-50', '50-100', '100-200', '200-500', '>=500']
    )
    # digitize returns, for each count, the index of the bucket it belongs to.
    bucket_of_class = bucket_names[np.digitize(images_per_class, upper_edges)]
    classes_per_bucket = pd.Series(bucket_of_class).value_counts()

    # Show only populated buckets, in the configured display order.
    shown = [label for label in order if label in classes_per_bucket.index]
    classes_per_bucket.loc[shown].plot(kind='bar', color=colors, width=0.8, ax=axis)

    axis.set_xlabel('Number of images', fontsize=15)
    axis.set_ylabel('Number of classes', fontsize=15)
    axis.set_title(data_k, fontsize=17)
    
# Left axis: the modelling subset; right axis: the full training index.
plot_distribution(data_subset_images, 'Sample', ax1)
plot_distribution(train_data, 'Original', ax2)

In [None]:
import gc
# Drop the EDA frames that are no longer needed, then run a collection pass.
del train_data, landmarks_count
gc.collect()

In [None]:
# First five rows of the modelling subset.
data_subset_images.head(5)

# Data Preprocessing

In [None]:
# Root folder of the original images; images_resize builds each file path as
# TRAIN_DIR/<c0>/<c1>/<c2>/<id>.jpg from the first three characters of the id.
TRAIN_DIR = '../input/landmark-recognition-2020/train/'

In [None]:
# 80/20 split of image ids and labels, stratified on the landmark label.
X_train, X_test, y_train, y_test = train_test_split(
    data_subset_images['id'],
    data_subset_images['landmark_id'],
    test_size=0.2,
    random_state=42,
    stratify=data_subset_images['landmark_id'],
)

In [None]:
# Adds a constant 'train' column set to 1 on every row.
# NOTE(review): the flag is never set to anything else in the visible cells,
# so it does not separate train from test rows — confirm whether it is needed.
data_subset_images['train'] = 1
data_subset_images.head()

In [None]:
# Sizes of the four pieces returned by the split.
print('X_train:',X_train.shape)
print('y_train:',y_train.shape)
print('X_test:',X_test.shape)
print('y_test:',y_test.shape)

In [None]:
# Rows with train == 1 (every row, given the constant assignment above).
data_subset_images[data_subset_images['train']==1]

In [None]:
# Assemble the training frame from the split ids and labels (the two Series
# come from the same split call and are aligned on their index).
Training_images = pd.DataFrame({'id': X_train, 'landmark_id': y_train})
# Keep only rows whose label also appears among the held-out labels.
Training_images = Training_images[Training_images['landmark_id'].isin(y_test)]

In [None]:
# Assemble the held-out frame from the split ids and labels.
Testing_images = pd.DataFrame({'id': X_test, 'landmark_id': y_test})

In [None]:
# Persist both splits. With the default arguments the DataFrame index is
# written too, as an unnamed first column (it reappears as 'Unnamed: 0' when
# the file is read back with default settings).
Training_images.to_csv('./Training_images.csv')
Testing_images.to_csv('./Testing_images.csv')

In [None]:
# Number of distinct labels in the training and the held-out frame; the two
# values should match for the classifier head to cover both sets.
print(Training_images['landmark_id'].nunique())
print(Testing_images['landmark_id'].nunique())

In [None]:
# Training_images = Training_images.drop('Unnamed: 0',axis = 1)
# Testing_images = Testing_images.drop('Unnamed: 0',axis = 1)

In [None]:
import gc
# The split Series are now held inside Training_images / Testing_images;
# drop the standalone references and run a collection pass.
del X_train, y_train, X_test, y_test
gc.collect()

In [None]:
import cv2,os
# Output folder for the resized training images.
DEST_PATH = '/kaggle/working/resized_images/training_images/'
# makedirs creates the parent folder as well and, with exist_ok=True, does
# not raise when the cell is re-run after the folders already exist.
os.makedirs(DEST_PATH, exist_ok=True)
# Resizing images
def images_resize(images, dest_path=None, size=(224, 224)):
    """Resize source images and write them as JPEG files into ``dest_path``.

    Args:
        images: Iterable of rows whose first element is the image id string.
        dest_path: Output folder. Defaults to the module-level ``DEST_PATH``.
        size: ``(width, height)`` handed to ``cv2.resize``.

    Images that already exist in the output folder are left untouched, so the
    function can be re-run to resume. Source files that OpenCV cannot read are
    skipped and reported instead of aborting the whole run.
    """
    if dest_path is None:
        dest_path = DEST_PATH

    unreadable = []
    for row in images:
        image_id = row[0]
        target = os.path.join(dest_path, f'{image_id}.jpg')
        if os.path.exists(target):
            continue

        # Source files are sharded into folders named after the first three
        # characters of the id.
        source = os.path.join(
            TRAIN_DIR, image_id[0], image_id[1], image_id[2], f'{image_id}.jpg'
        )
        image = cv2.imread(source)
        if image is None:
            # cv2.imread returns None for a missing or undecodable file;
            # passing that on to cv2.resize would raise.
            unreadable.append(image_id)
            continue

        cv2.imwrite(target, cv2.resize(image, size))

    if unreadable:
        print(f'Skipped {len(unreadable)} unreadable image(s), e.g. {unreadable[:10]}')

# `.values` yields one array per row; at this point the columns are
# ('id', 'landmark_id'), so element 0 of each row is the image id.
images_resize(Training_images.values)
print('Images resized')



In [None]:
# for root, dirs, files in os.walk('./resized_images'):
#     for file in files:
#         img = cv2.imread(os.path.join(root,file))
#         print(img,file)
#         plt.imshow(img)
#         break

In [None]:
# i = 0
# for root, dirs, files in os.walk('./resized_images'):
#     for file in files:
#         img = cv2.imread(os.path.join(root,file))
#         print(img,file)
#         plt.imshow(img)
#         if i == 10:
#             break

In [None]:
# f.add_subplot(1,2,1)
# plt.imshow(cv2.imread('../input/landmark-recognition-2020/train/6/d/4/6d4846da6209b860.jpg'))

In [None]:
# Read back every resized training image: decoded arrays go to image_array,
# names of files OpenCV cannot decode go to damaged_images.
damaged_images, image_array = [], []
for root, dirs, files in os.walk('./resized_images/training_images'):
    # Only files directly inside the folder are inspected.
    if root != './resized_images/training_images':
        continue
    for file in files:
        img = cv2.imread(os.path.join(root, file))
        if img is not None:
            image_array.append(img)
        else:
            damaged_images.append(file)



In [None]:
# Number of resized training images that were read back successfully.
len(image_array)

In [None]:
# stop

In [None]:
#Test dataset
# Output folder for the resized held-out images.
DEST_PATH_1 = '/kaggle/working/resized_images/testing_images/'
# exist_ok=True keeps the cell re-runnable once the folder exists.
os.makedirs(DEST_PATH_1, exist_ok=True)
# Resizing images
def images_resize(images, dest_path=None, size=(224, 224)):
    """Resize source images and write them as JPEG files into ``dest_path``.

    This definition replaces any earlier ``images_resize`` from this cell on.

    Args:
        images: Iterable of rows whose first element is the image id string.
        dest_path: Output folder. Defaults to the module-level ``DEST_PATH_1``.
        size: ``(width, height)`` handed to ``cv2.resize``.

    Images that already exist in the output folder are left untouched. Source
    files that OpenCV cannot read are skipped and reported instead of aborting
    the whole run.
    """
    if dest_path is None:
        dest_path = DEST_PATH_1

    unreadable = []
    for row in images:
        image_id = row[0]
        target = os.path.join(dest_path, f'{image_id}.jpg')
        if os.path.exists(target):
            continue

        # Source files are sharded into folders named after the first three
        # characters of the id.
        source = os.path.join(
            TRAIN_DIR, image_id[0], image_id[1], image_id[2], f'{image_id}.jpg'
        )
        image = cv2.imread(source)
        if image is None:
            # cv2.imread returns None for a missing or undecodable file;
            # passing that on to cv2.resize would raise.
            unreadable.append(image_id)
            continue

        cv2.imwrite(target, cv2.resize(image, size))

    if unreadable:
        print(f'Skipped {len(unreadable)} unreadable image(s), e.g. {unreadable[:10]}')

# Resize the held-out images; element 0 of each row is the image id.
images_resize(Testing_images.values)
print('Images resized')



In [None]:
# Spot check of one hard-coded resized file: prints the decoded pixel array,
# or None when OpenCV cannot read the file.
print(cv2.imread('/kaggle/working/resized_images/testing_images/0d7d04144065ad08.jpg'))

In [None]:
# Same value as assigned in the test-resize cell; repeated so that the cells
# below can run without re-running the resize.
DEST_PATH_1 = '/kaggle/working/resized_images/testing_images/'


In [None]:
# Read back every resized held-out image: decoded arrays go to
# image_array_test, names of undecodable files go to damaged_images_test.
damaged_images_test, image_array_test = [], []
for root, dirs, files in os.walk('./resized_images/testing_images'):
    # Only files directly inside the folder are inspected.
    if root != './resized_images/testing_images':
        continue
    for file in files:
        img = cv2.imread(os.path.join(root, file))
        if img is not None:
            image_array_test.append(img)
        else:
            damaged_images_test.append(file)

In [None]:
# Readable held-out images (printed) and unreadable ones (cell output).
print(len(image_array_test))
len(damaged_images_test)


In [None]:
import numpy as np
from keras.preprocessing import image
from keras.preprocessing.image import img_to_array
from keras.applications.resnet50 import preprocess_input
from keras.applications.resnet50 import ResNet50
from keras.applications.imagenet_utils import decode_predictions
from keras.optimizers import Adam
from keras.applications import ResNet152
from keras.applications import ResNet101
import os,cv2
import matplotlib.pyplot as plt 


In [None]:
# ImageNet-initialised ResNet50 without its classification head, built for
# 224x224 RGB input.
res_model = ResNet50(weights='imagenet', include_top = False, input_shape = (224,224,3))

# Freeze the first 143 layers; the layers after that index stay trainable.
# NOTE(review): 143 is a hard-coded index — confirm it is the intended cut.
for layer in res_model.layers[:143]:
    layer.trainable = False
    

In [None]:
# Layer listing of the backbone (useful for checking the freeze index).
res_model.summary()

In [None]:
from keras.models import Sequential
from keras.layers import Flatten, BatchNormalization, Dropout, Dense, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator


In [None]:
def recall_m(y_true, y_pred):
    """Recall over the batch: rounded true positives / rounded actual positives."""
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual = K.round(K.clip(y_true, 0, 1))
    # K.epsilon() keeps the ratio defined when the batch has no positives.
    return K.sum(hits) / (K.sum(actual) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision over the batch: rounded true positives / rounded predicted positives."""
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted = K.round(K.clip(y_pred, 0, 1))
    # K.epsilon() keeps the ratio defined when nothing is predicted positive.
    return K.sum(hits) / (K.sum(predicted) + K.epsilon())
    
def f1_m(y_true, y_pred):
    """F1 over the batch: harmonic mean of precision_m and recall_m."""
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    # Epsilon in the denominator avoids 0/0 when both p and r are zero.
    return 2*((p*r)/(p+r+K.epsilon()))

In [None]:
import keras
from sklearn.metrics import log_loss, roc_auc_score, accuracy_score
from keras.losses import categorical_crossentropy
from keras.metrics import categorical_accuracy
from keras import backend as K
from keras.callbacks import *
# Metrics reported during training and validation.
# NOTE(review): 'accuracy' (CategoricalAccuracy) and the "acc" shortcut are
# both listed, so accuracy is likely reported twice.
# NOTE(review): this one list of metric objects is passed to every compile()
# call in this file — confirm Keras does not share metric state across models.
METRICS = [
      keras.metrics.TruePositives(name='tp'),
      keras.metrics.FalsePositives(name='fp'),
      keras.metrics.TrueNegatives(name='tn'),
      keras.metrics.FalseNegatives(name='fn'), 
      keras.metrics.CategoricalAccuracy(name='accuracy'),
      keras.metrics.Precision(name='precision'),
      keras.metrics.Recall(name='recall'),
      keras.metrics.AUC(name='auc'),
      keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
      "acc", f1_m
]

In [None]:
# Size the softmax layer from the data instead of a hard-coded 943.
# NOTE(review): assumes the training generator ends up with exactly the
# classes present in Training_images — confirm no class loses all its files.
num_classes = Training_images['landmark_id'].nunique()

# ResNet50 backbone followed by a small dense classification head.
model = Sequential([
    res_model,
    # Global pooling already returns one vector per image, so the Flatten
    # layer that used to follow it was a no-op and has been dropped.
    GlobalAveragePooling2D(),
    BatchNormalization(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    BatchNormalization(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dense(num_classes, activation='softmax'),
])

# `learning_rate` replaces the deprecated `lr` alias; `decay=0.0` was the
# default and is omitted because newer Keras releases reject the argument.
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.Adam(learning_rate=0.001, beta_1=0.9,
                                        beta_2=0.999, epsilon=1e-8),
              metrics=METRICS)
print ('Compilation done.')


In [None]:
# Layer listing and parameter counts of the full ResNet50-based classifier.
model.summary()

In [None]:
# File name of each resized image ('<id>.jpg'), used as x_col by the generators.
Training_images['id_new'] = Training_images['id'].astype(str) + '.jpg' 
# Bare expression in the middle of a cell: it has no effect and shows nothing.
Training_images['id_new']
Testing_images['id_new'] = Testing_images['id'].astype(str) + '.jpg' 


In [None]:
# String copies of the labels, used as y_col by the generators.
Testing_images['landmark_id_new'] = Testing_images['landmark_id'].astype(str)
Training_images['landmark_id_new'] = Training_images['landmark_id'].astype(str)
# 'id_new' was built by string concatenation in the previous cell, so these
# two casts look redundant — presumably defensive; verify before removing.
Testing_images['id_new'] = Testing_images['id_new'].astype(str)
Training_images['id_new'] = Training_images['id_new'].astype(str)


In [None]:
# batch_size = 64
# `preprocessing_function` is a constructor argument of ImageDataGenerator.
# flow_from_dataframe does not list it as a parameter, so passing it there
# (as the earlier version did) appears to leave the images unpreprocessed.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=Training_images,
    directory='./resized_images/training_images/',
    x_col='id_new',
    y_col='landmark_id_new',
    class_mode="categorical",
    # Match the models' 224x224 input; the generator default is 256x256.
    target_size=(224, 224),
    # batch_size=batch_size,
)
val_generator = val_datagen.flow_from_dataframe(
    dataframe=Testing_images,
    directory='./resized_images/testing_images/',
    x_col='id_new',
    y_col='landmark_id_new',
    class_mode="categorical",
    target_size=(224, 224),
    # Validation needs no shuffling, and a fixed order keeps predictions
    # aligned with `val_generator.classes`.
    shuffle=False,
)

In [None]:
# Train the ResNet50-based model for 10 epochs, with val_generator as
# validation data; the returned History holds the per-epoch metric values.
epochs = 10
history_base = model.fit(
        train_generator,
#         steps_per_epoch=50,
        epochs=epochs,
        validation_data=val_generator)


In [None]:
# Per-epoch confusion counts recorded on the training data.
for _key in ('tp', 'tn', 'fp', 'fn'):
    print(history_base.history[_key])

# Per-epoch accuracy rebuilt from the counts: (tp + tn) / (tp + tn + fp + fn).
_tp = np.array(history_base.history['tp'])
_tn = np.array(history_base.history['tn'])
_fp = np.array(history_base.history['fp'])
_fn = np.array(history_base.history['fn'])

total = _tp + _tn + _fp + _fn
actual = _tp + _tn
accuracy = actual/total

accuracy

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Dedicated, non-shuffled iterator over the held-out images. Predictions are
# compared with `.classes`, which is in file order, so the batches must come
# in that same order. It reuses the validation generator's image size and
# batch size so the inputs match what the model saw during validation.
eval_generator = val_datagen.flow_from_dataframe(
    dataframe=Testing_images,
    directory='./resized_images/testing_images/',
    x_col='id_new',
    y_col='landmark_id_new',
    class_mode="categorical",
    target_size=val_generator.target_size,
    batch_size=val_generator.batch_size,
    shuffle=False,
)

# No explicit step count: predict() runs over the whole iterator. The earlier
# call passed `3710 // 64+1` positionally, where predict() expects batch_size.
Y_pred = model.predict(eval_generator)
y_pred = np.argmax(Y_pred, axis=1)

# Rows are true classes, columns are predicted classes.
ConfusionMat = confusion_matrix(eval_generator.classes, y_pred)
print('Confusion Matrix')
print(ConfusionMat)

In [None]:
import numpy as np 
# Row-normalise the confusion matrix so each row sums to 1; the diagonal then
# holds the per-class share of correctly predicted samples.
_row_totals = ConfusionMat.sum(axis=1, keepdims=True)
ConfusionMat_1 = ConfusionMat.astype('float') / _row_totals

# Per-class accuracies, best first; show the top five.
ClassAccuracy = sorted(ConfusionMat_1.diagonal(), reverse=True)
ClassAccuracy[:5]

# **Model 3**

In [None]:
!pip install git+https://github.com/qubvel/classification_models.git

In [None]:
# for keras
from classification_models.keras import Classifiers

# for tensorflow keras
# NOTE(review): this second import rebinds `Classifiers`, so the tf.keras
# variant is the one actually used below — confirm that is intended.
from classification_models.tfkeras import Classifiers

# List the architecture names the package offers.
Classifiers.models_names()

In [None]:
# SE-ResNeXt101 backbone without classification head, ImageNet weights.
# Note that this rebinds the module-level name `preprocess_input`.
SeResNeXT101, preprocess_input = Classifiers.get('seresnext101')
model_SeResNeXT101 = SeResNeXT101(include_top = False, input_shape=(224, 224, 3), weights='imagenet')

In [None]:
ResNet101v2, preprocess_input = Classifiers.get('resnet101v2')
model_ResNet101v2 = ResNet101v2(include_top = False, input_shape=(224, 224, 3), weights='imagenet')

ResNet152, preprocess_input = Classifiers.get('resnet152')
model_ResNet152 = ResNet152(include_top = False, input_shape=(224, 224, 3), weights='imagenet')
!pip install efficientnet
import efficientnet.keras as efn 
model_EfficientNetB3 = efn.EfficientNetB3(weights='imagenet') 

In [None]:
# Freeze the leading layers of each backbone; layers past the given index
# stay trainable. A slice never raises, so an index larger than the layer
# count simply freezes every layer.
# NOTE(review): the indices are hard-coded — confirm them against each
# model's layer listing (see the commented-out loop below).
for layer in model_SeResNeXT101.layers[:2670]:
    layer.trainable = False
    
    
for layer in model_ResNet152.layers[:523]:
    layer.trainable = False

    
for layer in model_ResNet101v2.layers[:302]:
    layer.trainable = False

    
for layer in model_EfficientNetB3.layers[:307]:
    layer.trainable = False


# for i, layer in enumerate(model_EfficientNetB3.layers):
#     print(i, layer.name,'-',layer.trainable)


In [None]:
# Size the softmax layer from the data instead of a hard-coded 943.
# NOTE(review): assumes the training generator ends up with exactly the
# classes present in Training_images — confirm no class loses all its files.
num_classes = Training_images['landmark_id'].nunique()

# SE-ResNeXt101 backbone followed by a small dense classification head.
model_1 = Sequential([
    model_SeResNeXT101,
    # Global pooling already returns one vector per image, so the Flatten
    # layer that used to follow it was a no-op and has been dropped.
    GlobalAveragePooling2D(),
    BatchNormalization(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    BatchNormalization(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dense(num_classes, activation='softmax'),
])

# `learning_rate` replaces the deprecated `lr` alias; `decay=0.0` was the
# default and is omitted because newer Keras releases reject the argument.
model_1.compile(loss='categorical_crossentropy',
                optimizer=optimizers.Adam(learning_rate=0.001, beta_1=0.9,
                                          beta_2=0.999, epsilon=1e-8),
                metrics=METRICS)
print ('Compilation done.')

In [None]:
# Size the softmax layer from the data instead of a hard-coded 943.
# NOTE(review): assumes the training generator ends up with exactly the
# classes present in Training_images — confirm no class loses all its files.
num_classes = Training_images['landmark_id'].nunique()

# ResNet152 backbone followed by a small dense classification head.
model_2 = Sequential([
    model_ResNet152,
    # Global pooling already returns one vector per image, so the Flatten
    # layer that used to follow it was a no-op and has been dropped.
    GlobalAveragePooling2D(),
    BatchNormalization(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    BatchNormalization(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dense(num_classes, activation='softmax'),
])

# `learning_rate` replaces the deprecated `lr` alias; `decay=0.0` was the
# default and is omitted because newer Keras releases reject the argument.
model_2.compile(loss='categorical_crossentropy',
                optimizer=optimizers.Adam(learning_rate=0.001, beta_1=0.9,
                                          beta_2=0.999, epsilon=1e-8),
                metrics=METRICS)
print ('Compilation done.')

In [None]:
# Size the softmax layer from the data instead of a hard-coded 943.
# NOTE(review): assumes the training generator ends up with exactly the
# classes present in Training_images — confirm no class loses all its files.
num_classes = Training_images['landmark_id'].nunique()

# ResNet101v2 backbone followed by a small dense classification head.
model_3 = Sequential([
    model_ResNet101v2,
    # Global pooling already returns one vector per image, so the Flatten
    # layer that used to follow it was a no-op and has been dropped.
    GlobalAveragePooling2D(),
    BatchNormalization(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    BatchNormalization(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dense(num_classes, activation='softmax'),
])

# `learning_rate` replaces the deprecated `lr` alias; `decay=0.0` was the
# default and is omitted because newer Keras releases reject the argument.
model_3.compile(loss='categorical_crossentropy',
                optimizer=optimizers.Adam(learning_rate=0.001, beta_1=0.9,
                                          beta_2=0.999, epsilon=1e-8),
                metrics=METRICS)
print ('Compilation done.')

In [None]:
# Size the softmax layer from the data instead of a hard-coded 943.
# NOTE(review): assumes the training generator ends up with exactly the
# classes present in Training_images — confirm no class loses all its files.
num_classes = Training_images['landmark_id'].nunique()

# EfficientNetB3 backbone followed by a small dense classification head.
model_4 = Sequential()
model_4.add(model_EfficientNetB3)
# Pool spatial feature maps when the backbone returns them (4-D output);
# a backbone that already returns one vector per image is only flattened.
if len(model_EfficientNetB3.output_shape) == 4:
    model_4.add(GlobalAveragePooling2D())
else:
    model_4.add(Flatten())
model_4.add(BatchNormalization())
model_4.add(Dense(1024, activation='relu'))
model_4.add(Dropout(0.5))
model_4.add(BatchNormalization())
model_4.add(Dense(512, activation='relu'))
model_4.add(BatchNormalization())
model_4.add(Dense(num_classes, activation='softmax'))

# `learning_rate` replaces the deprecated `lr` alias; `decay=0.0` was the
# default and is omitted because newer Keras releases reject the argument.
model_4.compile(loss='categorical_crossentropy',
                optimizer=optimizers.Adam(learning_rate=0.001, beta_1=0.9,
                                          beta_2=0.999, epsilon=1e-8),
                metrics=METRICS)
print ('Compilation done.')

In [None]:
# batch_size = 64
# `preprocessing_function` is a constructor argument of ImageDataGenerator.
# flow_from_dataframe does not list it as a parameter, so passing it there
# (as the earlier version did) appears to leave the images unpreprocessed.
# NOTE(review): `preprocess_input` is whatever the last Classifiers.get(...)
# call bound it to, yet these generators feed all four models — confirm each
# backbone is given the preprocessing it expects.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=Training_images,
    directory='./resized_images/training_images/',
    x_col='id_new',
    y_col='landmark_id_new',
    class_mode="categorical",
    # Match the models' 224x224 input; the generator default is 256x256.
    target_size=(224, 224),
    # batch_size=batch_size,
)
val_generator = val_datagen.flow_from_dataframe(
    dataframe=Testing_images,
    directory='./resized_images/testing_images/',
    x_col='id_new',
    y_col='landmark_id_new',
    class_mode="categorical",
    target_size=(224, 224),
    # Validation needs no shuffling, and a fixed order keeps predictions
    # aligned with `val_generator.classes`.
    shuffle=False,
    # batch_size=batch_size,
)

In [None]:
# Train the SE-ResNeXt101-based model for 10 epochs, then save it under
# 'model_1_SEResNeXt'.
epochs = 10
history_1 = model_1.fit(
        train_generator,
        epochs=epochs,
        validation_data=val_generator)
   
model_1.save('model_1_SEResNeXt')

In [None]:
# Train the ResNet152-based model for 10 epochs, then save it under
# 'model_2_ResNet152'.
epochs = 10
history_2 = model_2.fit(
        train_generator,
        epochs=epochs,
        validation_data=val_generator) 

model_2.save('model_2_ResNet152')

In [None]:
# Train the ResNet101v2-based model for 10 epochs, then save it under
# 'model_3_ResNet101v2'.
epochs = 10
history_3 = model_3.fit(
        train_generator,
        epochs=epochs,
        validation_data=val_generator)

model_3.save('model_3_ResNet101v2')

In [None]:
# Train the EfficientNetB3-based model for 10 epochs, then save it under
# 'model_EfficientNetB3'.
epochs = 10
history_4 = model_4.fit(
        train_generator,
        epochs=epochs,
        validation_data=val_generator)

model_4.save('model_EfficientNetB3')