In [1]:
#Load important libraries
import os
import warnings
from collections import Counter

import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
import keras.layers as L
from keras import regularizers, optimizers
import keras
from keras import Model
import tensorflow as tf
from tensorflow.keras.applications.xception import Xception
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import load_model

warnings.filterwarnings("ignore")



In [2]:
# Read the competition's training index; the preview below shows its id / landmark_id columns
label = pd.read_csv(filepath_or_buffer="../input/landmark-recognition-2020/train.csv")
label.head(5)


Unnamed: 0,id,landmark_id
0,17660ef415d37059,1
1,92b6290d571448f6,1
2,cd41bf948edc0340,1
3,fb09f1e98c6d2f70,1
4,25c9dfc7ea69838d,7


In [3]:
# Report how many rows (pictures) and how many distinct landmark ids the index holds
n_pictures = len(label)
n_landmarks = label["landmark_id"].nunique()
print(f"The total number of pictures in the dataset: {n_pictures}")
print(f"The total number of landmarks in the dataset: {n_landmarks}")

The total number of pictures in the dataset: 1580470
The total number of landmarks in the dataset: 81313


In [4]:
# Count missing values per column, largest count first
null_counts = label.isna().sum()
null_counts.sort_values(ascending=False)

id             0
landmark_id    0
dtype: int64

In [5]:
# Top landmark_ids with the highest number of landmark images.
# value_counts() sorts by count in descending order, so head(10) is the ten most frequent.
top10 = pd.DataFrame(label.landmark_id.value_counts().head(10))
# Display the table, matching the low10 cell; previously it was built but never shown.
top10

In [6]:
# Lowest landmark_ids: the ten with the fewest landmark images
# (tail of the descending value_counts table)
low10 = label["landmark_id"].value_counts().tail(10).to_frame()
low10

Unnamed: 0_level_0,count
landmark_id,Unnamed: 1_level_1
84791,2
84731,2
133657,2
84711,2
84686,2
84677,2
36989,2
133688,2
17316,2
111405,2


In [7]:
#Getting filepath so that we can then convert them to images
def get_train_file_path(image_id):
    """Return the path of a training image given its id.

    The image is placed under three nested folders named after the first
    three characters of ``image_id``, followed by ``<image_id>.jpg``.

    Raises:
        IndexError: if ``image_id`` has fewer than three characters.
    """
    first, second, third = image_id[0], image_id[1], image_id[2]
    return "../input/landmark-recognition-2020/train/{}/{}/{}/{}.jpg".format(first, second, third, image_id)

# Attach each image's file path to the index table, then take a freshly indexed frame to work on
label["file_path"] = label["id"].map(get_train_file_path)
df = label.reset_index(drop=True)
df.head()

Unnamed: 0,id,landmark_id,file_path
0,17660ef415d37059,1,../input/landmark-recognition-2020/train/1/7/6...
1,92b6290d571448f6,1,../input/landmark-recognition-2020/train/9/2/b...
2,cd41bf948edc0340,1,../input/landmark-recognition-2020/train/c/d/4...
3,fb09f1e98c6d2f70,1,../input/landmark-recognition-2020/train/f/b/0...
4,25c9dfc7ea69838d,7,../input/landmark-recognition-2020/train/2/5/c...


In [8]:
# Store the labels as strings; they are used as the y_col of class_mode="categorical" generators below
df["landmark_id"] = df["landmark_id"].astype(str)

In [9]:
from sklearn.model_selection import train_test_split
# Stratified 80/20 hold-out split on landmark_id.
# random_state pins the shuffle so a Restart & Run All reproduces the same train/test rows;
# without it every run trains and evaluates on a different partition.
train, test = train_test_split(
    df,
    test_size=0.2,
    stratify=df["landmark_id"],
    random_state=123,
)

In [10]:
# Peek at five random rows of the training split
train.sample(n=5)

Unnamed: 0,id,landmark_id,file_path
1199112,4d34da386e1a11cb,154243,../input/landmark-recognition-2020/train/4/d/3...
131498,8c8ac1a811b49bff,17222,../input/landmark-recognition-2020/train/8/c/8...
1560495,22cb83d431cea668,200552,../input/landmark-recognition-2020/train/2/2/c...
1453232,e63ee15c5a39af58,186957,../input/landmark-recognition-2020/train/e/6/3...
1556049,7ca61bb89ebdbdf1,199890,../input/landmark-recognition-2020/train/7/c/a...


In [11]:
# Number of distinct landmark classes present in the training split
train.landmark_id.nunique()

81313

In [12]:
# Peek at five random rows of the hold-out split
test.sample(n=5)

Unnamed: 0,id,landmark_id,file_path
749623,a3fb64e51d294ef1,96663,../input/landmark-recognition-2020/train/a/3/f...
1267634,e6ec371ed0376288,163367,../input/landmark-recognition-2020/train/e/6/e...
251231,9670333548b67c89,32056,../input/landmark-recognition-2020/train/9/6/7...
441269,884efd948cacc987,55780,../input/landmark-recognition-2020/train/8/8/4...
794783,52468d36c47e8e67,102680,../input/landmark-recognition-2020/train/5/2/4...


In [13]:
# Number of distinct landmark classes present in the hold-out split
test.landmark_id.nunique()

76563

In [14]:
val_rate = 0.2
batch_size = 64

# One generator serves both subsets: it rescales pixels to [0, 1] and reserves
# a val_rate fraction of the rows of `train` for validation.
gen = ImageDataGenerator(validation_split=val_rate, rescale=1./255)

# Keyword arguments shared by the training and validation iterators
shared_flow_args = dict(
    x_col="file_path",
    y_col="landmark_id",
    weight_col=None,
    target_size=(256, 256),
    color_mode="rgb",
    classes=None,
    class_mode="categorical",
    batch_size=batch_size,
    shuffle=True,
    seed=123,
    interpolation="nearest",
    validate_filenames=False,
)

train_gen = gen.flow_from_dataframe(train, subset="training", **shared_flow_args)
val_gen = gen.flow_from_dataframe(train, subset="validation", **shared_flow_args)

Found 1011501 non-validated image filenames belonging to 81313 classes.


Found 252875 non-validated image filenames belonging to 81313 classes.


In [None]:
# Hold-out generator: rescaling only, no validation split
gen = ImageDataGenerator(rescale=1./255)

test_gen = gen.flow_from_dataframe(
    test,
    x_col="file_path",
    y_col="landmark_id",
    target_size=(256, 256),
    class_mode="categorical",
    # Reuse the training label space. Left to infer classes from `test` alone, the
    # iterator would build a narrower one-hot encoding with different class indices
    # (test holds fewer distinct landmark_ids than train), which cannot be compared
    # with the model's outputs.
    classes=list(train_gen.class_indices),
    batch_size=1,
    # Keep the rows in dataframe order so predictions line up with `test`
    shuffle=False,
    # Same setting as the training/validation iterators: skip the per-file existence check
    validate_filenames=False)

In [None]:
from tensorflow.keras.applications import InceptionV3
# ImageNet-pretrained InceptionV3 used as a feature extractor: include_top=False drops
# the built-in classifier, so no classifier_activation is passed (it only applies when
# the top is included and was misleading here).
inception = InceptionV3(
    include_top=False,
    weights='imagenet',
    input_shape=(256, 256, 3)
)

# Leave every backbone layer trainable so the whole network is fine-tuned
inception.trainable = True
inception.summary()

In [None]:
#Making model
from tensorflow.keras.layers import Dense, Dropout, MaxPooling2D, GlobalAveragePooling2D, Flatten, Conv2D, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras import optimizers
import tensorflow as tf

# One output unit per class known to the training generator, so the head always
# matches the width of the one-hot labels it is trained on.
num_classes = len(train_gen.class_indices)

model = Sequential()
model.add(inception)
model.add(GlobalAveragePooling2D())
# Each image has exactly one landmark label and the loss is categorical cross-entropy,
# so the head must output a probability distribution: softmax, not independent sigmoids.
model.add(Dense(num_classes, activation='softmax'))
model.compile(optimizer='adamax',
              loss='categorical_crossentropy',
              # Explicit names keep the history keys ('precision', 'val_precision', ...)
              # stable when this cell is re-run; unnamed metrics get a numeric suffix.
              metrics=[tf.keras.metrics.Precision(name='precision'),
                       tf.keras.metrics.Recall(name='recall'),
                       'accuracy'])
model.summary()

In [None]:
# Warm-start from a previous run when its checkpoint is available.
# load_weights() needs a file path; calling it without one raises a TypeError.
# The path matches the file written by the save_weights cell further down.
weights_path = 'model_weights.h5'
if os.path.exists(weights_path):
    model.load_weights(weights_path)

## **Case 1: Taking epochs=2**

In [None]:
#setting epochs, train_steps, and val_steps
epochs = 2
# Take the sample counts from the generators themselves. Re-deriving them as
# len(train) * rate goes through float arithmetic and can disagree with the
# generator's actual split by a row.
# Floor division drops the final partial batch of each pass.
train_steps = train_gen.samples // batch_size
val_steps = val_gen.samples // batch_size

In [None]:
#Fitting the model
# Model.fit accepts generators directly; fit_generator is deprecated and
# absent from newer Keras releases.
history = model.fit(train_gen,
                    steps_per_epoch=train_steps,
                    epochs=epochs,
                    validation_data=val_gen,
                    validation_steps=val_steps)

In [None]:
# Persist the trained weights once fitting has finished
model.save_weights(filepath='model_weights.h5')

In [None]:
#Plotting Accuracy and Precision for training and validation
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
precision = history.history['precision']
val_precision = history.history['val_precision']

# Dedicated name for the x-axis values: rebinding `epochs` here would replace the
# integer epoch count with a range and break a re-run of the training cell.
epoch_axis = range(1, len(acc)+1)

plt.plot(epoch_axis, acc, '#21466C', label='Training acc')
plt.plot(epoch_axis, val_acc, '#cc1123', label='Validation acc')
plt.xlabel('num of Epochs')
plt.ylabel('accuracy')
plt.title('Training and validation accuracy')
plt.legend()

# Start a second figure so precision is not drawn on top of the accuracy curves
plt.figure()

plt.plot(epoch_axis, precision, '#21466C', label='Training precision')
plt.plot(epoch_axis, val_precision, '#cc1123', label='Validation precision')
plt.xlabel('num of Epochs')
plt.ylabel('precision')
plt.title('Training and validation precision')
plt.legend()

plt.show()

In [None]:
# The original statement was an incomplete assignment (a syntax error).
# NOTE(review): presumably meant to restore the trained model before scoring - confirm.
# Reload the weights written by the save_weights cell into the model built above.
model.load_weights('model_weights.h5')

In [None]:
# Indexing the generator yields an (images, one-hot labels) batch; predict() takes
# only the inputs, so pass the image array rather than the whole tuple.
batch_images = test_gen[0][0]
scores = model.predict(batch_images)
# `scores` holds one row of per-class outputs per image in the batch.
# For aggregate numbers use model.evaluate(test_gen) instead: it returns the loss
# followed by the compiled metrics in order (precision, recall, accuracy).