## Display Images

In [21]:
from IPython.display import Image
from IPython.core.display import HTML 

def displayLandmarkImagesLarge(urls, category_name):
    """Show the first 12 image URLs of a landmark as a strip of inline HTML thumbnails.

    Args:
        urls: pandas Series of image URLs; only the values are used.
        category_name: currently unused; kept so existing calls keep working.
    """
    from html import escape  # local import keeps the cell self-contained

    img_style = "width: 200px; height:160px; margin: 0px; float: left; border: 1px solid black;"
    # Series.iteritems() was removed in pandas 2.0; iterating the Series yields its values
    # on every pandas version. URLs are escaped because they land inside a quoted attribute.
    images_list = ''.join(
        f"<img style='{img_style}' src='{escape(str(u), quote=True)}' />"
        for u in urls.head(12)
    )
    display(HTML(images_list))

# Take the landmark id at position 15 of the frequency ranking and show its images.
category = data['landmark_id'].value_counts().index[15]
urls = data.loc[data['landmark_id'] == category, 'url']
displayLandmarkImagesLarge(urls, "")

In [22]:
# Landmark ids at positions 1-3 of the value_counts() ordering.
pd.Series(data['landmark_id'].value_counts().keys())[1:4]

1    6051
2    6599
3    9779
dtype: int64

### Predictions using weights from ImageNet

Before any model training, a VGG16 network pre-trained on the ImageNet dataset is used to predict on our images. Observation: the ImageNet weights successfully capture generic features of landmarks.

In [31]:
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input, decode_predictions
import numpy as np

# Full VGG16 classifier (convolutional base plus the ImageNet classification head).
model = VGG16(weights='imagenet', include_top=True)

# Forward slashes resolve on Windows and POSIX alike; the previous backslash path only worked on Windows.
img_path = '../Dataset/DataFiltered/train_images_model/0fecac19aec6d24c.jpg'
img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)  # add the batch axis the model expects
x = preprocess_input(x)

preds = model.predict(x)
print('Predicted:', decode_predictions(preds, top=3)[0])

# 0a0667fc2436a04b = stupa (17%)
# 0b48dd3b81d5b6ba = triumphal_arch(25%)

Predicted: [('n09468604', 'valley', 0.48527926), ('n09399592', 'promontory', 0.1536856), ('n09246464', 'cliff', 0.12624218)]


In [None]:
# available = [int((x[0].split('/'))[-1]) for x in os.walk(r'Data/train_images_model/') if len((x[0].split('/'))[-1]) > 0]
# new = [str(x) for x in range(1,14000) if x not in available]
# for i in new:
#     path = 'Data/train_images_model/' + i
#     if not os.path.exists(path):
#         os.makedirs(path)

# Model Training - Transfer Learning

In [None]:
import numpy as np
from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.utils.np_utils import to_categorical
from keras.callbacks import ModelCheckpoint
from keras.models import Model
import csv
import os
import cv2
from keras.models import load_model
import matplotlib.pyplot as plt
import math
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import img_to_array
from keras.utils import to_categorical
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import tensorflow as tf
import keras

In [10]:
# Root folders of the training / validation images; count() below looks for numbered class sub-folders in them.
train_data_dir = 'Data/train_images_model'
validation_data_dir = 'Data/validation_images_model'

#counts the number of files in subdirectories of a given directory. 
#outputs the count of images and label of each image in order of reading.

def count(dir, n_classes=50):
    """Count the files under the numbered class sub-folders of ``dir``.

    Class folders are expected to be named ``1`` .. ``n_classes``. They are visited in
    string-sorted order ('1', '10', '11', ..., '2', '20', ...), which is the order Keras
    ``flow_from_directory(shuffle=False)`` uses for class folders. The returned labels
    therefore line up with features extracted through such a generator; plain numeric
    order does not once there are ten or more classes.

    Args:
        dir: root folder that contains the class sub-folders.
        n_classes: highest class folder number to look for (default 50).

    Returns:
        ``[total, labels]`` where ``labels`` holds one class-folder name (str) per file
        found and ``total == len(labels)``.
    """
    class_names = sorted(str(class_id) for class_id in range(1, n_classes + 1))
    labels = []
    for class_name in class_names:
        # os.walk yields nothing for a missing folder, so absent classes add no labels.
        for _root, _dirs, files in os.walk(os.path.join(dir, class_name)):
            labels.extend(class_name for _ in files)
    print (len(labels))
    return [len(labels), labels]

nb_train_samples = count(train_data_dir)
nb_validation_samples = count(validation_data_dir)
print(nb_train_samples)
print(nb_validation_samples)

# nb_train_samples / nb_validation_samples each have the form [len(count), count]: len(count) is the total number of
# files found in the train/validation folder, and count is the list of class labels (class-folder names) of those files.

0
0
[0, []]
[0, []]


In the next steps a batch size is defined, which needs to be a factor of both the number of train images and the number of validation images, so I had to remove a certain number of images in the process. Use the code below to move some images out of the train/validation folder in order to get a common factor in the range of 100-1000 (approx.). This common factor will be used as the batch size going forward.
A folder named 'Data/moved_images' has to be created before executing the code below, and the snippet additionally needs `import shutil`.

In [11]:
# CL = '2338' #class from which to move images
# NI = 50 #Number of images to be moved

# i = 0
# for files in os.listdir(r'Data/train_images_model/' + CL):    # loop through startfolders
#             i+=1
#             inpath = r'Data/train_images_model/' + CL + '/' + files
#             outpath = r'Data/moved_images' 
#             shutil.move(inpath, outpath)
#             if i == NI:
#                 break

#### Converting images to vectors using weights from ImageNet on VGG16

In [12]:
## Had to remove images (1046 - 6c39ce7ee33cea49.jpg) (1310 - f2bf3d0a94733000.jpg) (1834 - 35494ec38ca8b6ff.jpg)
## (1946 - aa108073efc35e13.jpg) (2729 - 43608f66f2f680e3.jpg) (2975 - bf4b45efc3dfda07.jpg)
## because they were throwing errors while running the code.

img_width, img_height = 96, 96 # size the images are resized to when loaded (see target_size below).
top_model_weights_path = 'bottleneck_fc_model.h5' # A file with this name would be saved later in the code
epochs = 5
batch_size = 1000 #As found in the previous step
def _save_bottleneck_split(model, datagen, data_dir, expected_samples, out_path):
    """Push every image under ``data_dir`` through ``model`` once and save the outputs to ``out_path``."""
    generator = datagen.flow_from_directory(
        data_dir,
        target_size=(img_height, img_width),  # Keras expects (height, width)
        batch_size=batch_size,
        class_mode=None,  # this means our generator will only yield batches of data, no labels
        shuffle=False)  # our data will be in order
    if generator.samples != expected_samples:
        print('WARNING: generator found', generator.samples, 'images but count() reported', expected_samples)
    # Round up so the final, smaller batch is predicted as well. Flooring silently dropped
    # images whenever the image count was not an exact multiple of batch_size.
    steps = (generator.samples + batch_size - 1) // batch_size
    # predict_generator returns the model output for the batches the generator yields
    features = model.predict_generator(generator, steps)
    with open(out_path, 'wb') as features_file:
        np.save(features_file, features)
    return features


def save_bottleneck_features():
    """Extract VGG16 convolutional ("bottleneck") features for the train and validation images.

    Reads the module-level ``train_data_dir``, ``validation_data_dir``, ``img_width``,
    ``img_height``, ``batch_size``, ``nb_train_samples`` and ``nb_validation_samples``.
    Writes ``bottleneck_features_train.npy`` and ``bottleneck_features_validation.npy``
    to the working directory. Features are stored in the generator's directory order.
    """
    # build the VGG16 network (convolutional base only, ImageNet weights)
    model = applications.VGG16(include_top=False, weights='imagenet',
                               input_shape=(img_height, img_width, 3))
    model.summary()  # summary() prints itself; wrapping it in print() added a stray "None"

    train_datagen = ImageDataGenerator(rescale=1. / 255,
                                       rotation_range=30,
                                       width_shift_range=0.2,
                                       height_shift_range=0.2,
                                       zoom_range=0.5,
                                       brightness_range=[0.5, 1.5])
    print ('start1')
    bottleneck_features_train = _save_bottleneck_split(
        model, train_datagen, train_data_dir,
        nb_train_samples[0], 'bottleneck_features_train.npy')
    print ('bottleneck_features_trained')
    print(nb_train_samples[0])
    print (bottleneck_features_train.shape)  # shape only; the full array is too large to be useful
    print ('Train done')

    ## Had to remove (2061 - a32ee99990779b22.jpg) because was throwing an error
    valid_datagen = ImageDataGenerator(rescale=1. / 255)  # No image augmentation in validation dataset
    print ('validation predict start')
    bottleneck_features_validation = _save_bottleneck_split(
        model, valid_datagen, validation_data_dir,
        nb_validation_samples[0], 'bottleneck_features_validation.npy')
    print (bottleneck_features_validation.shape)
    print ('validation done')
# Writes bottleneck_features_train.npy and bottleneck_features_validation.npy to the working directory.
save_bottleneck_features()

W0714 00:42:02.518367 26388 deprecation_wrapper.py:119] From C:\Users\User\AppData\Local\Continuum\anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0714 00:42:02.591372 26388 deprecation_wrapper.py:119] From C:\Users\User\AppData\Local\Continuum\anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0714 00:42:02.612375 26388 deprecation_wrapper.py:119] From C:\Users\User\AppData\Local\Continuum\anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0714 00:42:02.691380 26388 deprecation_wrapper.py:119] From C:\Users\User\AppData\Local\Continuum\anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:3976: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 96, 96, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 96, 96, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 96, 96, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 48, 48, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 48, 48, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 48, 48, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 24, 24, 128)       0         
__________

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'Data/train_images_model'

#### Initializing the weights on top 3 layers

In [None]:
# Settings read by train_top_model() below (passed to model.fit).
epochs = 5
batch_size = 1000
import numpy as np

def _bottleneck_targets(label_names, n_class):
    """Turn class-folder names into one-hot targets ordered like the saved bottleneck features."""
    # flow_from_directory orders class folders alphanumerically ('1', '10', '11', ..., '2', ...)
    # and assigns class indices in that same order. With shuffle=False the saved features
    # follow that order too, so both the sample order and the class index must use string sorting.
    # Assumes the data folder holds exactly the class folders '1'..str(n_class) -- TODO confirm.
    class_names = sorted(str(class_id) for class_id in range(1, n_class + 1))
    class_index = {name: index for index, name in enumerate(class_names)}
    ordered_names = sorted(str(name) for name in label_names)
    return to_categorical([class_index[name] for name in ordered_names], n_class)


def train_top_model(n_class=50):
    """Train the dense classifier head on the saved VGG16 bottleneck features.

    Reads ``bottleneck_features_train.npy`` / ``bottleneck_features_validation.npy`` and the
    label lists in ``nb_train_samples[1]`` / ``nb_validation_samples[1]``, then saves the
    trained head's weights to ``top_model_weights_path``.

    Args:
        n_class: number of classes fed to the model (default 50).

    Raises:
        ValueError: if the number of feature rows and labels differ for either split.
    """
    # Pass the path straight to np.load so no file handle is left open.
    train_data = np.load('bottleneck_features_train.npy')
    validation_data = np.load('bottleneck_features_validation.npy')

    # Class indices must start at 0 and match the generator's class numbering.
    train_labels = _bottleneck_targets(nb_train_samples[1], n_class)
    validation_labels = _bottleneck_targets(nb_validation_samples[1], n_class)

    for split, features, targets in (('train', train_data, train_labels),
                                     ('validation', validation_data, validation_labels)):
        if len(features) != len(targets):
            raise ValueError('%s: %d feature rows but %d labels'
                             % (split, len(features), len(targets)))

    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(n_class, activation='softmax'))
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy', metrics=['accuracy'])

    print ('model fit starting')
    model.fit(train_data, train_labels,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(validation_data, validation_labels))
    model.save_weights(top_model_weights_path)

# Trains the dense head and saves its weights to top_model_weights_path.
train_top_model()

#### Compile and train the entire model

In [None]:
 img_width, img_height = 96, 96
top_model_weights_path = 'bottleneck_fc_model.h5'
train_data_dir = 'Data/train_images_model'
validation_data_dir = 'Data/validation_images_model'
batch_size = 100
epochs = 15
def trainCNN(steps_per_epoch=340, validation_steps=150):
    """Fine-tune VGG16 together with the pre-trained dense head on the landmark images.

    Reads the module-level ``img_width``, ``img_height``, ``batch_size``, ``epochs``,
    ``train_data_dir``, ``validation_data_dir`` and ``top_model_weights_path``.

    Args:
        steps_per_epoch: training batches drawn per epoch (default 340, previously hard-coded).
        validation_steps: validation batches drawn per epoch (default 150, previously hard-coded).

    Side effects:
        Writes ``class_indices.npy`` and overwrites ``top_model_weights_path`` with the
        weights of the whole network (best epoch by the checkpoint's monitored metric).
    """
    n_class = 50

    # build the VGG16 network
    base_model = applications.VGG16(weights='imagenet', include_top=False,
                                    input_shape=(img_height, img_width, 3))

    top_model = Sequential()
    top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
    top_model.add(Dense(256, activation='relu'))
    top_model.add(Dense(256, activation='relu'))
    top_model.add(Dense(n_class, activation='softmax'))
    top_model.load_weights(top_model_weights_path)  # Load the weights initialized in previous steps

    # `inputs` / `outputs` are the supported keyword names; `input` / `output` are legacy aliases.
    model = Model(inputs=base_model.input, outputs=top_model(base_model.output))

    # Freeze layers 0-15 (the input layer through block5_conv1). block5_conv2, block5_conv3
    # and the dense head stay trainable.
    for layer in model.layers[:16]:
        layer.trainable = False

    # compile the model with the Adam optimizer and a small learning rate.
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-8, decay=0.0),
                  metrics=['accuracy'])
    print ('Compilation done.')

    train_datagen = ImageDataGenerator(rescale=1. / 255,
                                       rotation_range=90,
                                       width_shift_range=0.2,
                                       height_shift_range=0.2,
                                       zoom_range=0.5)
    valid_datagen = ImageDataGenerator(rescale=1. / 255)

    train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical')

    # class_indices is a dict, so np.save pickles it; reading it back needs allow_pickle=True.
    np.save('class_indices.npy', train_generator.class_indices)

    validation_generator = valid_datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical')

    print ('Model fit begins...')
    # NOTE: the checkpoint replaces the head-only weights file with whole-network weights,
    # so train_top_model() has to be re-run before this function can be run again.
    model.fit_generator(
        train_generator,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        validation_data=validation_generator,
        validation_steps=validation_steps,
        callbacks=[ModelCheckpoint(filepath=top_model_weights_path, save_best_only=True, save_weights_only=True)]
        )
    # No unconditional save_weights() here: it overwrote the best checkpoint with the
    # last-epoch weights and so defeated save_best_only=True.

# Fine-tunes the combined network; weights are written to top_model_weights_path.
trainCNN()

# Predictions

In [None]:
from tensorflow.python.platform import app
import argparse
import os
import sys
import time
from time import *
import io
import tensorflow as tf

In [None]:
top_model_weights_path = 'bottleneck_fc_model.h5'  # weights file loaded by predict()
train_data_dir = 'Data/train_images_model'  # predict() reads the class-name mapping from this folder
testfile = 'Data/test_images_from_train'  # folder of images to classify

subfile = 'Data/sub_spezifinal.csv'  # CSV file that predict() writes

def count(dir):
    """Return how many files sit under the class sub-folders ``1`` to ``9`` of ``dir``."""
    total = 0
    for class_id in range(1, 10):
        class_dir = dir + '/' + str(class_id)
        # os.walk descends into nested folders and yields nothing for a missing one.
        total += sum(len(files) for _root, _dirs, files in os.walk(class_dir))
    return total

# NOTE(review): this rebinds nb_train_samples / nb_validation_samples to plain ints, while the training
# cells index them as [total, labels] lists -- re-run the earlier count cell before re-running those cells.
# validation_data_dir is not assigned in this cell; it must already exist from an earlier cell.
nb_train_samples = count(train_data_dir)
nb_validation_samples = count(validation_data_dir)

In [None]:
from keras import backend as K 
from keras.applications.vgg16 import preprocess_input, decode_predictions
def predict(image_path):
    """Classify every image under ``image_path`` and write the results to ``subfile``.

    The CSV gets a header row ``id, landmarks`` followed by one row per image: the file
    name without extension, then ``"<class folder name> <score>"`` where the score is the
    top softmax probability formatted to two decimals.

    Reads the module-level ``train_data_dir``, ``batch_size``, ``top_model_weights_path``,
    ``subfile`` and ``data_test``.

    Args:
        image_path: folder containing the images to classify.
    """
    print ('starting...')
    path, dirs, files = next(os.walk(image_path))
    print('Number of Testimages:', len(files))

    # class_indices maps class-folder name -> class index; invert it once to map predictions back.
    train_datagen = ImageDataGenerator(rescale=1. / 255)
    generator = train_datagen.flow_from_directory(train_data_dir, batch_size=batch_size)
    inv_map = {v: k for k, v in generator.class_indices.items()}

    n_class = 50

    # Build the network and load its weights once; doing this per image repeated the
    # most expensive step for every file.
    base_model = applications.VGG16(weights='imagenet', include_top=False, input_shape=(96, 96, 3))
    top_model = Sequential()
    top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
    top_model.add(Dense(256, activation='relu'))
    top_model.add(Dense(256, activation='relu'))
    top_model.add(Dense(n_class, activation='softmax'))
    model = Model(inputs=base_model.input, outputs=top_model(base_model.output))
    model.load_weights(top_model_weights_path)

    # newline='' is what the csv module requires; without it Windows gets blank lines between rows.
    with open(subfile, 'w', newline='') as csvfile:
        newFileWriter = csv.writer(csvfile)
        newFileWriter.writerow(['id', 'landmarks'])

        for root, dirs, files in os.walk(image_path):  # loop through startfolders
            for pic in files:
                # Join with the walked folder so files in sub-folders resolve correctly.
                path = os.path.join(root, pic)

                image = load_img(path, target_size=(96, 96))
                image = img_to_array(image)

                # important! otherwise the predictions will be '0'
                image = image / 255
                image = np.expand_dims(image, axis=0)

                # classify landmark
                prediction = model.predict(image)
                inID = prediction.argmax(axis=1)[0]
                label = inv_map[inID]

                scor = "{:.2f}".format(max(prediction[0]))
                out = str(label) + ' ' + scor

                image_id = os.path.splitext(pic)[0]
                newFileWriter.writerow([image_id, out])
                print (image_id, scor, data_test[(data_test.id == image_id)].landmark_id, label)

    K.clear_session()  # release the graph once all images are done

# Run the classifier over the test-image folder; results are written to `subfile`.
testfile = 'Data/test_images_from_train'
predict(testfile)



In [None]:
 toco \ --output_file=trained_model.tflite \ --keras_model_file=bottleneck_fc_model.h5

In [None]:
# NOTE(review): this install cell sits after the cell that already calls `toco`; on a fresh kernel it must run first.
# NOTE(review): the toco converter presumably ships with TensorFlow itself -- confirm this PyPI package is needed.
pip install toco
