# Scene Classification

## 3. Train_InceptionResNetV2_top2layers
- Import pkg
- Load sample data, only first 1000 objects

Reference:
- https://challenger.ai/competitions
- https://github.com/jupyter/notebook/issues/2287

**Tensorboard**
1. Run in a terminal, from the project directory:  **tensorboard --logdir=./log**
2. Open in a browser:  **http://127.0.0.1:6006**

### Import pkg

In [1]:
import numpy as np
import pandas as pd
# import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

In [2]:
from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from keras.optimizers import Adam, SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler, TensorBoard

Using TensorFlow backend.


In [3]:
# import zipfile
import os
import zipfile
import math
import time
from IPython.display import display
import pdb
import json
from PIL import Image
import glob
import pickle

### Load sample data, only first 1000 objects

In [4]:
project_name = 'SC2017'
step_name = 'ResNetV2_Top2Layers'
# Take a single clock snapshot so date_str and time_str can never disagree
# (two separate localtime() calls could straddle midnight).
run_started_at = time.localtime()
date_str = time.strftime("%Y%m%d", run_started_at)
time_str = time.strftime("%Y%m%d_%H%M%S", run_started_at)
# run_name identifies this run: <project>_<step>_<timestamp>.
run_name = project_name + '_' + step_name + '_' + time_str
print('run_name: ' + run_name)

run_name: SC2017_ResNetV2_Top2Layers_20171030_213328


In [5]:
# All project folders are resolved relative to the directory the notebook runs in.
cwd = os.getcwd()
print(cwd)

# input: datasets, log: TensorBoard logs, model: saved models, output: results.
input_path, log_path, model_path, output_path = (
    os.path.join(cwd, folder) for folder in ('input', 'log', 'model', 'output')
)

E:\AIChallenger\SceneClassification2017


In [6]:
datasetName = 'test_a'
date = '20170922'

# Every file and folder of a dataset release is named after (split, release date).
release = (datasetName, date)

zip_path = os.path.join(input_path, 'ai_challenger_scene_{0}_{1}.zip'.format(*release))
extract_path = os.path.join(input_path, 'ai_challenger_scene_{0}_{1}'.format(*release))
image_path = os.path.join(extract_path, 'scene_{0}_images_{1}'.format(*release))
scene_classes_path = os.path.join(extract_path, 'scene_classes.csv')
scene_annotations_path = os.path.join(extract_path, 'scene_{0}_annotations_{1}.json'.format(*release))

# Echo the resolved locations, one per line, in the order they were built.
for resolved_path in (zip_path, extract_path, image_path,
                      scene_classes_path, scene_annotations_path):
    print(resolved_path)

E:\AIChallenger\SceneClassification2017\input\ai_challenger_scene_test_a_20170922.zip
E:\AIChallenger\SceneClassification2017\input\ai_challenger_scene_test_a_20170922
E:\AIChallenger\SceneClassification2017\input\ai_challenger_scene_test_a_20170922\scene_test_a_images_20170922
E:\AIChallenger\SceneClassification2017\input\ai_challenger_scene_test_a_20170922\scene_classes.csv
E:\AIChallenger\SceneClassification2017\input\ai_challenger_scene_test_a_20170922\scene_test_a_annotations_20170922.json


In [7]:
# The class file has no header row, so pandas numbers the columns 0, 1, 2.
scene_classes = pd.read_csv(scene_classes_path, header=None)
# Preview the first five rows.
display(scene_classes.head(5))

Unnamed: 0,0,1,2
0,0,航站楼,airport_terminal
1,1,停机坪,landing_field
2,2,机舱,airplane_cabin
3,3,游乐场,amusement_park
4,4,冰场,skating_rink


In [8]:
def get_scene_name(lable_number, scene_classes_path):
    """Look up the name stored in column 2 of the class CSV for a label.

    The CSV is read on every call and is parsed without a header row.

    Args:
        lable_number: row label (integer index) of the class to look up.
        scene_classes_path: path of the class CSV file.

    Returns:
        The value found in column 2 of the row labelled `lable_number`.
    """
    class_table = pd.read_csv(scene_classes_path, header=None)
    name_column = class_table[2]
    return name_column.loc[lable_number]
# Smoke test: label 0 is the first row of the class table ('airport_terminal').
print(get_scene_name(0, scene_classes_path))

airport_terminal


### Load model

In [56]:
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K

# from keras.applications.xception import Xception
# from keras.applications.vgg16 import VGG16
# from keras.applications.vgg19 import VGG19
# from keras.applications.resnet50 import ResNet50
# from keras.applications.inception_v3 import InceptionV3
from deep_learning_models_0_5.inception_v3 import InceptionV3
# from deep_learning_models_0_5.inception_resnet_v2 import InceptionResNetV2

In [57]:
# Disable automatic pdb invocation so a failing cell does not open the debugger.
%pdb off

Automatic pdb calling has been turned OFF


In [58]:
%%time
base_model = InceptionV3(weights='imagenet', include_top=False)

# add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
x = Dense(1024, activation='relu')(x)
# and a logistic layer -- let's say we have 200 classes
predictions = Dense(80, activation='softmax')(x)

# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)

# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional InceptionV3 layers
for layer in base_model.layers:
    layer.trainable = False

model.compile(loss='categorical_crossentropy', optimizer = Adam(lr=1e-4), metrics=["accuracy"])
for i, layer in enumerate(model.layers):
   print(i, layer.name)

TypeError: _obtain_input_shape() got an unexpected keyword argument 'include_top'

In [51]:
%%time
def saveModel(model, modelPath, fileName):
    """Save `model` to ``<modelPath>/<fileName>.h5``, creating the folder if needed.

    Args:
        model: object exposing ``save(path)`` (a Keras model in this notebook).
        modelPath: directory to write into; missing parent directories are
            created as well.
        fileName: file name without the ``.h5`` extension.
    """
    # makedirs(exist_ok=True) creates missing parents and does not fail when
    # the directory already exists, unlike the previous isdir()/mkdir() pair.
    os.makedirs(modelPath, exist_ok=True)
    weightsFile = os.path.join(modelPath, fileName + '.h5')
    model.save(weightsFile)
# saveModel(model, model_path, 'ModelSaveTest')

Wall time: 0 ns


**Train the 2 new top layers (InceptionV3 base frozen)**

In [18]:
%%time
def get_lr(x):
    """Return the learning rate for step `x` and print it.

    The rate starts at 2e-3, is multiplied by 0.85 for each unit of `x`, and
    is never allowed to drop below 1e-4.
    """
    decayed = 2e-3 * 0.85 ** x
    lr = max(decayed, 1e-4)
    print(lr)
    return lr
# Callbacks: per-epoch learning-rate schedule (get_lr) and TensorBoard logging.
annealer = LearningRateScheduler(get_lr)
log_dir = os.path.join(log_path, run_name)
print(log_dir)
tensorBoard = TensorBoard(log_dir=log_dir)

batch_size = 64
image_size = (299, 299)

# Random augmentation is applied to the training images only.
# NOTE(review): ImageNet-pretrained Inception weights are normally fed inputs
# scaled to [-1, 1]; rescale=1./255 yields [0, 1] -- confirm this is intended.
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range = 20,
                                   zoom_range = 0.1,
                                   width_shift_range = 0.1,
                                   height_shift_range = 0.1,
                                   horizontal_flip = True,
                                   vertical_flip = True)
# Validation images get the same rescaling but NO random augmentation, so the
# validation metrics measure the model rather than the random transforms.
validation_datagen = ImageDataGenerator(rescale=1./255)

data_train_path = os.path.join(input_path, 'data_train')
data_validation_path = os.path.join(input_path, 'data_validation')
train_generator = train_datagen.flow_from_directory(data_train_path,
                                                    target_size=image_size,
                                                    batch_size=batch_size,
                                                    class_mode = "categorical")
# Shuffling is left at its default so that the few validation batches drawn per
# epoch are a random sample instead of only the first classes.
validation_generator = validation_datagen.flow_from_directory(data_validation_path,
                                                              target_size=image_size,
                                                              batch_size=batch_size,
                                                              class_mode = "categorical")

image_amount = len(train_generator.classes)
print('image_amount:\t{0}'.format(image_amount))
# Whole batches only; the trailing partial batch is not counted.
steps_per_epoch = int(image_amount / batch_size)
print('steps_per_epoch:\t{0}'.format(steps_per_epoch))
max_queue_size = 128
print('max_queue_size:\t{0}'.format(max_queue_size))
# Validate on a fixed-size subset each epoch to keep epochs short.
validation_sample_count = 512
validation_steps = int(validation_sample_count / batch_size)
print('validation_steps:\t{0}'.format(validation_steps))

E:\AIChallenger\SceneClassification2017\log\SC2017_ResNetV2_Top2Layers_20171030_213328
Found 53879 images belonging to 80 classes.
Found 7120 images belonging to 80 classes.
image_amount:	53879
steps_per_epoch:	841
max_queue_size:	128
validation_steps:	8
Wall time: 6.81 s


In [19]:
# Training options, collected in one place so they are easy to scan and tweak.
fit_options = dict(
    steps_per_epoch=steps_per_epoch,
    max_queue_size=max_queue_size,
    workers=32,
    epochs=20,
    verbose=1,  # 1 shows a progress bar with ETA, 0 is silent
    validation_data=validation_generator,
    validation_steps=validation_steps,
    callbacks=[annealer, tensorBoard],
)
# `hist` keeps the per-epoch metrics that are plotted further down.
hist = model.fit_generator(train_generator, **fit_options)

KeyboardInterrupt: 

In [None]:
# Evaluate on the same number of validation batches used during training
# (validation_steps) instead of a second hard-coded copy of that number.
# `max_queue_size` is the keyword already used for fit_generator in this
# notebook; `max_q_size` is its legacy spelling.
final_loss, final_acc = model.evaluate_generator(generator=validation_generator,
                                                 steps=validation_steps,
                                                 max_queue_size=10,
                                                 workers=1)
print("Final loss: {0:.4f}, final accuracy: {1:.4f}".format(final_loss, final_acc))

In [None]:
# Append the accuracy to the run name as a zero-padded integer
# (accuracy * 10000, truncated), then save the model under that name.
acc_as_int = int(final_acc*10000)
run_name_acc = '_'.join([run_name, str(acc_as_int).zfill(4)])
saveModel(model, model_path, run_name_acc)

In [None]:
# One figure per metric: training curve in blue, validation curve in red.
# Titles, axis labels and a legend make each figure readable on its own.
for metric, metric_label in (('loss', 'Loss'), ('acc', 'Accuracy')):
    fig, ax = plt.subplots()
    ax.plot(hist.history[metric], color='b', label='train')
    ax.plot(hist.history['val_' + metric], color='r', label='validation')
    ax.set(title=metric_label + ' per epoch', xlabel='epoch', ylabel=metric_label.lower())
    ax.legend()
    plt.show()

In [None]:
# Report the name the model was saved under, then signal completion.
for message in ('run_name_acc: ' + run_name_acc, 'Done!'):
    print(message)