## Full Keras with transfer learning and augmentation 

In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import skimage 
from scipy import ndimage, misc

In [3]:
import cv2
def plot_image(file_path):
    """ Print image from file 'file_path' """
    img = cv2.imread(file_path)
    cv_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(cv_rgb)
    plt.show()

In [4]:
from keras.utils import np_utils
from sklearn.datasets import load_files

data = load_files('prep224/train')
caltech_files_train = np.array(data['filenames'])
caltech_targets_train = np.array(data['target'])

data_val = load_files('prep224/val')
caltech_files_val = np.array(data_val['filenames'])
caltech_targets_val = np.array(data_val['target'])

X_train, X_val = caltech_files_train, caltech_files_val
y_train_class, y_val_class = caltech_targets_train, caltech_targets_val
y_train = np_utils.to_categorical(y_train_class, 256)
y_val = np_utils.to_categorical(y_val_class, 256)

del data, data_val, caltech_files_train, caltech_targets_train, caltech_files_val, caltech_targets_val

Using TensorFlow backend.


In [5]:
test_files = ['prep224/test/'+ file for file in os.listdir('prep224/test')]
#test_files[:5]

In [6]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

resize = (224, 224)

CHANNELS = 3
ROWS = resize[0]
COLS = resize[1]
mean = 135

datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

## Data load and resize

In [7]:
from keras.preprocessing import image                  
from tqdm import tqdm

def path_to_tensor(img_path, resize = resize):
    # loads RGB image as PIL.Image.Image type
    img = image.load_img(img_path, target_size=resize)
    # convert PIL.Image.Image type to 3D tensor with shape (224, 224, 3)
    x = image.img_to_array(img)
    # convert 3D tensor to 4D tensor with shape (1, 224, 224, 3) and return 4D tensor
    return np.expand_dims(x, axis=0)

def paths_to_tensor(img_paths, resize = resize):
    list_of_tensors = [path_to_tensor(img_path, resize) for img_path in tqdm(img_paths)]
    return np.vstack(list_of_tensors)

In [8]:
from PIL import ImageFile                            
ImageFile.LOAD_TRUNCATED_IMAGES = True                 

# pre-process the data for Keras
train_data = paths_to_tensor(X_train, resize)
valid_data = paths_to_tensor(X_val, resize)
test_data = paths_to_tensor(test_files, resize).astype('float32')/255

100%|██████████| 15700/15700 [00:14<00:00, 1076.30it/s]
100%|██████████| 6400/6400 [00:05<00:00, 1221.09it/s]
100%|██████████| 7680/7680 [00:06<00:00, 1178.50it/s]


In [9]:
print(train_data.shape)
print(valid_data.shape)
print(test_data.shape)

(15700, 224, 224, 3)
(6400, 224, 224, 3)
(7680, 224, 224, 3)


In [10]:
batch_size = 16

train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        shear_range=0.2,
        zoom_range=0.2,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True)

# only rescaling for test set
test_datagen = ImageDataGenerator(rescale=1./255)

# this is a generator that will read pictures found in
# subfolers of 'data/train', and indefinitely generate
# batches of augmented image data
train_generator = train_datagen.flow_from_directory(
        'prep224/train',  # this is the target directory
        target_size=(ROWS, COLS),
        batch_size=batch_size,
        class_mode='categorical')

# this is a similar generator, for validation data
validation_generator = train_datagen.flow_from_directory(
        'prep224/val',
        target_size=(ROWS, COLS),
        batch_size=batch_size,
        class_mode='categorical')

test_generator = test_datagen.flow_from_directory(
    'prep224/test',
    target_size=(64, 64),
    class_mode=None,
    batch_size=64,
    shuffle=False)

Found 15700 images belonging to 256 classes.
Found 6400 images belonging to 256 classes.
Found 0 images belonging to 0 classes.


In [56]:
from keras.applications.resnet50 import ResNet50
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, GlobalMaxPooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential, Model


model_inc = ResNet50(weights='imagenet',input_shape=(224,224,3), include_top=False)#, pooling = 'avg', )
#,input_shape=(224,224,3)
for layer in model_inc.layers[:170]:
    layer.trainable = False

x = model_inc.get_layer(index=-2).output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)

# x = model_resnet.output
#x = Flatten()(x)
x = Dense(1024, activation="relu")(x)
#x = BatchNormalization()(x)
predictions = Dense(256, activation="softmax")(x)

# # creating the final model 
model = Model(inputs = model_inc.input, outputs = predictions)
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_9 (InputLayer)             (None, 224, 224, 3)   0                                            
____________________________________________________________________________________________________
zero_padding2d_7 (ZeroPadding2D) (None, 230, 230, 3)   0                                            
____________________________________________________________________________________________________
conv1 (Conv2D)                   (None, 112, 112, 64)  9472                                         
____________________________________________________________________________________________________
bn_conv1 (BatchNormalization)    (None, 112, 112, 64)  256                                          
___________________________________________________________________________________________

In [57]:
len(model_inc.layers)

175

In [58]:
from keras.optimizers import Adam
model.compile(optimizer=Adam(lr = 0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [59]:
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

epochs = 15

checkpointer = [ModelCheckpoint(filepath='saved_models/weights.best.keras_resnet1.hdf5', 
                               verbose=1, save_best_only=True),
               ReduceLROnPlateau(verbose=1, patience=3, factor = 0.1), 
               EarlyStopping(verbose=1, patience=5, monitor='val_loss')]

In [60]:
model.fit_generator(
        train_generator,
        steps_per_epoch=3*len(train_data) / batch_size,
        epochs=epochs,
        validation_data=validation_generator,
        validation_steps=len(valid_data) / batch_size, callbacks=checkpointer, verbose=1)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15

Epoch 00005: reducing learning rate to 0.00010000000474974513.
Epoch 7/15
Epoch 8/15
Epoch 00007: early stopping


<keras.callbacks.History at 0x7ff2800c4ac8>

In [61]:
model.load_weights('saved_models/weights.best.keras_resnet1.hdf5')
predictions = [np.argmax(model.predict(np.expand_dims(feature, axis=0))) for feature in valid_data]
# get val accuracy
val_accuracy = 100*np.sum(np.array(predictions)==np.argmax(y_val, axis=1))/len(predictions)
print('Validation accuracy: %.2f%%' % val_accuracy)

Validation accuracy: 0.39%


In [63]:
predictions_test = [1+np.argmax(model.predict(np.expand_dims(feature, axis=0))) for feature in test_data]

In [64]:
image = [item.split('/')[2] for item in test_files]
submission = pd.DataFrame(
    {'image': image,
     'class': predictions_test
    })
submission = submission[['image','class']]
submission.head()

Unnamed: 0,image,class
0,16800.jpg,148
1,19320.jpg,161
2,21505.jpg,200
3,11014.jpg,232
4,10790.jpg,38


In [65]:
submission.to_csv('submissions/resnet_transfer.csv', index = False)

In [34]:
1+1

2