# Transfer learning with pretrained Keras models


In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
from os import listdir, makedirs
from os.path import join, exists, expanduser
from tqdm import tqdm
from sklearn.metrics import log_loss, accuracy_score
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16
from keras.applications.vgg19 import VGG19

from keras.applications.resnet50 import ResNet50
from keras.applications import xception
from keras.applications import inception_v3
from keras.applications.vgg16 import preprocess_input, decode_predictions
from sklearn.linear_model import LogisticRegression
from keras import backend as K
K.set_image_dim_ordering('tf')
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

from keras.models import Model
from keras.layers import Dense, Dropout, Flatten

Using TensorFlow backend.


In [2]:

start = dt.datetime.now()

# 120 Classes

In [3]:
INPUT_SIZE = 150
NUM_CLASSES = 120
SEED = 35000
data_dir = 'C:/Users/rites/Desktop/dogbreed/input/'
labels = pd.read_csv(join(data_dir, 'label.csv'))
labels_test = pd.read_csv(join(data_dir, 'sample_submission.csv'))
print(len(listdir(join(data_dir, 'train'))), len(labels))
print(len(listdir(join(data_dir, 'test'))), len(labels_test))

30801 30801
10357 10357


In [5]:
selected_breed_list = list(labels.groupby('breed').count().sort_values(by='id', ascending=False).head(NUM_CLASSES).index)
labels = labels[labels['breed'].isin(selected_breed_list)]
labels['target'] = 1
labels['rank'] = labels.groupby('breed').rank()['id']
labels_pivot = labels.pivot('id', 'breed', 'target').reset_index().fillna(0)
np.random.seed(seed=SEED)
rnd = np.random.random(len(labels))
train_idx = rnd < 0.8
valid_idx = rnd >= 0.8
y_train = labels_pivot[selected_breed_list].values
ytr = y_train[train_idx]
yv = y_train[valid_idx]


In [6]:
def read_img(img_id, train_or_test, size):
    """Read and resize image.
    # Arguments
        img_id: string
        train_or_test: string 'train' or 'test'.
        size: resize the original image.
    # Returns
        Image as numpy array.
    """
    img = image.load_img(join(data_dir, train_or_test, '%s.jpg' % img_id), target_size=size)
    img = image.img_to_array(img)
    return img

# VGG 16 Feauture Extraction

# Extract VGG16 bottleneck features

In [7]:
INPUT_SIZE = 150
POOLING = 'avg'

x_train = np.zeros((len(labels), INPUT_SIZE, INPUT_SIZE, 3), dtype='float32')
for i, img_id in tqdm(enumerate(labels['id'])):
    img = read_img(img_id, 'train', (INPUT_SIZE, INPUT_SIZE))
    x = preprocess_input(np.expand_dims(img.copy(), axis=0))
    x_train[i] = x
print('Train Images shape: {} size: {:,}'.format(x_train.shape, x_train.size))



24305it [01:43, 233.86it/s]


Train Images shape: (24305, 150, 150, 3) size: 1,640,587,500


In [8]:
INPUT_SIZE = 150
POOLING = 'avg'

x_test = np.zeros((len(labels_test), INPUT_SIZE, INPUT_SIZE, 3), dtype='float32')
for i, img_id in tqdm(enumerate(labels_test['id'])):
    img = read_img(img_id, 'test', (INPUT_SIZE, INPUT_SIZE))
    x = preprocess_input(np.expand_dims(img.copy(), axis=0))
    x_test[i] = x
print('Test Images shape: {} size: {:,}'.format(x_test.shape, x_test.size))

10357it [00:49, 210.60it/s]


Test Images shape: (10357, 150, 150, 3) size: 699,097,500


In [9]:
Xtr = x_train[train_idx]
Xv = x_train[valid_idx]

print((Xtr.shape, Xv.shape, ytr.shape, yv.shape))
vgg_bottleneck = VGG16(weights='imagenet', include_top=False, pooling=POOLING)
x = vgg_bottleneck.output
x = Dense(512, activation='relu', name='extra_layer_1')(x)
x = Dense(512, activation='relu', name='extra_layer_2')(x)
x = Dropout(0.5, noise_shape=None, seed=None)(x)
x = Dense(256, activation='relu', name='extra_layer_3')(x)
predictions = Dense(NUM_CLASSES, activation='softmax')(x)
model = Model(inputs=vgg_bottleneck.input, outputs=predictions)

for layer in vgg_bottleneck.layers:
    layer.trainable = False

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()


((19415, 150, 150, 3), (4890, 150, 150, 3), (19415, 120), (4890, 120))
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block

In [10]:
model.fit(Xtr, ytr, epochs=2, validation_data=(Xv, yv), verbose=1)

Train on 19415 samples, validate on 4890 samples
Epoch 1/2








Epoch 2/2










<keras.callbacks.History at 0x23166a13cf8>

In [11]:
preds = model.predict(x_test, verbose=1)



In [12]:
targets_series = pd.Series(labels['breed'])
one_hot = pd.get_dummies(targets_series, sparse = True)

In [13]:
sub = pd.DataFrame(preds)
# Set column names to those generated by the one-hot encoding earlier
col_names = one_hot.columns.values
sub.columns = col_names
# Insert the column id from the sample_submission at the start of the data frame
sub.insert(0, 'id', labels_test['id'])
sub.head(5)

Unnamed: 0,id,Afghan_hound,African_hunting_dog,Airedale,American_Staffordshire_terrier,Appenzeller,Australian_terrier,Bedlington_terrier,Bernese_mountain_dog,Blenheim_spaniel,...,schipperke,silky_terrier,soft-coated_wheaten_terrier,standard_poodle,standard_schnauzer,toy_poodle,toy_terrier,vizsla,whippet,wire-haired_fox_terrier
0,000621fb3cbb32d8935728e48679680e,0.044146,0.000836,0.001491983,0.025924,0.031389,0.001793,0.007022,0.000778,0.0001,...,0.000984,0.00693,0.000143,0.020064,0.0064498,7.7e-05,0.000434,0.001953,0.001831,4.57293e-06
1,00102ee9d8eb90812350685311fe5890,5.3e-05,0.002142,4.87695e-07,0.001794,6e-06,1.6e-05,5.4e-05,5e-06,6.5e-05,...,3.4e-05,0.003036,1.3e-05,0.000136,9.78829e-07,2.7e-05,0.000311,5e-06,0.006817,4.646315e-07
2,0012a730dfa437f5f3613fb75efcd4ce,0.005891,0.009408,0.005483827,0.004339,0.004178,0.003435,0.003224,0.0063,0.002957,...,0.019221,0.002095,0.002253,0.003296,0.005883866,0.004804,0.004247,0.010778,0.001348,0.005693787
3,001510bc8570bbeee98c8d80c8a95ec1,0.006861,0.017898,0.007950988,0.016219,0.004661,0.004465,0.005952,0.013875,0.004067,...,0.004622,0.004725,0.012811,0.001946,0.003623846,0.012221,0.001624,0.000949,0.002946,0.001911383
4,001a5f3114548acdefa3d4da05474c2e,0.063607,0.016154,5.255191e-05,0.004238,0.001516,0.050634,0.000908,0.00033,0.003061,...,0.000522,0.000324,0.000124,0.003534,0.0716327,0.000356,0.010759,0.030712,0.000181,0.001421953


In [19]:
sub.to_csv("C:/Users/rites/Desktop/dogbreed/sample_submission.csv")