In [26]:
import numpy as np
import pandas as pd
import datetime as dt
import keras
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
from os import listdir, makedirs, getcwd, remove
from os.path import isfile, join, abspath, exists, isdir, expanduser
from tqdm import tqdm
from keras.models import Model, Sequential
from keras.layers import Input, Flatten, Dense, Conv2D, MaxPooling2D, Dropout
from keras.utils import layer_utils
from keras import backend as K
from keras.optimizers import RMSprop, SGD, Adam
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard, CSVLogger
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss, accuracy_score
from keras.preprocessing import image
from keras.applications.vgg19 import VGG19
from keras.applications.vgg16 import VGG16
from keras.applications.resnet50 import ResNet50
from keras.applications import xception
from keras.applications import inception_v3
from keras.applications.vgg16 import preprocess_input, decode_predictions
from keras.preprocessing.image import ImageDataGenerator
from sklearn.linear_model import LogisticRegression

In [10]:
INPUT_SIZE = 224
NUM_CLASSES = 16
SEED = 1987
data_dir = '../'
labels = pd.read_csv(join(data_dir, 'labels.csv'))
sample_submission = pd.read_csv(join(data_dir, 'sample_submission.csv'))
print(len(listdir(join(data_dir, 'train'))), len(labels))
print(len(listdir(join(data_dir, 'test'))), len(sample_submission))

10222 10222
10357 10357


In [11]:
def read_img(img_id, train_or_test, size):
    """Read and resize image.
    # Arguments
        img_id: string
        train_or_test: string 'train' or 'test'.
        size: resize the original image.
    # Returns
        Image as numpy array.
    """
    img = image.load_img(join(data_dir, train_or_test, '%s.jpg' % img_id), target_size=size)
    img = image.img_to_array(img)
    return img

In [12]:
INPUT_SIZE = 224
POOLING = 'avg'
x_train = np.zeros((len(labels), INPUT_SIZE, INPUT_SIZE, 3), dtype='float32')
for i, img_id in tqdm(enumerate(labels['id'])):
    img = read_img(img_id, 'train', (INPUT_SIZE, INPUT_SIZE))
    #print(img.shape)
    x = np.expand_dims(img, axis=0)
    x = preprocess_input(x)
    x_train[i] = x
print('Train Images shape: {} size: {:,}'.format(x_train.shape, x_train.size))

10222it [00:31, 320.72it/s]

Train Images shape: (10222, 224, 224, 3) size: 1,538,697,216





In [15]:
print("\nReading Files")
df_train = pd.read_csv('../labels.csv')
df_test = pd.read_csv('../sample_submission.csv')

print("\nFormatting Data and Submission Type")
targets_series = pd.Series(df_train['breed'])
one_hot = pd.get_dummies(targets_series, sparse = True)
one_hot_labels = np.asarray(one_hot)

y_train = []
i = 0 
for i, img_id in tqdm(enumerate(labels['id'])):
    label = one_hot_labels[i]
    y_train.append(label)
    i += 1


Reading Files


10222it [00:00, 1681010.61it/s]


Formatting Data and Submission Type





In [17]:
y_train_raw = np.array(y_train, np.uint8)
num_class = y_train_raw.shape[1]
print(num_class)

120


In [18]:
X_train, X_valid, Y_train, Y_valid = train_test_split(x_train, y_train_raw, test_size=0.3, random_state=1)

In [21]:
base_model = VGG19(weights = 'imagenet', include_top=False, input_shape=(224, 224, 3))

In [22]:
x = base_model.output
x = Flatten()(x)
predictions = Dense(num_class, activation='softmax')(x)

In [23]:
model = Model(inputs=base_model.input, outputs=predictions)

In [24]:
for layer in base_model.layers:
    layer.trainable = False

model.compile(loss='categorical_crossentropy', 
              optimizer='adam', 
              metrics=['accuracy'])

In [27]:
callbacks_list = [keras.callbacks.EarlyStopping(monitor='val_acc', patience=3, verbose=1)]
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [None]:
model.fit(X_train, Y_train, epochs=50, validation_data=(X_valid, Y_valid), verbose=1)

Train on 7155 samples, validate on 3067 samples
Epoch 1/50
 128/7155 [..............................] - ETA: 6744s - loss: 15.8830 - acc: 0.0078    