In [None]:

import keras
import keras.backend as K
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten, MaxPool2D
from keras.optimizers import RMSprop
from keras.applications.vgg19 import VGG19
from keras.models import Model
import pandas as pd
import numpy as np
import random as rn
import math
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from numba import vectorize
from tqdm import tqdm
from skimage.io import imread, imshow, imread_collection, concatenate_images
from skimage.transform import resize
from skimage.util import crop, pad
from skimage.morphology import label
from skimage.color import rgb2gray, gray2rgb
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD
from joblib import Parallel, delayed
import cv2
import os
import sys

# Read in Dog Images

In [None]:
# --- Image geometry ---------------------------------------------------------
# Every image is resized to IMG_HEIGHT x IMG_WIDTH pixels with IMG_CHANNELS channels.
IMG_HEIGHT, IMG_WIDTH = 150, 150
IMG_CHANNELS = 3
INPUT_SHAPE = (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)

# Default mini-batch size (the training cell assigns BATCH_SIZE again before fitting).
BATCH_SIZE = 32

# --- Dataset locations ------------------------------------------------------
# Both directory paths end with a slash because file names are appended by
# plain string concatenation in the loading cells.
TRAIN_PATH = '../input/train/'
TEST_PATH = '../input/test/'

# Label table; later cells read its 'id' and 'breed' columns.
train_labels = pd.read_csv('../input/labels.csv')

# train_ids = next(os.walk(TRAIN_PATH))[2]


In [None]:
# Image ids in labels.csv row order; the loading cell appends '.jpg' to each
# id to build the file name.
train_ids = [image_id for image_id in train_labels["id"].values]

# One-hot encode the 'breed' column. `one_hot` keeps the column labels (used
# later to name the submission columns); `Y_one_hot` is the same table as a
# plain array with one row per entry of `train_ids`.
targets_series = pd.Series(train_labels['breed'])
one_hot = pd.get_dummies(targets_series, sparse=False)
Y_one_hot = np.asarray(one_hot)

# Peek at the first few ids.
train_ids[:10]

In [None]:
%%time
# Load every training image listed in `train_ids` into one uint8 array, resized
# to IMG_HEIGHT x IMG_WIDTH. Images that cannot be read or stored are skipped and
# recorded in `missed_ids`. The label matrix is then filtered down to the rows
# that were actually loaded, so X_train[i] and Y_one_hot[i] always describe the
# same image (previously a single skipped file shifted every later label).
X_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
final_train_ids = []   # ids that were loaded, in X_train row order
loaded_rows = []       # positions of those ids within train_ids / one_hot
missed_ids = []
missing_count = 0
print('Getting train images ... ')
sys.stdout.flush()

for n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):
    try:
        path = TRAIN_PATH + id_ + '.jpg'
        img = imread(path)
        img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
        # Loaded images are packed contiguously from row 0; skipped images
        # leave no gap.
        X_train[n - missing_count] = img
    except Exception:
        # Best-effort load: remember the id and move on. `Exception` (rather
        # than a bare except) lets KeyboardInterrupt stop a long-running load.
        missed_ids.append(id_)
        missing_count += 1
        continue
    final_train_ids.append(id_)
    loaded_rows.append(n)

print("Total missing: " + str(missing_count))

# Drop the unused rows at the end of the preallocated buffer.
X_train = X_train[0:X_train.shape[0] - missing_count]

# Rebuild the labels from `one_hot` (not from the previous Y_one_hot) so that
# re-running this cell gives the same result, keeping only the loaded rows.
Y_one_hot = np.asarray(one_hot)[np.asarray(loaded_rows, dtype=int)]
assert len(X_train) == len(Y_one_hot) == len(final_train_ids)

In [None]:
# Sanity check: print the breed whose one-hot column is set for the first
# label row, and display the first loaded image next to it.
first_breed_column = Y_one_hot[0].argmax()
print(one_hot.columns.values[first_breed_column])
plt.imshow(X_train[0])

In [None]:
# Hold out 20% of the samples for validation; random_state is fixed so the
# split is the same on every run.
# NOTE: X_train is rebound to the training portion only, so this cell must be
# preceded by the image-loading cell each time it is run.
split_parts = train_test_split(
    X_train,
    Y_one_hot,
    test_size=0.2,
    random_state=1,
)
X_train, X_valid, Y_train, Y_valid = split_parts
X_train.shape

# Data Augmentation

In [None]:
# Random augmentation applied to training batches, grouped by kind.
augmentation_options = dict(
    # geometric jitter
    rotation_range=45,
    width_shift_range=0.15,
    height_shift_range=0.15,
    shear_range=0.15,
    zoom_range=0.1,
    # colour jitter
    channel_shift_range=0.1,
    # mirroring along both axes
    horizontal_flip=True,
    vertical_flip=True,
    # pixels exposed by a transform are filled with the constant `cval`
    fill_mode='constant',
    cval=0,
)
datagen = ImageDataGenerator(**augmentation_options)

# NOTE(review): no featurewise_* / zca_whitening option is enabled above, so
# this fit presumably has no statistics to learn for the generator — confirm
# against the Keras documentation before removing it.
datagen.fit(X_train, augment=True, seed=1)

# Build the Model

In [None]:
# Transfer learning: an ImageNet-initialised VGG19 without its classifier top,
# followed by a small new head.
base_model = VGG19(weights='imagenet', include_top=False, input_shape=INPUT_SHAPE)

# Freeze every layer of the backbone so only the new head below is trained.
for layer in base_model.layers:
    layer.trainable = False

# New head: one extra convolution, flatten, three shrinking dense layers and a
# 120-way softmax output.
# NOTE(review): 120 is hard-coded; presumably it equals the number of breed
# columns in the one-hot labels — confirm.
x = Convolution2D(256, (2, 2), activation='elu', padding='same')(base_model.output)
x = Flatten()(x)
for hidden_units in (512, 256, 128):
    x = Dense(hidden_units, activation='relu')(x)
predictions = Dense(120, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# All three callbacks watch the validation loss, treat lower as better, and
# log their actions (verbose=1).
watch_val_loss = dict(monitor='val_loss', mode='min', verbose=1)

# Halve the learning rate after 5 epochs without improvement, never going
# below 0.00001.
learning_rate_reduction = ReduceLROnPlateau(
    patience=5,
    factor=0.5,
    min_lr=0.00001,
    **watch_val_loss
)

# Stop after 10 epochs without improvement and roll back to the best weights.
early_stopping = EarlyStopping(
    patience=10,
    restore_best_weights=True,
    **watch_val_loss
)

# Save the model to `filepath` only when the monitored value improves.
filepath = "Dog_Model.h5"
checkpoint = ModelCheckpoint(
    filepath,
    save_best_only=True,
    **watch_val_loss
)

model_callbacks = [learning_rate_reduction, checkpoint, early_stopping]

In [None]:
# Training uses a batch size of 128 (this replaces the value set in the
# configuration cell).
BATCH_SIZE = 128

# Augmented training batches, and the number of full batches that fit into the
# training set (used as the length of one epoch).
train_batches = datagen.flow(X_train, Y_train, batch_size=BATCH_SIZE)
full_batches_per_epoch = X_train.shape[0] // BATCH_SIZE

# Train for at most 200 epochs; the callbacks may lower the learning rate,
# checkpoint the model, or stop the run early. Validation data is passed as
# plain arrays, not through the augmenting generator.
history = model.fit_generator(
    train_batches,
    steps_per_epoch=full_batches_per_epoch,
    epochs=200,
    validation_data=(X_valid, Y_valid),
    callbacks=model_callbacks,
    verbose=2,
)

In [None]:
# Accuracy per epoch: the training curve is drawn first, then the validation
# curve, matching the legend order below.
for metric_key in ('acc', 'val_acc'):
    plt.plot(history.history[metric_key])

plt.title('model accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['train', 'valid'], loc='upper left')
plt.show()

In [None]:
%%time

# The training pixels are not used after fitting; drop the name so the array
# can be released before the test images are loaded.
# NOTE: because of this `del`, re-running the cell without rebuilding X_train
# raises NameError.
del X_train

# The sample submission lists the test image ids in the order the submission
# file is built from later.
df_test = pd.read_csv('../input/sample_submission.csv')
test_ids = [image_id for image_id in df_test["id"].values]
test_ids[:10]

In [None]:
%%time
# Load every test image listed in `test_ids` into one uint8 array, resized to
# IMG_HEIGHT x IMG_WIDTH x IMG_CHANNELS.
#
# Row n of X_test always belongs to test_ids[n]. An image that cannot be read
# keeps its all-zero placeholder row instead of being removed, so predictions
# stay aligned row-for-row with df_test['id'] when the submission is built
# (removing rows, as before, shifted every later prediction onto the wrong id).
X_test = np.zeros((len(test_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
missed_ids = []
missing_count = 0
print('Getting test images ... ')
sys.stdout.flush()
for n, id_ in tqdm(enumerate(test_ids), total=len(test_ids)):
    try:
        path = TEST_PATH + id_ + '.jpg'
        img = imread(path)
        img = resize(img, (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), mode='constant', preserve_range=True)
        X_test[n] = img
    except Exception:
        # Best-effort load: record the id and leave row n as zeros.
        # `Exception` (rather than a bare except) lets KeyboardInterrupt stop
        # a long-running load.
        missed_ids.append(id_)
        missing_count += 1

print("Total missing: " + str(missing_count))

In [None]:
# Class probabilities for the test images: one row per row of X_test, one
# column per output unit of the softmax layer.
preds = model.predict(
    x=X_test,
    verbose=1,
)

In [None]:
# Submission table: one probability column per breed, labelled with the column
# names of the one-hot training labels, plus the image id as the first column.
# NOTE(review): the ids are attached by row index, so this assumes `preds` has
# one row per row of df_test, in the same order — confirm against the test
# loading cell.
col_names = one_hot.columns.values
sub = pd.DataFrame(preds, columns=col_names)
sub.insert(0, 'id', df_test['id'])
sub.head(5)

In [None]:
# Write the submission with a header row and without the DataFrame index.
sub.to_csv(
    "Dog_Submission.csv",
    header=True,
    index=False,
)