In [4]:
import numpy as np #linalg
import pandas as pd #IO

import os
from tqdm import tqdm
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import cv2

from os import listdir, makedirs, getcwd, remove
from os.path import isfile, join, abspath, exists, isdir, expanduser
from shutil import copy2

import datetime
import re

#Using TensorFlow 
import keras


In [5]:
# VGG19 is slower but more accurate than VGG16
from keras.applications.vgg19 import VGG19 
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten

In [6]:
print("\nReading Files")
df_train = pd.read_csv('../labels.csv')
df_test = pd.read_csv('../sample_submission.csv')

#Format data into sample submission format
print("\nFormatting Data and Submission Type")
targets_series = pd.Series(df_train['breed'])
one_hot = pd.get_dummies(targets_series, sparse = True)
one_hot_labels = np.asarray(one_hot)


Reading Files

Formatting Data and Submission Type


In [7]:
#Set the Image Rescale size
print("\nBuilding Training Test...")
im_size = 224

#Build the training arrays
x_train = []
y_train = []
x_test = []

i = 0 
for f, breed in tqdm(df_train.values):
    img = cv2.imread('../train/{}.jpg'.format(f))
    label = one_hot_labels[i]
    x_train.append(cv2.resize(img, (im_size, im_size)))
    y_train.append(label)
    i += 1
    
for f in tqdm(df_test['id'].values):
    img = cv2.imread('../test/{}.jpg'.format(f))
    x_test.append(cv2.resize(img, (im_size, im_size)))

  0%|          | 36/10222 [00:00<00:29, 345.21it/s]


Building Training Test...


100%|██████████| 10222/10222 [00:24<00:00, 415.70it/s]
100%|██████████| 10357/10357 [00:24<00:00, 424.88it/s]


In [11]:
def resize_image(filedir):
    im = cv2.resize(cv2.imread(filedir), (224, 224)).astype(np.float32)
    im = im.transpose((2,0,1))
    im = np.expand_dims(im, axis=0)
    return im

In [12]:
print("\nBuilding Training Test...")
im_size = 224

#Build the training arrays
x_train = []
y_train = []
x_test = []


Building Training Test...


In [None]:
i = 0 
for f, breed in tqdm(df_train.values):
    filedir = '../train/{}.jpg'.format(f)
    label = one_hot_labels[i]
    x_train.append(resize_image(filedir))
    y_train.append(label)
    i += 1
    
for f in tqdm(df_test['id'].values):
    filedir = cv2.imread'../test/{}.jpg'.format(f))
    x_test.append(cv2.resize(img, (im_size, im_size)))

In [6]:
num_class = y_train_raw.shape[1]

X_train, X_valid, Y_train, Y_valid = train_test_split(x_train_raw, y_train_raw, test_size=0.3, random_state=1)

print("\n Done")


 Done


In [7]:
print("\n\nBuilding Model...")

base_model = VGG19(weights='imagenet',include_top=False, input_shape=(im_size, im_size, 3))
x = base_model.output
x = Flatten()(x)
predictions = Dense(num_class, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)
for layer in base_model.layers:
    layer.trainable = False



Building Model...


In [8]:
print("\nTraining Model...")

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
#callbacks_list = [keras.callbacks.EarlyStopping(monitor='val_acc', patience=3, verbose=1)]
model.summary()

model.fit(X_train, Y_train, epochs=100, validation_data=(X_valid, Y_valid), verbose=1)
print("\nDone")


Training Model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 90, 90, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 90, 90, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 90, 90, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 45, 45, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 45, 45, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 45, 45, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 22, 22, 128)       0 

In [9]:
print("\nPredicting")
preds = model.predict(x_test, verbose=1)
sub = pd.DataFrame(preds)

# Set column names to those generated by the one-hot encoding earlier
col_names = one_hot.columns.values
sub.columns = col_names
# Insert the column id from the sample_submission at the start of the data frame
sub.insert(0, 'id', df_test['id'])
sub.head(5)

# Write to file
print("\n Printing to Disk")

filename = "Prediction_Dog_Classif_" + re.sub("[^0-9]", "",str(datetime.datetime.now())) + '.csv'
print(filename)
sub.to_csv(filename,index=False)


Predicting

 Printing to Disk
Prediction_Dog_Classif_20171019043253413827.csv
