In [1]:
import numpy as np
import pandas as pd
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.callbacks import Callback, EarlyStopping, TensorBoard
from keras.models import Model, Sequential
from keras.utils import multi_gpu_model
from keras.layers import Dense, Flatten, Dropout, Input, Lambda
from keras.preprocessing import image
from keras import optimizers

import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid

import os
from tqdm import tqdm
import time

In [2]:
import shutil
def rebase_dataset(name, dataframe, class_header, source_dir="train"):
    """Move images into one sub-directory per class under ``name``.

    For every row of ``dataframe`` the file ``<source_dir>/<id>.jpg`` is moved
    to ``<name>/<class>/<id>.jpg``, where ``<id>`` is the row's ``"id"`` value
    and ``<class>`` is its ``class_header`` value. The class directories are
    created up front when they do not exist yet.

    Args:
        name: Root directory of the rebased dataset.
        dataframe: Table with an ``"id"`` column and a ``class_header`` column.
        class_header: Name of the column holding the class label.
        source_dir: Directory that currently holds the flat ``<id>.jpg`` files.
            Defaults to ``"train"``.

    Raises:
        Whatever ``shutil.move`` raises when a source image is missing and is
        not already present at its destination.
    """
    for label in dataframe[class_header].unique():
        os.makedirs("{}/{}".format(name, label), exist_ok=True)

    # Walk the two columns directly instead of using iterrows(): iterrows()
    # builds one Series per row and upcasts all-numeric rows to a common dtype,
    # so an integer id or label could be formatted as "7.0".
    for image_id, label in zip(dataframe["id"], dataframe[class_header]):
        source = "{}/{}.jpg".format(source_dir, image_id)
        target = "{}/{}/{}.jpg".format(name, label, image_id)
        if not os.path.exists(source) and os.path.exists(target):
            # Already moved by an earlier run; keeps the cell re-runnable.
            continue
        shutil.move(source, target)

In [3]:
# Seed NumPy's global RNG. This has to be a call: assigning to
# `np.random.seed` replaces the seeding function and seeds nothing.
np.random.seed(1)
df = pd.read_csv("labels.csv")
# Disabled random ~70/30 split of `df`; the splits are read from CSV files
# below instead.
# mask = np.random.rand(len(df)) < 0.7
# df_train = df[mask]
# df_valid = df[~mask]

# NOTE(review): the validation frame is loaded from "test.csv" — confirm this
# is the intended file.
df_train = pd.read_csv("train.csv")
df_valid = pd.read_csv("test.csv")

In [4]:
# Report the number of rows in each split.
n_train, n_valid = len(df_train), len(df_valid)
print("Items in train set: {}, valid set: {}".format(n_train, n_valid))

Items in train set: 7160, valid set: 3062


In [5]:
def preprocess_func(x):
    """Apply the VGG16 ``preprocess_input`` to an image and return an image.

    ``x`` is converted to an array with ``image.img_to_array``, passed through
    ``preprocess_input``, and converted back with ``image.array_to_img``.

    NOTE(review): no cell visible here calls this helper — confirm it is still
    needed.
    """
    as_array = image.img_to_array(x)
    preprocessed = preprocess_input(as_array)
    return image.array_to_img(preprocessed)

In [6]:
batch_size = 128

# Both generators run the VGG16 `preprocess_input` on every image so that the
# frozen ImageNet base sees the same input encoding during training and
# validation as in the single-image prediction cell, which already applies
# `preprocess_input` by hand. Keras applies `preprocessing_function` after
# resizing and augmentation.
train_datagen = image.ImageDataGenerator(
    preprocessing_function=preprocess_input,
    # Random augmentation, training data only.
    rotation_range=40,
    width_shift_range=.2,
    height_shift_range=.2,
    shear_range=.2,
    zoom_range=.2,
    horizontal_flip=True,
    fill_mode="nearest",
)

# Validation images get the same preprocessing but no augmentation.
valid_datagen = image.ImageDataGenerator(preprocessing_function=preprocess_input)

# One sub-directory per class; images are resized to 224x224, matching the
# `input_shape` given to VGG16.
train_generator = train_datagen.flow_from_directory("train/", target_size=(224, 224), batch_size=batch_size)
valid_generator = valid_datagen.flow_from_directory("valid/", target_size=(224, 224), batch_size=batch_size)

Found 7160 images belonging to 120 classes.
Found 3729 images belonging to 120 classes.


In [14]:
# Pretrained VGG16 convolutional base, loaded without its classifier head.
vgg = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

# Mark every layer of the base as non-trainable so only the new head is fitted.
for base_layer in vgg.layers:
    base_layer.trainable = False

# Trainable classifier stacked on the frozen base: flatten the base output,
# two 4096-unit ReLU layers, dropout, then a 120-way softmax.
# Input preprocessing is not part of the model graph.
model = Sequential([
    vgg,
    Flatten(),
    Dense(4096, activation="relu"),
    Dense(4096, activation="relu"),
    Dropout(0.5),
    Dense(120, activation="softmax"),
])

optimizer = optimizers.SGD(lr=1e-2, momentum=0.9)

model.compile(
    optimizer=optimizer,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten_2 (Flatten)          (None, 25088)             0         
_________________________________________________________________
dense_4 (Dense)              (None, 4096)              102764544 
_________________________________________________________________
dense_5 (Dense)              (None, 4096)              16781312  
_________________________________________________________________
dropout_2 (Dropout)          (None, 4096)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 120)               491640    
Total params: 134,752,184
Trainable params: 120,037,496
Non-trainable params: 14,714,688
_____________________________________________________

In [16]:
# Timestamp used to give each run its own TensorBoard log directory.
now = time.strftime("%Y%m%d_%H%M%S")

# Stop when val_loss has not improved for 3 epochs; log the run to TensorBoard.
early_stopping = EarlyStopping(monitor="val_loss", patience=3)
tensorboard = TensorBoard(log_dir="logs/" + now)
callbacks = [early_stopping, tensorboard]

model.fit_generator(
    train_generator,
    epochs=10,
    callbacks=callbacks,
    validation_data=valid_generator,
)

Epoch 1/10
Epoch 2/10

KeyboardInterrupt: 

In [9]:
# Predict the class index for one validation image (row 45 of df_valid).
sample = df_valid.iloc[45]
sample_path = "valid/{}/{}.jpg".format(sample.breed, sample.id)

img = image.load_img(sample_path, target_size=(224, 224))
# Apply the VGG16 preprocessing, then add a leading batch axis for predict.
img = preprocess_input(image.img_to_array(img))[np.newaxis, ...]
model.predict_classes(img)

array([81])

In [13]:
# Remove the `model` name from the notebook namespace; cells that use `model`
# must rebuild it before running again.
del model