In [1]:
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from keras.preprocessing.image import load_img, img_to_array
from keras import optimizers
from keras.models import Model
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras.models import model_from_json
from keras.callbacks import ModelCheckpoint
from keras.preprocessing import image

import os
import numpy as np
import pandas as pd
import random
import glob
import math

from PIL import ImageFile
from matplotlib.pyplot import imshow

Using TensorFlow backend.


In [2]:
%matplotlib inline

# Loading the data

In [3]:
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [4]:
def get_im(path):
    # Load as grayscale
    img = image.load_img(path, target_size=(150, 150))
    # Reduce size
    resized = image.img_to_array(img)
    return resized

In [11]:
# Get Data
def load_train():
    X_train = []
    y_train = []
    print('Read train images')
    thr = math.floor(1481/10)
    total = 0
    for j in range(1,4):
        print('Load folder Type_{}'.format(j))
        path = os.path.join('data', 'train', 'Type_' + str(j), '*.jpg')
        files = glob.glob(path)
        for fl in files:
            img = get_im(fl)
            X_train.append(img)
            y_train.append(j)
            total += 1
            if total%thr == 0:
                print('Read {} images from 1485'.format(total))

    return X_train, y_train

In [12]:
def load_test():
    print('Read test images')
    path = 'data/test/unknown/*.jpg'
    files = glob.glob(path)
    X_test = []
    X_test_id = []
    total = 0
    thr = math.floor(len(files)/10)
    for fl in files:
        flbase = os.path.basename(fl)
        img = get_im(fl)
        X_test.append(img)
        X_test_id.append(flbase)
        total += 1
        if total%thr == 0:
            print('Read {} images from {}'.format(total, len(files)))

    return X_test

In [13]:
X_test = load_test()

Read test images
Read 51 images from 512
Read 102 images from 512
Read 153 images from 512
Read 204 images from 512
Read 255 images from 512
Read 306 images from 512
Read 357 images from 512
Read 408 images from 512
Read 459 images from 512
Read 510 images from 512


In [14]:
X_train, y_train = load_train()

Read train images
Load folder Type_1
Read 148 images from 1485
Load folder Type_2
Read 296 images from 1485
Read 444 images from 1485
Read 592 images from 1485
Read 740 images from 1485
Read 888 images from 1485
Load folder Type_3
Read 1036 images from 1485
Read 1184 images from 1485
Read 1332 images from 1485
Read 1480 images from 1485


In [15]:
# Zero centering
X_train_centered = X_train
X_train_centered -= np.mean(X_train_centered, axis = 0) 

In [16]:
train_data_dir = 'data/train'

test_data_dir = 'data/test'
results_path = 'results/predictions'
results_name = 'predictions.csv'

In [17]:
# dimensions of our images.
img_width, img_height = 150, 150

# Found 24198 images belonging to 2 classes.
# Found 802 images belonging to 2 classes.
nb_train_samples = 1481

epochs = 20
batch_size = 16

In [18]:
base_model = applications.VGG16(include_top=False, weights='imagenet', input_tensor=None, input_shape=(img_width, img_height, 3))
print('Model Loaded')

Model Loaded


In [20]:
top_model = Sequential()
top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
top_model.add(Dense(256, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(3, activation='sigmoid'))

In [21]:
# add the model on top of the convolutional base
model = Model(input=base_model.input, output=top_model(base_model.output))

  


In [22]:
# set the first 25 layers (up to the last conv block)
# to non-trainable (weights will not be updated)
for layer in model.layers[:25]:
    layer.trainable = False

In [23]:
# compile the model with a SGD/momentum optimizer
# and a very slow learning rate.
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])

In [27]:
print(len(X_train), len(y_train))

1481 1481


In [21]:
# checkpoint
filepath="vgg-weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

In [24]:
history = model.fit(X_train, Y_train, batch_size=16, epochs=2, callbacks=callbacks_list, validation_split=0.33, shuffle=True, initial_epoch=0)

Train on 1571 samples, validate on 775 samples
Epoch 1/2


KeyError: '[1079  805  831  303 1550   45  887 1303 1138  709 1115  470 1416  355  868\n 1486  696 1380 1070   66 1442  257 1291 1190 1170 1290  522 1379 1007 1436\n  673 1350] not in index'

In [None]:
# serialize model to JSON
model_json = model.to_json()
with open("model_vgg.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("model_vgg.h5")
print("Saved model to disk")

In [None]:
# list all data in history
print(history.history.keys())
# summarize history for accuracy
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()