In [1]:
import keras

Using TensorFlow backend.


In [2]:
from keras.applications import VGG16, Xception, InceptionV3, ResNet50
from keras.models import Model
from keras.layers import AveragePooling2D, Flatten, Dense, Input, GlobalAveragePooling2D, Dropout
from keras.preprocessing.image import ImageDataGenerator
import h5py

In [3]:
train_gen = ImageDataGenerator(rescale=1./255)
validation_gen = ImageDataGenerator(rescale=1./255)
test_gen = ImageDataGenerator(rescale=1./255)

In [4]:
train_data = train_gen.flow_from_directory('../data/train', 
                                           target_size=(224, 224), 
                                           classes=['cat_train', 'dog_train'], 
                                           class_mode='binary', shuffle=False)
validation_data = validation_gen.flow_from_directory('../data/validation', 
                                                     target_size=(224, 224), 
                                                     classes=['cat', 'dog'], 
                                                     class_mode='binary', shuffle=False)
test_data = test_gen.flow_from_directory('../data', 
                                        target_size=(224, 224),
                                        classes=['test'],
                                        class_mode=None, shuffle=False)

Found 24600 images belonging to 2 classes.
Found 400 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.


In [5]:
vgg_model = VGG16(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

In [14]:
vgg_train_featuremap = vgg_model.predict_generator(train_data, steps=(train_data.n//train_data.batch_size+1))

In [16]:
vgg_validation_featuremap = vgg_model.predict_generator(validation_data, 
                                                        steps=(validation_data.n//validation_data.batch_size+1))

In [11]:
with h5py.File('../data/vgg.h5', 'w') as f:
    f.create_dataset('train', data=vgg_train_featuremap)
    f.create_dataset('validation', data=vgg_validation_featuremap)

In [10]:
f = h5py.File('../data/vgg.h5', 'r')
vgg_train_featuremap = f.get('train')
vgg_validation_featuremap = f.get('validation')

In [12]:
from keras.models import Sequential

In [13]:
mymodel = Sequential()
mymodel.add(Flatten(input_shape=vgg_train_featuremap.shape[1:]))
mymodel.add(Dense(256, activation='relu'))
mymodel.add(Dropout(0.5))
mymodel.add(Dense(1, activation='sigmoid'))

In [14]:
mymodel.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy'])

In [15]:
import numpy as np
train_label = np.array([0]*12300+[1]*12300)

In [16]:
validation_label = np.array([0]*200 + [1]*200)

In [27]:
mymodel.fit(vgg_train_featuremap.value, train_label, epochs=20, batch_size=50, 
           validation_data=[vgg_validation_featuremap.value, validation_label])

Train on 24600 samples, validate on 400 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f3163a295c0>

In [28]:
model_weight = mymodel.get_weights()

In [39]:
input_ten = Input(vgg_train_featuremap.shape[1:])
x = Flatten()(input_ten)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(1, activation='sigmoid')(x)
top_model = Model(input_ten, x)

In [40]:
top_model.set_weights(model_weight)

In [41]:
input_tensor = Input(shape=(224, 224, 3))
x = vgg_model.get_layer(index=1)(input_tensor)
for i in range(2, len(vgg_model.layers)):
    x = vgg_model.get_layer(index=i)(x)
x = top_model(x)
new_model = Model(input_tensor, x)

In [42]:
for layer in new_model.layers[:-5]:
    layer.trainable = False

In [43]:
new_model.compile(optimizer=keras.optimizers.SGD(lr=1e-5, momentum=0.9), loss='binary_crossentropy', 
                  metrics=['accuracy'])

In [44]:
train_generator = ImageDataGenerator(rescale=1./255)
vali_generator = ImageDataGenerator(rescale=1./255)

In [45]:
generator_a = train_generator.flow_from_directory('../data/train', target_size=(224, 224), 
                                                  classes=['cat_train', 'dog_train'], class_mode='binary', 
                                                 shuffle=True)

Found 24600 images belonging to 2 classes.


In [46]:
generator_b = vali_generator.flow_from_directory('../data/validation/', target_size=(224, 224), 
                                                 classes=['cat', 'dog'], class_mode='binary', shuffle=True)

Found 400 images belonging to 2 classes.


In [49]:
new_model.fit_generator(train_data, steps_per_epoch=(train_data.n//train_data.batch_size+1), 
                       epochs=3, validation_data=validation_data, 
                       validation_steps=(validation_data.n//validation_data.batch_size+1))

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f3163a43fd0>

In [5]:
inceptionv3_model = InceptionV3(include_top=False, input_shape=(224, 224, 3))

In [7]:
incep_train_featuremap = inceptionv3_model.predict_generator(train_data, 
                                                             steps=(train_data.n//train_data.batch_size+1))

In [8]:
run_step = validation_data.n//validation_data.batch_size+1
incep_validation_featuremap = inceptionv3_model.predict_generator(validation_data, 
                                                                  steps=run_step)

In [10]:
from keras.models import Sequential
mymodel = Sequential()
mymodel.add(Flatten(input_shape=incep_train_featuremap.shape[1:]))
mymodel.add(Dense(256, activation='relu'))
mymodel.add(Dropout(0.5))
mymodel.add(Dense(1, activation='sigmoid'))

In [11]:
mymodel.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy'])

In [12]:
import numpy as np
train_label = np.array([0]*12300+[1]*12300)
validation_label = np.array([0]*200 + [1]*200)

In [14]:
mymodel.fit(incep_train_featuremap, train_label, epochs=50, batch_size=50, 
           validation_data=[incep_validation_featuremap, validation_label])

Train on 24600 samples, validate on 400 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f768495ecc0>

In [20]:
test_featuremap = inceptionv3_model.predict_generator(test_data, steps=(test_data.n//test_data.batch_size+1))

In [22]:
test_featuremap.shape

(12500, 5, 5, 2048)

In [23]:
incep_predict = mymodel.predict(test_featuremap)

In [40]:
incep_predict = incep_predict.clip(0.05, 0.95)

In [26]:
import pandas as pd

In [41]:
df = pd.read_csv('../submission/prediction.csv')

In [28]:
import os

In [42]:
for i, fname in enumerate(os.listdir('../data/test')):
    id = int(fname.split('.')[0])
    df.set_value(i, col='id', value=id)
    df.set_value(i, col='label', value=incep_predict[i])

In [43]:
df = df.sort_values(by='id')

In [36]:
df.head()

Unnamed: 0,id,label
3767,1,1.0
5019,2,4.656537e-09
8392,3,0.9999998
3594,4,1.371836e-12
2819,5,0.9999999


In [44]:
df.to_csv('../submission/new_pred.csv', index=False)

In [52]:
resnet_model = ResNet50(include_top=False, input_shape=(224, 224, 3))

In [53]:
res_train_featuremap = resnet_model.predict_generator(train_data, 
                                                      steps=(train_data.n//train_data.batch_size+1))

In [125]:
res_validaiont_featuremap = resnet_model.predict_generator(validation_data, 
                                                           steps=(validation_data.n//validation_data.batch_size+1))