In [13]:
from __future__ import print_function
import keras
from keras.layers import Dense, Conv2D, BatchNormalization, Activation
from keras.layers import AveragePooling2D, Input, Flatten
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.callbacks import ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from keras.regularizers import l2
from keras import backend as K
from keras.models import Model
import numpy as np
import os

## Load the pre-trained VGG16 convolutional base

In [14]:
from keras.applications import VGG16

# VGG16 loaded with ImageNet weights and without its top (classifier) layers,
# configured for 150x150 RGB inputs.
conv_base = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(150, 150, 3),
)

## Prepare required image datasets

In [15]:
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

# Root folder of the image data; each split lives in its own sub-folder.
base_dir = './image_data_1/'

# Build the three split paths from the root in one pass.
train_dir, validation_dir, test_dir = (
    os.path.join(base_dir, split_name)
    for split_name in ('train', 'validation', 'test')
)

# Batch size read by extract_features as a module-level name.
batch_size = 20
# Generator that only rescales pixel values by 1/255 (no augmentation).
datagen = ImageDataGenerator(rescale=1./255)

def extract_features(directory, sample_count):
    """Run images from `directory` through `conv_base` and collect the results.

    Images are read with the module-level `datagen` in batches of the
    module-level `batch_size`, resized to 150x150, and passed to
    `conv_base.predict`.

    Args:
        directory: Folder handed to `datagen.flow_from_directory`.
        sample_count: Number of samples to collect. Must not exceed the
            number of images the generator reports for `directory`.

    Returns:
        A `(features, labels)` tuple of NumPy arrays with shapes
        `(sample_count, 4, 4, 512)` and `(sample_count,)`.

    Raises:
        ValueError: If `sample_count` is larger than the number of images
            found, since the generator would start repeating images.
    """
    features = np.zeros(shape=(sample_count, 4, 4, 512))
    labels = np.zeros(shape=(sample_count))
    if sample_count == 0:
        # Nothing to collect; skip building the generator altogether.
        return features, labels

    # NOTE(review): class_mode='binary' is kept as in the original call even
    # though the captured output reports 5 classes -- confirm the labels it
    # yields are the intended class indices.
    generator = datagen.flow_from_directory(
        directory,
        target_size=(150, 150),
        batch_size=batch_size,
        class_mode='binary')

    if sample_count > generator.samples:
        raise ValueError(
            'sample_count (%d) exceeds the %d images found in %r'
            % (sample_count, generator.samples, directory))

    filled = 0
    for inputs_batch, labels_batch in generator:
        features_batch = conv_base.predict(inputs_batch)
        # A batch can be shorter than batch_size (the last one of a pass) or
        # longer than the space still free, so size the write by what is
        # actually available on both sides.
        take = min(len(features_batch), sample_count - filled)
        features[filled:filled + take] = features_batch[:take]
        labels[filled:filled + take] = labels_batch[:take]
        filled += take
        if filled >= sample_count:
            # The generator yields data indefinitely, so stop explicitly
            # once the requested number of samples has been stored.
            break
    return features, labels

# Extract features for the training and validation splits. The sample counts
# equal the image totals reported by flow_from_directory for each folder.
train_features, train_labels = extract_features(
    directory=train_dir, sample_count=2315)
validation_features, validation_labels = extract_features(
    directory=validation_dir, sample_count=508)
# test_features, test_labels = extract_features(test_dir, 2000)

Found 2315 images belonging to 5 classes.
Found 508 images belonging to 5 classes.


In [25]:
from keras.preprocessing.image import ImageDataGenerator

# Training parameters
batch_size = 8
epochs = 20
data_augmentation = True
image_size = (150, 150)
num_classes = 5

# Validation images are only rescaled -- augmentation is reserved for training.
test_datagen = ImageDataGenerator(rescale=1./255)

# Training images are rescaled and randomly rotated, shifted, sheared,
# zoomed and horizontally flipped.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Both generators resize every image to image_size (150x150) and share the
# same batch size. class_mode is not passed, so the library default applies.
train_generator = train_datagen.flow_from_directory(
    train_dir, target_size=image_size, batch_size=batch_size)

validation_generator = test_datagen.flow_from_directory(
    validation_dir, target_size=image_size, batch_size=batch_size)

Found 2315 images belonging to 5 classes.
Found 508 images belonging to 5 classes.


## Set up the ModelCheckpoint callback

In [28]:
from keras.callbacks import ModelCheckpoint, LearningRateScheduler

# Checkpoints are written to ./saved_models under the current working
# directory; create the folder if it is not there yet.
save_dir = os.path.join(os.getcwd(), 'saved_models')
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)

# The {epoch:03d} placeholder is filled in per epoch, giving names such as
# vgg16_model.001.h5.
model_name = 'vgg16_model.{epoch:03d}.h5'
filepath = os.path.join(save_dir, model_name)

# Save a model file only when validation accuracy improves.
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_acc',
    save_best_only=True,
    verbose=1,
)

callbacks = [checkpoint]

## Freeze the layers before block5_conv1 and fine-tune the rest

In [30]:
# Fine-tuning: layers ahead of 'block5_conv1' are frozen; that layer and
# every layer after it stay trainable.
conv_base.trainable = True

set_trainable = False
for layer in conv_base.layers:
    # The flag flips to True at 'block5_conv1' and remains True afterwards.
    set_trainable = set_trainable or layer.name == 'block5_conv1'
    layer.trainable = set_trainable

## Training model

In [31]:
# NOTE(review): `model` is not defined in any cell of this notebook as shown;
# it must be built (on top of `conv_base`, presumably) before this cell runs.
# Confirm and restore the missing definition.

# Compile with a small learning rate for fine-tuning. `Adam` is the optimizer
# class imported at the top of the notebook; no `optimizers` module is
# imported, so it is referenced directly.
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=1e-5),
              metrics=['acc'])

# Whole batches per pass, derived from the image counts the generators report
# (2315 training / 508 validation images in the captured output).
steps_per_epoch = train_generator.samples // batch_size
validation_steps = validation_generator.samples // batch_size

# Trains for 80 epochs; the `epochs` constant set alongside the other training
# parameters (20) is not used here.
history = model.fit_generator(
      train_generator,
      steps_per_epoch=steps_per_epoch,
      epochs=80,
      validation_data=validation_generator,
      validation_steps=validation_steps,
      verbose=1,
      callbacks=callbacks)

Epoch 1/80

Epoch 00001: val_acc improved from -inf to 0.72619, saving model to D:\數據分析\3. Self\100Day-ML-Marathon\Final Exam On Kaggle\saved_models\vgg16_model.001.h5
Epoch 2/80

Epoch 00002: val_acc improved from 0.72619 to 0.77000, saving model to D:\數據分析\3. Self\100Day-ML-Marathon\Final Exam On Kaggle\saved_models\vgg16_model.002.h5
Epoch 3/80

Epoch 00003: val_acc improved from 0.77000 to 0.78400, saving model to D:\數據分析\3. Self\100Day-ML-Marathon\Final Exam On Kaggle\saved_models\vgg16_model.003.h5
Epoch 4/80

Epoch 00004: val_acc improved from 0.78400 to 0.79800, saving model to D:\數據分析\3. Self\100Day-ML-Marathon\Final Exam On Kaggle\saved_models\vgg16_model.004.h5
Epoch 5/80

Epoch 00005: val_acc improved from 0.79800 to 0.80400, saving model to D:\數據分析\3. Self\100Day-ML-Marathon\Final Exam On Kaggle\saved_models\vgg16_model.005.h5
Epoch 6/80

Epoch 00006: val_acc did not improve from 0.80400
Epoch 7/80

Epoch 00007: val_acc improved from 0.80400 to 0.80600, saving model to D:\


Epoch 00080: val_acc did not improve from 0.87800


## Predict on the test set and generate the submission CSV

In [32]:
import pandas as pd
# List the files in the test folder in a one-column DataFrame.
test_filenames = os.listdir(test_dir)
test_df = pd.DataFrame({'filename': test_filenames})
nb_samples = len(test_df)

# Test images are only rescaled. No labels are requested (y_col and
# class_mode are None) and shuffling is off so batches follow the row order.
test_gen = ImageDataGenerator(rescale=1./255)
test_generator = test_gen.flow_from_dataframe(
    dataframe=test_df,
    directory=test_dir,
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=image_size,
    batch_size=batch_size,
    shuffle=False,
)

Found 2000 validated image filenames.


In [40]:
# Number of batches needed to cover the test set once. Round up so a final
# partial batch is still predicted, and cast to int because np.ceil returns a
# float while `steps` is a count of batches.
steps = int(np.ceil(test_generator.samples / batch_size))
predict = model.predict_generator(test_generator, steps=steps)
print(predict.shape)

(2000, 5)


In [41]:
import pandas as pd
# One row per test image: the id is the file name up to its first '.', and
# the class is the index of the highest predicted score.
submission_df = pd.DataFrame(
    {
        'id': [name.split('.')[0] for name in test_generator.filenames],
        'flower_class': np.argmax(predict, axis=1),
    },
    columns=['id', 'flower_class'],
)
submission_df.head()

Unnamed: 0,id,flower_class
0,0028624c49b3e0610ff9f1d111f5d532,2
1,002c30700185b7971369258b438070d5,4
2,00852f4f666acecd0c0d140365b42efd,4
3,00c08828fce04e360c732cac01edad9e,4
4,00d366e7877b6a78b104b57d67b60e6b,2


In [42]:
# Write the submission table to CSV, leaving out the DataFrame index.
file_name = 'vgg_16_pix150_epoch_80_call_back_val_acc_0.878.csv'
submission_df.to_csv(path_or_buf=file_name, index=False)