In [1]:
import warnings
import seaborn as sns
import matplotlib.pylab as plt
import PIL
import pandas as pd
import os
from PIL import Image

import tensorflow as tf
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import Xception, VGG16, ResNet50, InceptionV3
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras import layers, models, optimizers
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from sklearn.model_selection import train_test_split
from keras.applications.xception import Xception, preprocess_input
from keras.preprocessing.image import ImageDataGenerator


import numpy as np # linear algebra

from numpy.random import seed

Using TensorFlow backend.


In [2]:
K.clear_session()

In [3]:
SEED = 2
seed(SEED)
tf.set_random_seed(SEED)

In [None]:
DATA_PATH = '../input'
TRAIN_IMG_PATH = os.path.join(DATA_PATH, 'train')
TEST_IMG_PATH = os.path.join(DATA_PATH, 'test')
TRAIN_CROP_PATH = os.path.join(DATA_PATH, 'train_crop_ratio')
TEST_CROP_PATH = os.path.join(DATA_PATH, 'test_crop_ratio')

df_train = pd.read_csv(os.path.join(DATA_PATH, 'train.csv'))
df_test = pd.read_csv(os.path.join(DATA_PATH, 'test.csv'))
df_class = pd.read_csv(os.path.join(DATA_PATH, 'class.csv'))
image_size=299

In [9]:
def get_generator(train_df, val_df, train_dir, valid_dir, test_df, test_dir, image_size, batch_size,valid_batch_size,
                 scale='rgb', target='class'):
    train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df, 
        directory=train_dir,
        x_col = 'img_file',
        y_col = target,
        target_size=(image_size, image_size),
        batch_size=batch_size,
        class_mode='categorical',
        seed=3,
        color_mode=scale,
        shuffle=True,
        #preprocessing_function=get_random_eraser(v_l=0, v_h=1, pixel_level=True)

    )
    validation_generator = valid_datagen.flow_from_dataframe(
        dataframe=val_df,
        directory=valid_dir,
        x_col = 'img_file',
        y_col = target,
        target_size=(image_size,image_size),
        batch_size=valid_batch_size,
        class_mode='categorical',
        seed=3,
        color_mode=scale,
        shuffle=True,
        #processing_function=preprocess_input
    )
    test_generator = test_datagen.flow_from_dataframe(
        dataframe=test_df,
        directory=test_dir,
        x_col='img_file',
        y_col=None,
        target_size= (image_size,image_size),
        color_mode=scale,
        class_mode=None,
        batch_size=batch_size,
        shuffle=False,
        #preprocessing_function=preprocess_input
    )
    return train_generator, validation_generator, test_generator

In [10]:
train_datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=False,
    zoom_range=0.1,
    fill_mode='nearest'
    )

valid_datagen = ImageDataGenerator()
test_datagen = ImageDataGenerator()

In [11]:
df_train["class"] = df_train["class"].astype('str')
df_train = df_train[['img_file', 'class']]
df_test = df_test[['img_file']]

In [12]:
def split_traindf(df, train_size=0.6, stratify=True, label='class'):
    target = None
    if stratify:
        target = df[label].values
    X_train, X_val = train_test_split(df, train_size=train_size, random_state=SEED, stratify=target)
    X_train = X_train.reset_index(drop=True)
    X_val = X_val.reset_index(drop=True)
    return X_train, X_val

In [12]:
df_train = df_train.iloc[:5000, :]

In [13]:
X_train, X_val = split_traindf(df_train.iloc[:, :], train_size=0.8, stratify=True)
nb_train_sample = X_train.shape[0]
nb_validation_sample = X_val.shape[0]
nb_test_sample = df_test.shape[0]
scale = 'rgb'

In [14]:
batch_size=16
valid_batch_size=16

In [15]:
train_gen, validation_gen, test_gen = get_generator(train_df=X_train,
                                                    val_df=X_val,
                                                    train_dir=TRAIN_CROP_PATH,
                                                    valid_dir=TRAIN_CROP_PATH,
                                                    test_df=df_test,
                                                    test_dir=TEST_CROP_PATH,
                                                    image_size=image_size,
                                                    batch_size=batch_size,
                                                    scale=scale,
                                                    valid_batch_size=valid_batch_size)

Found 8012 validated image filenames belonging to 196 classes.
Found 2004 validated image filenames belonging to 196 classes.
Found 6169 validated image filenames.


In [16]:
def get_model(app, image_size, opt, num_class=196, lr=0.0001):
    if app=='Xception':
        application = Xception
    elif app=='VGG16':
        application = VGG16
    elif app=='ResNet50':
        application = ResNet50
    elif app=='InceptionV3':
        application = InceptionV3
    base_model = application(weights='imagenet', input_shape=(image_size,image_size,3), include_top=False)
    #base_model.trainable = False
    

    model = models.Sequential()
    model.add(base_model)
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dense(512, activation='sigmoid'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(num_class, activation='softmax'))
    #model.summary()
    
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['acc'])

    return model

In [17]:
def get_model_path(model_dir, model_name):
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)
    from datetime import datetime
    now = datetime.now()
    date_time = now.strftime("%m%d_%H%M")
    model_path = model_dir + date_time + model_name + '.hdf5'
    print('>>model path to save: {}'.format(model_path))
    return model_path

In [18]:
model_type='Xception'
image_size = 299 if model_type=='Xception' or model_type=='InceptionV3' else 224
histories=[]
patient = 3
lr = 0.0001
epoch=300
model_dir = '../xception_model/'
model_name = 'xception_ratiotest_0705'
model_path = get_model_path(model_dir, model_name)
model = get_model(app=model_type, image_size=image_size, opt=optimizers.RMSprop(lr=lr), lr=lr)
print('>>get model completed')

>>model path to save: ../xception_model/0705_0325xception_ratiotest_0705.hdf5
>>get model completed


In [19]:
def get_callback(patient, model_path, lr, total_count):
    callbacks = [
        EarlyStopping(monitor='val_loss',
                      patience=patient,
                      mode='min',
                      verbose=1),
        #ReduceLROnPlateau(monitor = 'val_loss', factor = 0.5, patience = patient / 2, min_lr=0.00001, verbose=1, mode='min'),
        ModelCheckpoint(filepath=model_path,
                        monitor='val_loss',
                        verbose=1,
                        save_best_only=True,
                        mode='min'),
        ReduceLROnPlateau(monitor = 'val_loss',
                          factor = 0.5, patience = patient / 2,
                          min_lr=0.00001, verbose=1, mode='min'),

    ]
    return callbacks

In [20]:
def get_steps(num_samples, batch_size):
    if (num_samples % batch_size) > 0:
        return (num_samples // batch_size) + 1
    else:
        return num_samples // batch_size
    

In [21]:
history = model.fit_generator(
    train_gen,
    steps_per_epoch=get_steps(nb_train_sample, batch_size),
    epochs=epoch,
    validation_data=validation_gen,
    validation_steps=get_steps(nb_validation_sample, valid_batch_size),
    verbose=1,
    callbacks=get_callback(patient, model_path, lr, len(X_train))
)

Epoch 1/300

Epoch 00001: val_loss improved from inf to 4.35806, saving model to ../xception_model/0705_0325xception_ratiotest_0705.hdf5
Epoch 2/300

Epoch 00002: val_loss improved from 4.35806 to 3.13813, saving model to ../xception_model/0705_0325xception_ratiotest_0705.hdf5
Epoch 3/300

Epoch 00003: val_loss improved from 3.13813 to 2.28910, saving model to ../xception_model/0705_0325xception_ratiotest_0705.hdf5
Epoch 4/300

Epoch 00004: val_loss improved from 2.28910 to 1.59593, saving model to ../xception_model/0705_0325xception_ratiotest_0705.hdf5
Epoch 5/300

Epoch 00005: val_loss improved from 1.59593 to 1.15375, saving model to ../xception_model/0705_0325xception_ratiotest_0705.hdf5
Epoch 6/300

Epoch 00006: val_loss improved from 1.15375 to 0.83906, saving model to ../xception_model/0705_0325xception_ratiotest_0705.hdf5
Epoch 7/300

Epoch 00007: val_loss improved from 0.83906 to 0.76069, saving model to ../xception_model/0705_0325xception_ratiotest_0705.hdf5
Epoch 8/300

Epoc

use 5000 images
baseline + ratio image test
