# <font size=10 color=darkslateblue> MNIST SE-NET / dropout / data augmentation / RMSprop / BN </font>
---
<font color=darkslateblue>Written by Maxwell@maxwell110</font>

![fig1](https://cdn-ak.f.st-hatena.com/images/fotolife/g/greenwind120170/20181104/20181104111845.png)

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import random

%matplotlib inline
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

import keras
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, GlobalAveragePooling2D, multiply
from keras.layers import Dropout, BatchNormalization, Activation
from keras.models import Model, Input
from keras.optimizers import SGD, RMSprop
from keras.callbacks import LearningRateScheduler, ModelCheckpoint, ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator

# List the files available in the input directory.
print(os.listdir(path="../input"))
# Draw every figure below with the ggplot theme.
plt.style.use('ggplot')

## <font color=darkslateblue><b> LOAD DATA </b></font>

In [None]:
# Read the training and test tables from the input directory.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('../input/train.csv', '../input/test.csv')
)

In [None]:
# Preview the first five rows of the training table.
train.head(n=5)

## <font color=darkslateblue><b> TRAIN AND TEST DATA </b></font>

In [None]:
# Scale pixel values by 1/255; the training table also carries the label
# column, so only the columns from 'pixel0' onward are used as features.
train_y = train['label']
train_x = train.loc[:, 'pixel0':].div(255)
test_x = test.div(255)

In [None]:
# Drop the DataFrame wrappers and keep the underlying arrays.
train_x, test_x = train_x.values, test_x.values
print(train_x.shape, test_x.shape)

In [None]:
# Turn each flat row into a 28x28 single-channel image for the CNN.
train_x = train_x.reshape((len(train_x), 28, 28, 1))
test_x = test_x.reshape((len(test_x), 28, 28, 1))

In [None]:
# One-hot encode the labels into 10 classes.
train_y = keras.utils.to_categorical(train_y, num_classes=10)
train_y.shape

## <font color=darkslateblue><b> SE-NET MODEL </b></font>

In [None]:
def SEModule(x, filters, ratio=16):
    """Re-weight the channels of ``x`` with a squeeze-and-excitation gate.

    The input is global-average-pooled, passed through a ReLU bottleneck
    ``Dense`` layer and a sigmoid ``Dense`` layer with ``filters`` units, and
    the resulting gate is multiplied with ``x``.

    Args:
        x: Feature-map tensor to gate. ``filters`` is expected to match its
            channel count (not checked here).
        filters: Number of units in the gating (sigmoid) layer.
        ratio: Reduction factor for the bottleneck layer width.

    Returns:
        The output of ``multiply([x, gate])``.
    """
    # ``filters // ratio`` is 0 whenever filters < ratio; keep at least one
    # unit so the bottleneck layer never collapses to zero width.
    bottleneck_units = max(filters // ratio, 1)
    se = GlobalAveragePooling2D()(x)
    se = Dense(bottleneck_units, activation='relu')(se)
    se = Dense(filters, activation='sigmoid')(se)
    return multiply([x, se])

In [None]:
def simple_cnn(input_shape):
    """Build the SE-augmented convolutional classifier.

    The network has two stages (64 and 128 filters). Each stage is a
    convolution, an ``SEModule`` gate and a second convolution, followed by
    dropout; the first stage is also max-pooled. The flattened features feed
    a 10-unit softmax layer.

    Args:
        input_shape: Shape passed to ``Input`` (without the batch axis).

    Returns:
        An uncompiled ``Model`` mapping the input to the softmax output.
    """
    def conv_stage(tensor, n_filters, batch_norm=True):
        # 3x3, stride-1, same-padded convolution, optional batch norm, ReLU.
        tensor = Conv2D(filters=n_filters,
                        kernel_size=[3, 3],
                        strides=[1, 1],
                        padding='same')(tensor)
        if batch_norm:
            tensor = BatchNormalization()(tensor)
        return Activation('relu')(tensor)

    cnn_in = Input(input_shape)

    # Stage 1: 64 filters, then 2x2 max pooling and light dropout.
    features = conv_stage(cnn_in, 64)
    features = SEModule(features, 64)
    features = conv_stage(features, 64)
    features = MaxPooling2D(pool_size=[2, 2])(features)
    features = Dropout(rate=0.1)(features)

    # Stage 2: 128 filters; the last convolution is not batch-normalized.
    features = conv_stage(features, 128)
    features = SEModule(features, 128)
    features = conv_stage(features, 128, batch_norm=False)
    features = Dropout(rate=0.2)(features)

    # Classification head.
    flat = Flatten()(features)
    cnn_out = Dense(units=10, activation='softmax')(flat)

    return Model(inputs=cnn_in, outputs=cnn_out)

In [None]:
# Shape of one input image as given to simple_cnn; matches the
# (28, 28, 1) per-sample shape produced by the reshape step above.
input_shape = (28, 28, 1)

In [None]:
# Build one instance of the network and print its layer summary.
cnn_model = simple_cnn(input_shape)
cnn_model.summary()

## <font color=darkslateblue><b> LEARNING SCHEDULE </b></font>

In [None]:
def lr_schedule(epoch):
    """Return the step-decayed learning rate for ``epoch``.

    The base rate is 0.1. It is divided by 5 once ``epoch`` exceeds 5, by 10
    once it exceeds 10, and by 100 once it exceeds 15.

    Args:
        epoch: Epoch index to look up.

    Returns:
        The learning rate as a float.
    """
    base_lr = 0.1
    # (epoch threshold, divisor) pairs, checked from the latest stage down so
    # the first match is the one that applies.
    for threshold, divisor in ((15, 100), (10, 10), (5, 5)):
        if epoch > threshold:
            return base_lr / divisor
    return base_lr

In [None]:
# Halve the learning rate when validation accuracy has not improved for
# 3 epochs, never going below 1e-6.
# Alternative (fixed schedule): LearningRateScheduler(lr_schedule)
lr_scheduler = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
    verbose=1,
)

## <font color=darkslateblue><b> MODEL CHECKPOINT </b></font>

In [None]:
# Checkpoint files are named by epoch and written under save_dir.
save_dir = '../model'
model_name = 'mnist.{epoch:03d}.h5'
modelpath = os.path.join(save_dir, model_name)

# Create the target directory up front (no error if it already exists).
os.makedirs(save_dir, exist_ok=True)

# Check every epoch, saving only when validation accuracy improves.
checkpoint = ModelCheckpoint(
    modelpath,
    monitor='val_acc',
    save_best_only=True,
    period=1,
    verbose=1,
)

## <font color=darkslateblue><b> MODEL COMPILE </b></font>

In [None]:
# Configure training: RMSprop on categorical cross-entropy, tracking accuracy.
cnn_model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=RMSprop(lr=1e-3, rho=0.9, epsilon=1e-8, decay=0.0),
)

## <font color=darkslateblue><b> DATA AUGMENTATION </b></font>

In [None]:
dgen = ImageDataGenerator(
    # --- active augmentations ---
    rotation_range=10,  # degree
    shear_range=10,  # degree
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    fill_mode='nearest',
    cval=0.0,
    # --- switched off: flips and intensity changes ---
    horizontal_flip=False,
    vertical_flip=False,
    brightness_range=None,
    channel_shift_range=0.,
    # --- switched off: normalization / whitening ---
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    zca_epsilon=1e-06,
    rescale=None,
    preprocessing_function=None,
    # --- data layout ---
    data_format='channels_last',
    validation_split=0.0,
)

In [None]:
# Show random training digits next to one augmented version of each.
n_sampled = 20
n_rows = n_sampled // 5  # five original/augmented pairs (ten panels) per row
fig, ax = plt.subplots(n_rows, 10, figsize=(15, 1.5 * n_rows))
axes = ax.ravel()
for i in range(n_sampled):
    # Pick one training image at random and keep it as a batch of size one.
    train_sampled = train_x[np.random.choice(range(train_x.shape[0]))].reshape(1, 28, 28, 1)
    # Draw a single augmented batch from the generator.
    batch_x = next(dgen.flow(train_sampled, batch_size=1))
    # Original goes in the even panel, augmented in the odd panel beside it.
    for offset, (image, title) in enumerate(((train_sampled, 'original'),
                                             (batch_x, 'augmented'))):
        panel = axes[2 * i + offset]
        panel.imshow(image.reshape(28, 28), cmap=plt.cm.Greys)
        panel.set_title(title, fontsize=5)
fig.tight_layout()

## <font color=darkslateblue><b> FIT MODEL AND PREDICT </b></font>

In [None]:
# Seeds 0..4: each one is used as the random_state of a train/validation
# split in the training loop, giving one trained model per seed.
SEED = range(5)

In [None]:
# Train one model per seed on a different train/validation split and
# accumulate the class probabilities predicted for the test set.
batch_size = 100
epochs = 70
# Running sum of softmax outputs over all seeds; only the argmax is used
# afterwards, so the sum does not need to be divided by the seed count.
test_pred = np.zeros((test_x.shape[0], 10))
for s in SEED:
    print('\nSEED : {}'.format(s))
    # Hold out 15% of the training data for validation, split by seed.
    train_x_s, val_x_s, train_y_s, val_y_s = train_test_split(
        train_x, train_y, test_size=0.15, random_state=s)

    # Fresh, freshly-compiled model for every seed.
    cnn_model = simple_cnn(input_shape)
    cnn_model.compile(
        optimizer=RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    # steps_per_epoch must be a whole number of batches; round up so a
    # partial final batch is still counted when the split size is not a
    # multiple of batch_size.
    steps_per_epoch = int(np.ceil(train_x_s.shape[0] / batch_size))
    cnn_model.fit_generator(
        dgen.flow(train_x_s, train_y_s, batch_size=batch_size),
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        validation_data=(val_x_s, val_y_s),
        shuffle=True,
        verbose=1,
        callbacks=[lr_scheduler]
    )
    test_pred += cnn_model.predict(test_x)

## <font color=darkslateblue><b> SUBMIT </b></font>

In [None]:
# Predicted digit = class with the highest accumulated probability.
sub = pd.Series(np.argmax(test_pred, axis=1), name="Label")
# ImageId is 1-based and sized from the predictions themselves rather than a
# hard-coded test-set length.
image_ids = pd.Series(range(1, len(sub) + 1), name="ImageId")
sub = pd.concat([image_ids, sub], axis=1)
sub.to_csv("simple_cnn_dropout_augmentation.csv", index=False)
sub.head(10)