In [1]:
from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras.preprocessing import image
from keras.callbacks import TensorBoard,History,EarlyStopping,CSVLogger
from sklearn.utils import shuffle
import numpy as np
import pandas as pd
import h5py
import os

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  (fname, cnt))
  (fname, cnt))


In [2]:
train_path = './train'
validation_path = './validation'
test_path = './test'
assert(os.path.exists(train_path))
assert(os.path.exists(validation_path))
assert(os.path.exists(test_path))

nb_train_samples = 20000
nb_val_samples = 2424
image_size = (224,224)
batch_size = 128
class_num = 10

In [25]:
def build_simple_CNN(input_shape=(224,224,3),class_num=10):
    
    model = Sequential()
    model.add(Conv2D(32,(3,3),input_shape=input_shape,activation='relu'))
    model.add(BatchNormalization(axis=-1))
    model.add(MaxPool2D())
    
    model.add(Conv2D(32,(3,3),activation='relu'))
    model.add(BatchNormalization(axis=-1))
    model.add(MaxPool2D())
    
    model.add(Conv2D(64,(3,3),activation='relu'))
    model.add(BatchNormalization(axis=-1))
    model.add(MaxPool2D()) 
    
    model.add(Flatten())
    model.add(Dense(256,activation='relu'))
    model.add(BatchNormalization(axis=-1))
    model.add(Dropout(0.5))
    model.add(Dense(class_num,activation='softmax'))
    
    model.compile(loss='categorical_crossentropy',optimizer=optimizers.Adadelta(),metrics=['accuracy'])
    
    
    return model

In [6]:
def image_preprocess(img_path):

    img = image.load_img(img_path, target_size=(224, 224))
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = img / 255.0
    
    return img

In [7]:
def test_model(model,test_path,csv='sample_submission.csv'):
    columns = ['img', 'c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']
    df = pd.DataFrame(columns=columns)
    
    file_list = os.listdir(test_path)
    for i,file in enumerate(file_list):
        test_data = image_preprocess(test_path + '/' + file)
        y_pred = model.predict(test_data,batch_size=1,verbose=0)
        y_pred = np.clip(y_pred,0.001,0.999)
        y_pred = y_pred[0].tolist()
        df.loc[i] = [file] + y_pred

        
    df.to_csv(csv,index=None)

In [26]:
train_datagen = image.ImageDataGenerator(rescale=1./255)

validation_datagen = image.ImageDataGenerator(rescale=1./255)


train_generator = train_datagen.flow_from_directory(
                                    train_path,
                                    target_size=image_size,
                                    batch_size=batch_size,
                                    class_mode='categorical',
                                    shuffle=True)

validation_generator = validation_datagen.flow_from_directory(
                                    validation_path,
                                    target_size=image_size,
                                    batch_size=batch_size,                                    
                                    class_mode='categorical',
                                    shuffle=False)


Found 20000 images belonging to 10 classes.
Found 2424 images belonging to 10 classes.


In [27]:
model = build_simple_CNN()
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_21 (Conv2D)           (None, 222, 222, 32)      896       
_________________________________________________________________
batch_normalization_7 (Batch (None, 222, 222, 32)      128       
_________________________________________________________________
max_pooling2d_18 (MaxPooling (None, 111, 111, 32)      0         
_________________________________________________________________
conv2d_22 (Conv2D)           (None, 109, 109, 32)      9248      
_________________________________________________________________
batch_normalization_8 (Batch (None, 109, 109, 32)      128       
_________________________________________________________________
max_pooling2d_19 (MaxPooling (None, 54, 54, 32)        0         
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 52, 52, 64)        18496     
__________

In [28]:
model.fit_generator(train_generator,
                    steps_per_epoch=nb_train_samples // batch_size,
                    epochs=10,
                    callbacks=[EarlyStopping(patience=3)],
                    validation_data=validation_generator,
                    validation_steps=nb_val_samples // batch_size)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10


<keras.callbacks.History at 0x7f60e377bd68>

In [29]:
train_datagen = image.ImageDataGenerator(rescale=1./255,
                                         shear_range=0.2,
                                         zoom_range=0.2,
                                         rotation_range=15,
                                         height_shift_range=0.2,
                                         width_shift_range=0.2,
                                         channel_shift_range=10,
                                         horizontal_flip=True)

validation_datagen = image.ImageDataGenerator(rescale=1./255)


train_generator = train_datagen.flow_from_directory(
                                    train_path,
                                    target_size=image_size,
                                    batch_size=batch_size,
                                    class_mode='categorical',
                                    shuffle=True)

validation_generator = validation_datagen.flow_from_directory(
                                    validation_path,
                                    target_size=image_size,
                                    batch_size=batch_size,                                    
                                    class_mode='categorical',
                                    shuffle=False)

Found 20000 images belonging to 10 classes.
Found 2424 images belonging to 10 classes.


In [32]:
model = build_simple_CNN()
history = model.fit_generator(train_generator,
                    steps_per_epoch=nb_train_samples // batch_size,
                    epochs=10,
                    validation_data=validation_generator,
                    validation_steps=nb_val_samples // batch_size)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [33]:
print(history.history)

{'val_loss': [3.535255399015215, 2.112108870250423, 1.7576891528604752, 2.2322484383599686, 3.358825390438066, 2.203625086707936, 1.2739743555049032, 1.294378822688857, 1.7676885667993631, 2.1275294200883925], 'val_acc': [0.1032986111111111, 0.30618466898954705, 0.4616724738675958, 0.4259581881533101, 0.2652439024390244, 0.3967770041073656, 0.6306620203867191, 0.5688153309585325, 0.5500871080918179, 0.5113240418767472], 'loss': [2.5182303343063746, 1.9220908062357733, 1.5976088434793716, 1.3910034299856606, 1.1884645793748938, 1.0432066741772896, 0.9187738998578145, 0.799754138806877, 0.733280458887994, 0.6678156073350645], 'acc': [0.20182291666666666, 0.33206917475728154, 0.43307165861513686, 0.5114231078904992, 0.5849436392914654, 0.6355173107890499, 0.6869491185897436, 0.7297734627831716, 0.7519625603864735, 0.7782306763285024]}


In [35]:
model.save('simple_cnn_model.h5')

In [37]:
model = load_model('simple_cnn_model.h5')

sgd = SGD(lr=0.0001, momentum=0.9)
model.compile(loss='categorical_crossentropy', optimizer=sgd,metrics=['accuracy'])
history = model.fit_generator(train_generator,
                    steps_per_epoch=nb_train_samples // batch_size,
                    epochs=5,
                    validation_data=validation_generator,
                    validation_steps=nb_val_samples // batch_size)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [39]:
history = model.fit_generator(train_generator,
                    steps_per_epoch=nb_train_samples // batch_size,
                    epochs=5,
                    validation_data=validation_generator,
                    validation_steps=nb_val_samples // batch_size)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [40]:
model.save('simple_cnn_final_model.h5')

In [8]:
model = load_model('simple_cnn_model.h5')

test_model(model,test_path,csv='simple_cnn_submission.csv')