In [1]:
# Ran in colab, because my computer is not strong enough.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D, BatchNormalization, Add, Activation, Cropping2D, Concatenate
%matplotlib inline

In [10]:
def ResNetUnit(x, filters=64, size=3, activation='relu', padding='same'):
    y = Conv2D(filters, size, activation=activation, padding=padding)(x)
    #y = BatchNormalization()(y)
    y = Conv2D(filters, size, activation=None, padding=padding)(y)
    #y = BatchNormalization()(y)
    if padding=='same':
        y = Add()([x, y])
    else:
        x = Cropping2D(size-1)(x)
        y = Add()([x, y])
    y = Activation(activation)(y)
    return y

In [3]:
def train_model(x_train, y_train, batch_size, epochs):
    inputs = Input(shape=(28, 28, 1))
    x = Conv2D(32, 3, activation='relu')(inputs)
    x = Conv2D(64, 3, activation='relu')(x)
    x = MaxPooling2D(2)(x)
    for _ in range(8):
        x = ResNetUnit(x)
    x = Conv2D(64, 3, activation='relu')(x)
    x = MaxPooling2D(2)(x)
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(10, activation='softmax')(x)

    model = Model(inputs, outputs)
    model.compile(optimizer='adam',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs)
    return model

# For testing
def train_simple_model(x_train, y_train, batch_size, epochs):
    inputs = Input(shape=(28, 28, 1))
    x = Conv2D(32, 3, activation='relu')(inputs)
    x = Conv2D(64, 3, activation='relu')(x)
    x = MaxPooling2D(2)(x)
    x = ResNetUnit(x)
    x = Conv2D(64, 3, activation='relu')(x)
    x = MaxPooling2D(2)(x)
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(10, activation='softmax')(x)

    model = Model(inputs, outputs)
    model.compile(optimizer='adam',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs)
    return model

In [4]:
# Load data
df = pd.read_csv('digit-recognizer/train.csv')
data_x = np.array(df.drop('label', axis=1))
data_y = np.array(df['label'])
data_test  = np.array(pd.read_csv('digit-recognizer/test.csv'))

In [5]:
data_x = data_x.reshape(42000, 28, 28, 1).astype('float32')
data_test = data_test.reshape(28000, 28, 28, 1).astype('float32')

In [6]:
data_x /= 255.0
data_test /= 255.0

In [8]:
def train_ensemble(n, batch_size, epochs):
    #with tf.device('/GPU:0'):
    models = []
    total = data_x.shape[0]
    all_inds = np.arange(data_x.shape[0])
    dist = np.repeat(1, data_x.shape[0])
    for i in range(n):
        train_x = []
        train_y = []
        if i == 0:
            # First run uses all samples
            train_x = data_x
            train_y = data_y
        else:
            # Next runs weight samples more if they came up as errors
            inds = np.random.choice(all_inds, data_x.shape[0], replace=False, p=dist/total)
            train_x = data_x[inds]
            train_y = data_y[inds]
        print('Training model', i)
        # Todo: simple model for debug...
        model = train_model(train_x, train_y, batch_size, epochs)
        print('Making predictions')
        raw_pred = model.predict(train_x) # Batch size has to match for this to work, apparently.
        #print(raw_pred[0:10])
        pred = np.argmax(raw_pred, axis=1)
        #print(pred[0:10])
        errors = np.not_equal(pred, train_y)
        #print(train_y[0:10])
        print('Model {0} accuracy is {1:.3f}'.format(i, 100*(1-np.sum(errors)/train_y.shape[0])))
        dist += errors
        total += sum(errors)
        models.append(model)
    return models

In [9]:
def check_ensemble(models, train_x, train_y):
    raw_preds = np.zeros((train_x.shape[0], 10))
    for i in range(len(models)):
        print('Making prediction', i)
        raw_preds += models[i].predict(train_x)
    preds = np.argmax(raw_preds, axis=1)
    errors = np.not_equal(preds, train_y)
    print('Ensemble accuracy is {:.3f}'.format(100*(1 - np.sum(errors)/train_y.shape[0])))

In [None]:
models = train_ensemble(10, 1000, 10)

In [None]:
# This would take forever to cross-validate (it already takes forever to run in the first place)
# This is accuracy on the training dataset, just to verify that the ensembles are helping
check_ensemble(models, data_x, data_y)

In [None]:
# Make test predictions
raw_preds = np.zeros((data_test.shape[0], 10))
for i in range(len(models)):
  print('Making prediction', i)
  raw_preds += models[i].predict(data_test)
preds = np.argmax(raw_preds, axis=1)
print('Prediction complete')

In [None]:
index = [i for i in range(1,data_test.shape[0]+1)]
answers = pd.DataFrame(zip(index, preds), columns=['ImageID', 'Label'])
answers.to_csv('submission_dtree.csv', index=False)

In [None]:
for i in range(len(models)):
    print('Saving model', i)
    models[i].save('rescnn_ensemble_model'+str(i)+'.h5')

In [None]:
# Final result: 99.585% on Kaggle test data!