In [None]:
import pandas as pd
import numpy as np

# vanilla ML methods ------------------------------------------------
# linear algorithms
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC, SVC
# metric algorithms
from sklearn.neighbors import KNeighborsClassifier
# ensemble algorithms
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

# deep learning -----------------------------------------------------
# to build own CNN from scratch
from keras.layers import Conv2D, MaxPool2D, Dropout, Dense, BatchNormalization, Activation, Flatten
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical

# visualize results / dimensionality reduction / manifold learning (TODO)
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
%matplotlib inline

# utilities / preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, train_test_split, StratifiedKFold
from sklearn.pipeline import Pipeline

import numpy as np
import pandas as pd
from keras import backend as K
from keras.applications.resnet50 import conv_block, identity_block
from keras.layers import (Activation, BatchNormalization, Convolution2D, Dense,
                          Flatten, Input, MaxPooling2D, ZeroPadding2D)
from keras.models import Model
from keras.optimizers import RMSprop
from keras.utils.np_utils import to_categorical

Using TensorFlow backend.


In [None]:
# define some variables
SHAPE = (28, 28, 1)
bn_axis = 3 if K.image_dim_ordering() == 'tf' else 1

# Load the flat pixel rows (one image per row) and the label table.
X_train = pd.read_csv(r'trainX.csv', header=None).values.astype(float)
y_train = pd.read_csv(r'trainY.csv', header=None)
test = pd.read_csv(r'testX.csv', header=None).values.astype(float)

# Every row must hold exactly one SHAPE-sized picture, otherwise the reshape
# below would silently regroup pixels across images.
n_pixels = int(np.prod(SHAPE))
assert X_train.shape[1] == n_pixels, 'unexpected picture size in trainX.csv'
assert test.shape[1] == n_pixels, 'unexpected picture size in testX.csv'

# Normalize the data: scale each pixel position by its maximum over the
# training set. The statistic is computed on the flat (n, n_pixels) layout so
# it broadcasts against both arrays, and is applied once to train and test so
# both end up on the same scale.
max_feature = X_train.max(axis=0)
# Pixel positions that are zero in every training image would give 0/0 = NaN;
# dividing by 1 instead leaves them at 0.
max_feature[max_feature == 0] = 1.0
X_train = (X_train / max_feature).reshape(-1, *SHAPE)
test = (test / max_feature).reshape(-1, *SHAPE)

print(X_train.shape, y_train.shape, test.shape)

In [None]:
# Collapse the one-hot label table (first 10 columns, one per digit) into a
# single integer Series named 'digit', vectorised instead of a per-cell loop.
is_hot = y_train.iloc[:, :10].values == 1
if not is_hot.any(axis=1).all():
    # A row without any flagged column has no label to decode.
    raise ValueError('y_train contains rows without a one-hot label')
# argmax on the column-reversed mask finds the LAST flagged column, so a row
# with several flags resolves to the highest digit.
last_hot = is_hot.shape[1] - 1 - is_hot[:, ::-1].argmax(axis=1)
y_train = pd.Series(last_hot, index=y_train.index, name='digit').astype(int)

In [None]:
# Keep 15% of the labelled data aside as a holdout set; the fixed
# random_state makes the split repeatable.
split_parts = train_test_split(X_train, y_train, test_size=0.15, random_state=42)
X, X_holdout, y, y_holdout = split_parts

In [None]:
def build_model(seed=None):
    """Build an uncompiled ResNet50-style classifier for SHAPE-sized images.

    The network is the ResNet50 layout (stem, then stages 2-4 assembled from
    ``conv_block`` / ``identity_block``) with stage 5 and the average pooling
    left out, followed by ``Flatten`` and a 10-way softmax layer.

    We can't use ResNet50 directly, as it might cause a negative dimension
    error.

    Parameters
    ----------
    seed : int or None
        When not None, NumPy's global random generator is seeded with this
        value before any layer is created. ``0`` is a valid seed.

    Returns
    -------
    keras.models.Model
        Model mapping an input of shape ``SHAPE`` to 10 class probabilities.
        The caller is responsible for compiling it.
    """
    # `is not None` rather than truthiness, so that seed=0 is honoured.
    if seed is not None:
        np.random.seed(seed)

    input_layer = Input(shape=SHAPE)

    # Stem: pad -> 7x7 stride-2 convolution -> batch norm -> ReLU -> max pool.
    x = ZeroPadding2D((3, 3))(input_layer)
    x = Convolution2D(64, (7, 7), strides=(2, 2), name='conv1')(x)
    x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    # Stage 2: the only stage whose conv_block is given strides=(1, 1).
    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    for block in ('b', 'c'):
        x = identity_block(x, 3, [64, 64, 256], stage=2, block=block)

    # Stage 3.
    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    for block in ('b', 'c', 'd'):
        x = identity_block(x, 3, [128, 128, 512], stage=3, block=block)

    # Stage 4.
    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    for block in ('b', 'c', 'd', 'e', 'f'):
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block=block)

    # Stage 5 and the average pooling are intentionally disabled:
    # x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
    # x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
    # x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
    # print(x)
    # x = AveragePooling2D((7, 7), name='avg_pool')(x)

    # Classification head.
    x = Flatten()(x)
    x = Dense(10, activation='softmax', name='fc10')(x)

    model = Model(input_layer, x)

    return model


In [None]:
# fit
model = build_model()
# model = Parallelizer().transform(model)
model.compile(RMSprop(lr=1e-4), 'categorical_crossentropy', ['accuracy'])

# categorical_crossentropy compares against the 10-unit softmax output, so the
# integer digit labels in `y` have to be one-hot encoded before fitting.
y_one_hot = to_categorical(y, 10)

# batch_size = real_batch_size * n_GPUs
# model.fit(train_x, train_y, batch_size=64*2, nb_epoch=20)
model.fit(X, y_one_hot, batch_size=64, epochs=20)
# model.save('digit_recognizer_model.h5')

In [None]:
# Predicted digit = index of the largest class score in each test row.
test_predictions = np.argmax(model.predict(test), axis=1)

In [None]:
# Display the predicted class index for each test row.
test_predictions

In [None]:
# Row ids are positional (0..n-1). They are derived from the predictions
# rather than from `test.index`, because `test` is reshaped into a NumPy
# array before prediction and therefore has no `.index` attribute.
d = {'id': np.arange(len(test_predictions)), 'digit': test_predictions}
df_output = pd.DataFrame(data=d)

# Write the submission file without the DataFrame's own index column.
df_output.to_csv(
     r'submissions.csv',
     index=False
)