In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
# imports for neural networks
from keras.models import Sequential
from tensorflow import keras
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Input, ZeroPadding2D, BatchNormalization, Activation, Dropout
from tensorflow.keras import activations
# imports for data
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
# imports for callbacks
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

1. Data Preparation

In [None]:
def _read_pixel_csv(csv_path):
    """Load a comma-separated file into a NumPy array, skipping its header row."""
    return np.loadtxt(csv_path, skiprows=1, delimiter=',')


# Standard MNIST: training and test files
train_mnist = _read_pixel_csv('/content/drive/MyDrive/Data/digit-recognizer/mnist_train.csv')
test_mnist = _read_pixel_csv('/content/drive/MyDrive/Data/digit-recognizer/mnist_test.csv')
# Kaggle competition: training file and the file to predict on
train_dataset = _read_pixel_csv('/content/drive/MyDrive/Data/digit-recognizer/train.csv')
test = _read_pixel_csv('/content/drive/MyDrive/Data/digit-recognizer/test.csv')

# Stack the three arrays row-wise into a single array (Kaggle rows first)
dataset = np.concatenate((train_dataset, train_mnist, test_mnist))

print(dataset.shape)

In [3]:
# Augmentation limits applied at random to every generated image
augmentation_limits = dict(
    rotation_range=10,       # rotation angle, up to 10 degrees
    zoom_range=0.1,          # zoom, up to 10%
    width_shift_range=0.1,   # horizontal shift, up to 10% of the width
    height_shift_range=0.1,  # vertical shift, up to 10% of the height
)
# Generator that produces the augmented images
datagen = ImageDataGenerator(**augmentation_limits)

In [4]:
# Shape of a single input image: 28 x 28 pixels, 1 channel
input_shape = (28, 28, 1)
# Every column after the first one (which holds the answers) is pixel data;
# turn each row into one 28x28x1 image
x_train = dataset[:, 1:].reshape(-1, *input_shape)

In [5]:
def _scale_pixels_in_place(pixels):
    """Divide ``pixels`` by 255 in place, at most once, and return it.

    The division is skipped when the array is empty or no value exceeds 1.0,
    so re-running this cell does not divide already-scaled data a second time.
    """
    if pixels.size and pixels.max() > 1.0:
        pixels /= 255.0
    return pixels


# Reshape the images to predict on into 28x28x1 arrays
test = test.reshape(test.shape[0], 28, 28, 1)
# Scale both image sets by 1/255 (safe to re-run)
test = _scale_pixels_in_place(test)
x_train = _scale_pixels_in_place(x_train)

In [6]:
# The first column of the dataset holds the correct answer for every row
digit_labels = dataset[:, 0]
# One-hot encode the answers
y_train = to_categorical(digit_labels)

In [7]:
# Hold out 10% of the samples for validation.
# random.seed() returns None, so passing it as random_state left the split unseeded;
# a fixed integer seed makes the split reproducible across runs.
X_train, X_val, Y_train, Y_val = train_test_split(x_train, y_train, test_size=0.1, random_state=42)

2. CNN

In [12]:
def _add_conv_block(net, filters, kernel_size, dropout_rate, pad_after_pool=False):
    """Append one convolutional stage to ``net``.

    Layers added, in order: two 'same'-padded ReLU convolutions, each followed
    by batch normalisation; a ReLU activation; 2x2 max pooling with stride 2;
    optionally a one-pixel zero border; dropout.

    Args:
        net: Sequential model to extend in place.
        filters: number of filters in both convolutions.
        kernel_size: (height, width) of the convolution kernels.
        dropout_rate: fraction passed to the closing Dropout layer.
        pad_after_pool: when True, add ZeroPadding2D((1, 1)) between the
            pooling and dropout layers.
    """
    for _ in range(2):
        net.add(Conv2D(filters=filters, kernel_size=kernel_size,
                       padding='same', activation='relu'))
        net.add(BatchNormalization())
    net.add(Activation(activations.relu))
    net.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    if pad_after_pool:
        net.add(ZeroPadding2D(padding=(1, 1)))
    net.add(Dropout(dropout_rate))


# Create a convolutional neural network
model = Sequential()

# Input layer: the only place where the input shape is declared
# (later layers no longer repeat input_shape)
model.add(Input(shape=(28, 28, 1)))
# Add rows and columns of zeros at the top, bottom, left and right of the image
model.add(ZeroPadding2D(padding=(1, 1)))

# Stage 1: 5x5 kernels, 32 filters, zero border after pooling, 20% dropout
_add_conv_block(model, filters=32, kernel_size=(5, 5), dropout_rate=0.2, pad_after_pool=True)
# Stage 2: 3x3 kernels, 64 filters, 20% dropout
_add_conv_block(model, filters=64, kernel_size=(3, 3), dropout_rate=0.2)
# Stage 3: 3x3 kernels, 256 filters, 30% dropout
_add_conv_block(model, filters=256, kernel_size=(3, 3), dropout_rate=0.3)

# Flatten the multidimensional tensor into a vector
model.add(Flatten())
# Fully connected layer with "relu" activation function
model.add(Dense(128, activation="relu"))
model.add(Dropout(0.4))
# Fully connected output layer with "softmax" activation (one unit per class)
model.add(Dense(10, activation="softmax"))

3. Callbacks and compilation

In [13]:
# Callback that saves the best version of the network seen during training.
# The file name ends in '.h5': the previous '.hd5' suffix is not a recognised
# Keras model extension (NOTE(review): very early Keras 3 releases accept only
# '.keras' — switch the suffix if the installed version rejects '.h5').
checkpoint = ModelCheckpoint('mnist-cnn.h5',
                             monitor='val_accuracy',  # Proportion of correct answers on the validation set
                             save_best_only=True,     # Save only the best result
                             verbose=1)               # Log output

In [14]:
# Callback that lowers the learning rate when validation accuracy stops improving
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',  # metric that is watched
    factor=0.5,              # the learning rate is multiplied by this factor
    patience=3,              # epochs without improvement before reducing
    min_lr=0.00001,          # the learning rate is never reduced below this
    verbose=1,               # log output
)

In [15]:
# Configure training: Adam optimiser, categorical cross-entropy loss, accuracy metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Print a summary of the network built above
model.summary()

In [None]:
# Mini-batch size
batch_size = 32

# Augmented batches generated from the training split
train_batches = datagen.flow(X_train, Y_train, batch_size=batch_size)
# Number of full batches that fit into the training split
steps_per_epoch = X_train.shape[0] // batch_size

# Train the model and keep the training history
history = model.fit(
    train_batches,
    steps_per_epoch=steps_per_epoch,
    epochs=35,
    validation_data=(X_val, Y_val),
    callbacks=[checkpoint, learning_rate_reduction],
    shuffle=True,
    verbose=1,
)



In [None]:
# Run the in-memory model on the images to predict on.
# NOTE(review): the best checkpoint written by the ModelCheckpoint callback is not
# reloaded before predicting — confirm whether that is intended.
probability = model.predict(test)

In [None]:
# Predicted class = position of the largest value in each row of the model output
y_pred = np.argmax(probability, axis=1)
# Put the predictions into the 'Label' column of the sample submission and save it
# without the index column
df = pd.read_csv("/content/drive/MyDrive/Data/digit-recognizer/sample_submission.csv")
df['Label'] = y_pred
df.to_csv("prediction.csv", index=False)