In [None]:
# --- Import packages --- #
import tensorflow
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

import itertools
import operator

from keras.utils.np_utils import to_categorical 
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization, Lambda
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau

tensorflow.test.gpu_device_name() # Check whether a GPU device is available in Google Colab

In [None]:
# --- Connect to the Google Drive account to import data --- #
# Drive is mounted under /content/drive/, which is the prefix of the dataset
# and submission paths used in the cells below.
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
# --- Import datasets  --- #
# Both CSV files are read from the "Digits" folder of the mounted Drive.
# `train` carries a "label" column that is split off in the next cell;
# `test` is the frame that gets reshaped into the submission images.
train = pd.read_csv("/content/drive/My Drive/Digits/train.csv")
test = pd.read_csv("/content/drive/My Drive/Digits/test.csv")


In [None]:
# --- Shaping & Cleaning data --- #
# Fixed seed so the train / validation split is identical on every run.
SEED = 42

# Split explanatory variables (pixel columns) from the response variable (digit label)
y_train = train["label"]
X_train = train.drop(labels = ["label"], axis = 1)

# Rescale pixel values from 0 - 255 to 0 - 1 and reshape to 28 x 28 x 1 images.
# Everything is derived from the raw `train` / `test` frames without reassigning
# them, so re-running this cell can never rescale the data a second time.
X_train = X_train.values.reshape(-1, 28, 28, 1) / 255.0
X_sub = test.values.reshape(-1, 28, 28, 1) / 255.0

# One-hot encode the labels to fit the 10-way network output (4 = [0, 0, 0, 0, 1, 0, 0, 0, 0, 0])
y_train = to_categorical(y_train, num_classes = 10)

# Hold out 7 % of the training images for validation
# (42000 * 0.07 = 2940 images, assuming train.csv has 42000 rows).
X_train, X_val, Y_train, Y_val = train_test_split(X_train, y_train,
                                                  test_size = 0.07,
                                                  random_state = SEED)


In [None]:
# --- Example of an image from train set --- #
# Select the first training image and drop its trailing channel axis so that
# imshow receives a plain 2-D array.
first_image = X_train[0, :, :, 0]
showimg = plt.imshow(first_image, cmap='Greys')

In [None]:
# --- Define several CNN models : --- #

# First model: Conv, Conv, Max, Dropout, Conv, Conv, Max, Dropout, Flatten, Dense, Dropout, Dense
# Built by handing the full layer list to Sequential in one go.
model = Sequential([
    # Convolutional block 1: two 5x5 convolutions with 32 filters each
    Conv2D(filters = 32, kernel_size = (5,5), padding = 'Same',
           activation = 'relu', input_shape = (28,28,1)),
    Conv2D(filters = 32, kernel_size = (5,5), padding = 'Same',
           activation = 'relu'),
    MaxPool2D(pool_size = (2,2)),
    Dropout(0.25),

    # Convolutional block 2: two 3x3 convolutions with 64 filters each
    Conv2D(filters = 64, kernel_size = (3,3), padding = 'Same',
           activation = 'relu'),
    Conv2D(filters = 64, kernel_size = (3,3), padding = 'Same',
           activation = 'relu'),
    MaxPool2D(pool_size = (2,2), strides = (2,2)),
    Dropout(0.25),

    # Classifier head: one hidden dense layer, then a 10-way softmax (one unit per digit)
    Flatten(),
    Dense(256, activation = "relu"),
    Dropout(0.5),
    Dense(10, activation = "softmax"),
])

# Second model: Conv, Batch, Conv, Batch, Conv, Batch, Max, Dropout, Conv, Batch, Conv, Batch, Conv, Batch, Max, Dropout, Flatten, Dense, Dropout, Dense, Dropout, Dense
# Note: this is a plain sequential stack (every layer feeds only the next one).
model2 = Sequential([
    # Convolutional block 1: three 3x3 convolutions with 16 filters, each followed by batch normalisation
    Conv2D(filters = 16, kernel_size = (3, 3), activation = 'relu',
           input_shape = (28, 28, 1)),
    BatchNormalization(),
    Conv2D(filters = 16, kernel_size = (3, 3), activation = 'relu'),
    BatchNormalization(),
    Conv2D(filters = 16, kernel_size = (3, 3), activation = 'relu'),
    BatchNormalization(),
    MaxPool2D(strides = (2,2)),
    Dropout(0.25),

    # Convolutional block 2: same pattern with 32 filters
    Conv2D(filters = 32, kernel_size = (3, 3), activation = 'relu'),
    BatchNormalization(),
    Conv2D(filters = 32, kernel_size = (3, 3), activation = 'relu'),
    BatchNormalization(),
    Conv2D(filters = 32, kernel_size = (3, 3), activation = 'relu'),
    BatchNormalization(),
    MaxPool2D(strides = (2,2)),
    Dropout(0.25),

    # Classifier head: two hidden dense layers with dropout, then a 10-way softmax
    Flatten(),
    Dense(512, activation = 'relu'),
    Dropout(0.25),
    Dense(1024, activation = 'relu'),
    Dropout(0.5),
    Dense(10, activation = 'softmax'),
])

# Third model: Standardization, Conv, Conv, Max, Batch, Conv, Conv, Max, Batch, Conv, Max, Flatten, Batch, Dense, Dense
# Global statistics of the training images, used by the model's first layer.
mean = X_train.mean()
std = X_train.std()

def standardize(x):
    """Centre `x` on the training-set mean and scale it by the training-set std."""
    centered = x - mean
    return centered / std

model3 = Sequential([
    # Input standardisation happens inside the model
    Lambda(standardize, input_shape = (28,28,1)),

    # Convolutional block 1: two 3x3 convolutions with 64 filters
    Conv2D(filters = 64, kernel_size = (3,3), activation = "relu"),
    Conv2D(filters = 64, kernel_size = (3,3), activation = "relu"),
    MaxPool2D(pool_size = (2,2)),
    BatchNormalization(),

    # Convolutional block 2: two 3x3 convolutions with 128 filters
    Conv2D(filters = 128, kernel_size = (3,3), activation = "relu"),
    Conv2D(filters = 128, kernel_size = (3,3), activation = "relu"),
    MaxPool2D(pool_size = (2,2)),
    BatchNormalization(),

    # Convolutional block 3: a single 3x3 convolution with 256 filters
    Conv2D(filters = 256, kernel_size = (3,3), activation = "relu"),
    MaxPool2D(pool_size = (2,2)),

    # Classifier head: batch-normalised flat features, one hidden dense layer, 10-way softmax
    Flatten(),
    BatchNormalization(),
    Dense(512, activation = "relu"),
    Dense(10, activation = "softmax"),
])


In [None]:
# --- Generate data augmentation in batches --- #
# Generator that applies random rotation, horizontal / vertical shift and zoom
# within the ranges configured here.
datagen = ImageDataGenerator(rotation_range = 20,
                             width_shift_range = 0.1,
                             height_shift_range = 0.1,
                             zoom_range = 0.1)

# Training settings shared by the three training cells below
batch_size = 40  # number of images per generated batch (one training step)
spe = 1000       # steps (batches) per epoch passed to fit_generator
epochs = 30      # number of training epochs

In [None]:
# --- Run first model --- #
# A fresh optimizer per model so that no optimizer state is shared between runs.
optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
# Halve the learning rate (down to min_lr) once validation accuracy has stopped
# improving for 3 consecutive epochs.
annealer = ReduceLROnPlateau(monitor='val_acc', patience=3, verbose=1, factor=0.5, min_lr=0.00001)

# The metric is requested as "acc" rather than "accuracy": Keras >= 2.3 logs a
# metric under the exact name it was requested with, so "accuracy" would be
# logged as `val_accuracy` and the annealer (which monitors `val_acc`) would
# never trigger. "acc" is logged as `val_acc` on old and new versions alike.
model.compile(optimizer = optimizer, loss = "categorical_crossentropy", metrics=["acc"])

# Train on augmented batches; validate on the untouched hold-out set.
hist = model.fit_generator(datagen.flow(X_train,Y_train, batch_size = batch_size),
                              epochs = epochs, validation_data = (X_val,Y_val),
                              verbose = 1, steps_per_epoch = spe, callbacks=[annealer])


In [None]:
# --- Run second model --- #
# A fresh optimizer per model so that no optimizer state is shared between runs.
optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
# Halve the learning rate (down to min_lr) once validation accuracy has stopped
# improving for 3 consecutive epochs.
annealer = ReduceLROnPlateau(monitor='val_acc', patience=3, verbose=1, factor=0.5, min_lr=0.00001)

# The metric is requested as "acc" rather than "accuracy": Keras >= 2.3 logs a
# metric under the exact name it was requested with, so "accuracy" would be
# logged as `val_accuracy` and the annealer (which monitors `val_acc`) would
# never trigger. "acc" is logged as `val_acc` on old and new versions alike.
model2.compile(optimizer = optimizer, loss = "categorical_crossentropy", metrics=["acc"])

# Train on augmented batches; validate on the untouched hold-out set.
hist2 = model2.fit_generator(datagen.flow(X_train,Y_train, batch_size = batch_size),
                              epochs = epochs, validation_data = (X_val,Y_val),
                              verbose = 1, steps_per_epoch=spe, callbacks=[annealer])


In [None]:
# --- Run third model --- #
# A fresh optimizer per model so that no optimizer state is shared between runs.
optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
# Halve the learning rate (down to min_lr) once validation accuracy has stopped
# improving for 3 consecutive epochs.
annealer = ReduceLROnPlateau(monitor='val_acc', patience=3, verbose=1, factor=0.5, min_lr=0.00001)

# The metric is requested as "acc" rather than "accuracy": Keras >= 2.3 logs a
# metric under the exact name it was requested with, so "accuracy" would be
# logged as `val_accuracy` and the annealer (which monitors `val_acc`) would
# never trigger. "acc" is logged as `val_acc` on old and new versions alike.
model3.compile(optimizer = optimizer, loss = "categorical_crossentropy", metrics=["acc"])

# Train on augmented batches; validate on the untouched hold-out set.
hist3 = model3.fit_generator(datagen.flow(X_train,Y_train, batch_size = batch_size),
                              epochs = epochs, validation_data = (X_val,Y_val),
                              verbose = 1, steps_per_epoch=spe, callbacks=[annealer])


In [None]:
# --- Results of learning (accuracy and loss) --- #
# One report line per model, all formatted with the same template.
_report_template = "Final loss: {0:.4f}, final accuracy: {1:.4f}"

final_loss, final_acc = model.evaluate(X_val, Y_val, verbose=0)
print(_report_template.format(final_loss, final_acc))

final_loss2, final_acc2 = model2.evaluate(X_val, Y_val, verbose=0)
print(_report_template.format(final_loss2, final_acc2))

final_loss3, final_acc3 = model3.evaluate(X_val, Y_val, verbose=0)
print(_report_template.format(final_loss3, final_acc3))

# Name of the "best model": the key with the highest validation accuracy.
# On a tie the first key in insertion order wins.
models = {'model1': final_acc, 'model2': final_acc2, 'model3': final_acc3}
best_model = max(models, key=models.get)


In [None]:
# --- Print confusion matrices --- #
# True digit of every validation image (index of the 1 in its one-hot row);
# it is the same for all three models, so it is computed once.
y_true = np.argmax(Y_val, axis=1)

# First model
y_hat = model.predict(X_val)
y_pred = y_hat.argmax(axis=1)
cm = confusion_matrix(y_true, y_pred)
print(cm)

# Second model
y_hat2 = model2.predict(X_val)
y_pred2 = y_hat2.argmax(axis=1)
cm2 = confusion_matrix(y_true, y_pred2)
print(cm2)

# Third model
y_hat3 = model3.predict(X_val)
y_pred3 = y_hat3.argmax(axis=1)
cm3 = confusion_matrix(y_true, y_pred3)
print(cm3)


The three models seem to make different errors.
An ensemble can improve the predictions because, for each digit to predict, at least two of the models agree most of the time.
This should reduce errors compared with simply choosing the 'best model'.



In [None]:
# --- Predict results with the submission file already cleaned --- #
# Digit predicted by each model for every submission image
y_pred = np.argmax(model.predict(X_sub), axis=1)
y_pred2 = np.argmax(model2.predict(X_sub), axis=1)
y_pred3 = np.argmax(model3.predict(X_sub), axis=1)

# Store the predictions in a pandas DataFrame, one column per model
df = pd.DataFrame({'model1': y_pred, 'model2': y_pred2, 'model3': y_pred3})

# Use all models to improve predictions: most common value predicted per image.
# The mode is taken over the three model columns only, so later columns added
# to `df` cannot leak into the vote.
model_columns = ['model1', 'model2', 'model3']
df['mode'] = df[model_columns].mode(axis='columns')[0].astype(int)

# If all three models disagree there is no majority: fall back to the model
# named by `best_model` (chosen on validation accuracy); otherwise use the mode.
all_disagree = (
    (df['model1'] != df['model2'])
    & (df['model1'] != df['model3'])
    & (df['model3'] != df['model2'])
)
df['Label'] = np.where(all_disagree, df[best_model], df['mode'])

# Recreate the 1-based image index expected in the submission
df['ImageId'] = df.index + 1

# Create the output file. index=False keeps the CSV to exactly the two columns
# ImageId and Label; without it pandas prepends an unnamed index column.
df_output = df[['ImageId', 'Label']]
df_output.to_csv("/content/drive/My Drive/Digits/submission_mix.csv", index=False)
