In [1]:
import numpy as np 
import pandas as pd 

from tensorflow import keras
from keras import Sequential
from keras.layers import Dense, Conv2D, Flatten, BatchNormalization, Activation
from keras.utils.np_utils import to_categorical
from keras.callbacks import ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
#from scipy.stats import mode
from sklearn.model_selection import train_test_split

#Define model with 3 x 3 valid convolution, kernel_size=3, stride 1, and ReLU activation. 
#Also use BatchNormalization
def my_model3():
    """Build and compile the CNN made of ten stacked 3 x 3 'valid' convolutions.

    Each convolution uses stride 1, no bias and no built-in activation, and is
    followed by BatchNormalization and then a ReLU activation. The number of
    filters starts at 32 and grows by 16 per layer, ending at 176. The result
    is flattened and sent through Dense(10) -> BatchNormalization -> softmax.

    The input shape is taken from the module-level ``X_train`` at call time,
    so ``X_train`` must already be defined when this function is called.

    Returns:
        The Sequential model, compiled with categorical cross-entropy loss,
        the 'adam' optimizer and the 'accuracy' metric.
    """
    conv_settings = dict(kernel_size=(3,3), strides=(1,1), padding='valid', activation=None, use_bias=False)
    filter_counts = (32, 48, 64, 80, 96, 112, 128, 144, 160, 176)

    net = Sequential()
    for position, n_filters in enumerate(filter_counts):
        # Only the very first layer declares the input shape.
        first_layer_kwargs = {'input_shape': X_train.shape[1:]} if position == 0 else {}
        net.add(Conv2D(filters=n_filters, **conv_settings, **first_layer_kwargs))
        net.add(BatchNormalization())
        net.add(Activation('relu'))

    # Classification head: one output per digit class.
    net.add(Flatten())
    net.add(Dense(units=10))
    net.add(BatchNormalization())
    net.add(Activation('softmax'))

    net.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return net

#Define model with 5 x 5 valid convolution, kernel_size=5, stride 1, and ReLU activation. 
#Also use BatchNormalization
def my_model5():
    """Build and compile the CNN made of five stacked 5 x 5 'valid' convolutions.

    Each convolution uses stride 1, no bias and no built-in activation, and is
    followed by BatchNormalization and then a ReLU activation. The number of
    filters starts at 32 and grows by 32 per layer, ending at 160. The result
    is flattened and sent through Dense(10) -> BatchNormalization -> softmax.

    The input shape is taken from the module-level ``X_train`` at call time,
    so ``X_train`` must already be defined when this function is called.

    Returns:
        The Sequential model, compiled with categorical cross-entropy loss,
        the 'adam' optimizer and the 'accuracy' metric.
    """
    conv_settings = dict(kernel_size=(5,5), strides=(1,1), padding='valid', activation=None, use_bias=False)
    filter_counts = (32, 64, 96, 128, 160)

    net = Sequential()
    for position, n_filters in enumerate(filter_counts):
        # Only the very first layer declares the input shape.
        first_layer_kwargs = {'input_shape': X_train.shape[1:]} if position == 0 else {}
        net.add(Conv2D(filters=n_filters, **conv_settings, **first_layer_kwargs))
        net.add(BatchNormalization())
        net.add(Activation('relu'))

    # Classification head: one output per digit class.
    net.add(Flatten())
    net.add(Dense(units=10))
    net.add(BatchNormalization())
    net.add(Activation('softmax'))

    net.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return net

#Define model with 7 x 7 valid convolution, kernel_size=7, stride 1, and ReLU activation. 
#Also use BatchNormalization
def my_model7():
    """Build and compile the CNN made of four stacked 7 x 7 'valid' convolutions.

    Each convolution uses stride 1, no bias and no built-in activation, and is
    followed by BatchNormalization and then a ReLU activation. The number of
    filters starts at 48 and grows by 48 per layer, ending at 192. The result
    is flattened and sent through Dense(10) -> BatchNormalization -> softmax.

    The input shape is taken from the module-level ``X_train`` at call time,
    so ``X_train`` must already be defined when this function is called.

    Returns:
        The Sequential model, compiled with categorical cross-entropy loss,
        the 'adam' optimizer and the 'accuracy' metric.
    """
    conv_settings = dict(kernel_size=(7,7), strides=(1,1), padding='valid', activation=None, use_bias=False)
    filter_counts = (48, 96, 144, 192)

    net = Sequential()
    for position, n_filters in enumerate(filter_counts):
        # Only the very first layer declares the input shape.
        first_layer_kwargs = {'input_shape': X_train.shape[1:]} if position == 0 else {}
        net.add(Conv2D(filters=n_filters, **conv_settings, **first_layer_kwargs))
        net.add(BatchNormalization())
        net.add(Activation('relu'))

    # Classification head: one output per digit class.
    net.add(Flatten())
    net.add(Dense(units=10))
    net.add(BatchNormalization())
    net.add(Activation('softmax'))

    net.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return net

'''
#The following function is useful if you want to implement ensemble through majority vote
def mostCommon(mostC):
    val, count = mode(mostC, axis=1)
    return val.ravel()#.tolist() '''

#Load the training and test CSV files as NumPy arrays
labelled_pixels=pd.read_csv('/kaggle/input/digit-recognizer/train.csv', header='infer').values
X_test=pd.read_csv('/kaggle/input/digit-recognizer/test.csv', header='infer').values

#Column 0 of the training table holds the label, the remaining columns hold the image pixels
y_train=labelled_pixels[:,0]
X_train=labelled_pixels[:,1:]

#The combined training table is not needed under its own name any more
del labelled_pixels

#Normalize train and test images with (x - 127.5)/127.5, then
#reshape every 784-value row into a 28 x 28 x 1 image
X_train = ((X_train.astype(np.float32) - 127.5)/127.5).reshape(-1,28,28,1)
X_test = ((X_test.astype(np.float32) - 127.5)/127.5).reshape(-1,28,28,1)

#To normalize intensities into the range 0 to 1 instead, divide by 255 in place of the
#formula above:
#X_train=X_train/255.
#X_test=X_test/255.

#One-hot encode the class labels
y_train_vectors=to_categorical(y_train)

print(X_train.shape)
print(X_test.shape)

#Hold out 20% of the training images for validation (fixed seed for a repeatable split).
#From here on y_train / y_val hold the one-hot label vectors.
X_train, X_val, y_train, y_val= train_test_split(X_train, y_train_vectors, test_size=0.2, random_state=2)

#Create one instance of each of the 3 CNNs
model3=my_model3()
model5=my_model5()
model7=my_model7()


#Create an instance of ImageDataGenerator for augmenting training images.
#Augmentation can help avoid overfitting.
#Only rotation_range=10, zoom_range=0.1, width_shift_range=0.1 and height_shift_range=0.1
#are enabled; every other option is explicitly switched off.

train_datagen = ImageDataGenerator(featurewise_center=False,
                             samplewise_center=False,
                             featurewise_std_normalization=False,
                             samplewise_std_normalization=False,
                             zca_whitening=False,
                             rotation_range=10,
                             zoom_range=0.1,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             horizontal_flip=False,
                             vertical_flip=False
                            )

#Use the flow method to pass augmented images to fit in batches of size 120
train_generator = train_datagen.flow(X_train, y_train,
                                     batch_size=120,
                                     shuffle=True)

#Validation images go through a generator with no augmentation options set
val_datagen = ImageDataGenerator()
val_generator = val_datagen.flow(X_val, y_val,
                                 batch_size=120,
                                 shuffle=True)

#Halve the learning rate (never going below 1e-5) when validation accuracy has not
#improved for 3 epochs.
#The models are compiled with metrics=['accuracy'], which tf.keras 2.x logs as
#'val_accuracy' for the validation data. Monitoring the old name 'val_acc' made the
#callback find no such metric, so the learning rate was never reduced.
reduceLROnPlateau = ReduceLROnPlateau(monitor='val_accuracy', 
                                patience=3,
                                verbose=1, 
                                factor=0.5,
                                min_lr=0.00001)


#Fit the 3 CNNs one after another on the same augmented training stream
model3.fit(train_generator, epochs=150, callbacks=[reduceLROnPlateau], validation_data=val_generator)
model5.fit(train_generator, epochs=150, callbacks=[reduceLROnPlateau], validation_data=val_generator)
model7.fit(train_generator, epochs=150, callbacks=[reduceLROnPlateau], validation_data=val_generator)

#Run each of the 3 trained CNNs on the test images.
#Each prediction variable is a matrix with one row per test image and 10 columns,
#as there are 10 classes
prediction_vectors3, prediction_vectors5, prediction_vectors7 = [
    cnn.predict(X_test) for cnn in (model3, model5, model7)
]

print(prediction_vectors3.shape, prediction_vectors5.shape, prediction_vectors7.shape, sep='\n')

#One way of ensembling: average the predictions of the 3 models, then pick the class
#with the highest averaged probability via argmax
average_prediction_vectors=(prediction_vectors3+prediction_vectors5+prediction_vectors7)/3.
predictions_final=average_prediction_vectors.argmax(axis=1)

'''
Another way of ensembling
Decide prediction from individual model and then take the majority vote

#Following 3 lines decide prediction from individual models, each prediction variable will be 
#now vector of size 28K
predictions3=np.argmax(prediction_vectors3,axis=1)
predictions5=np.argmax(prediction_vectors5,axis=1)
predictions7=np.argmax(prediction_vectors7,axis=1)

print(predictions3.shape)
print(predictions5.shape)
print(predictions7.shape)

#Combine predictions from individual model in 1 matrix, 
#number of rows will be 28K but now number of columns is 3
predictions=np.stack([predictions3,predictions5,predictions7], axis=1)

#mostCommon is the function written to take the majority vote
#After call to it, predictions_final is back to vector of size 28K
predictions_final=mostCommon(predictions)
print(predictions_final.shape)

'''

#Load sample_submission.csv into dataframe sub so the submission keeps its layout
sub = pd.read_csv('/kaggle/input/digit-recognizer/sample_submission.csv')

#Replace the labels in sub with the ensemble predictions
sub = sub.assign(Label=predictions_final)

#Save the updated dataframe as submission.csv, without the index column
sub.to_csv('submission.csv',index=False)

(42000, 28, 28, 1)
(28000, 28, 28, 1)


2022-03-11 04:30:22.765202: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-11 04:30:22.868766: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-11 04:30:22.869459: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-11 04:30:22.870958: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Epoch 1/150


2022-03-11 04:30:27.637438: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78/150
Epoch 7