In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# Show which data files are available in the input directory.
input_dir = "../input"
print(os.listdir(input_dir))

# Any results you write to the current directory are saved as output.

In [None]:
# Load the training and test sets with explicit paths. Changing the process-wide
# working directory (os.chdir) just to shorten two file names is a hidden global
# side effect: every later relative path in the notebook would then resolve
# against the input directory instead of the directory the notebook started in.
train = pd.read_csv('../input/train.csv')
test = pd.read_csv('../input/test.csv')

In [None]:
# Preview the first five rows of the training frame.
train.head(n=5)

**Check the distribution of classes in the dataset. Knowing it helps build an unbiased model, because classes with a smaller share of the data can be given special treatment.**

In [None]:
# Number of distinct label values (dropna=False keeps parity with len(unique())).
train['label'].nunique(dropna=False)


In [None]:
# Samples per label, ordered by label value rather than by frequency.
label_counts = train['label'].value_counts()
label_counts.sort_index()

In [None]:
import seaborn as sns

In [None]:
# Bar chart of how many samples each label has.
# The Series is passed by keyword: seaborn's categorical plots expect `x=`/`data=`
# keywords, and a bare positional Series is deprecated or re-interpreted as `data`
# in newer seaborn releases.
g = sns.countplot(x=train['label'])

**Check for any outliers in the data using the describe function**

In [None]:
# Per-column summary statistics of the training frame, used here to eyeball
# the value ranges for outliers.
train.describe()

**Check for any missing values**

In [None]:
# True if at least one cell anywhere in the training frame is missing.
train.isna().values.any()

**Now since all the EDA for the data is done and no issues have been found we proceed to *Data Preprocessing***

In [None]:
# Feature frame: every column of the training data except the target.
X_train = train.drop(columns=['label'])

In [None]:
# Preview the first five rows of the feature frame.
X_train.head(n=5)

In [None]:
# Target vector: the 'label' column of the training data.
Y_train = train.loc[:, 'label']

**The code below scales the data. It brings the range of the column values from [0, 255] down to [0, 1].**

In [None]:
# Divide every value by 255 in both the training features and the test frame.
X_train = X_train.div(255)
test = test.div(255)

**The code below converts the target variable to its categorical (one-hot) form, i.e. a vector of fixed size, so that it fits the output layer of the neural network.**

In [None]:
from keras.utils import to_categorical

In [None]:
# One-hot encode the labels.
# Encoding from the raw 'label' column (rather than from Y_train itself) keeps
# this cell idempotent: re-running it no longer one-hot encodes an already
# encoded matrix. num_classes is pinned to 10 to match the 10-unit softmax
# output layer of the model, independent of which labels happen to be present.
Y_train = to_categorical(train['label'], num_classes=10)

In [None]:
# Sanity check: list the distinct rows of the encoded label matrix.
np.unique(ar=Y_train, axis=0)

**To use a CNN, the tabular data has to be converted to image format, i.e. a pixel matrix. The 784 data points of each row are reshaped into a 28x28 matrix. Since the images are grayscale, the third dimension is 1, so the final shape of each image is 28x28x1.**

In [None]:
# Reshape each flat row into a 28x28 single-channel image; -1 lets NumPy infer
# the number of images.
X_train_matrix = np.reshape(X_train.values, (-1, 28, 28, 1))
test_matrix = np.reshape(test.values, (-1, 28, 28, 1))

In [None]:
# Shape of the reshaped training images.
np.shape(X_train_matrix)

In [None]:
# Shape of the reshaped test images.
np.shape(test_matrix)

**Now that the values are reshaped, we split the training data into a training set and a validation set using scikit-learn's `train_test_split`.**

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the images (and their labels) for validation; the fixed
# random_state makes the split reproducible.
X_train, X_val, Y_train, Y_val = train_test_split(
    X_train_matrix,
    Y_train,
    test_size=0.1,
    random_state=2,
)

**Now that the input values have been converted into a form that can be used with CNNs, we start creating the architecture of the model, i.e. its layers.**

In [None]:
from keras.models import Sequential
# Flatten is imported from the public `keras.layers` namespace together with the
# other layers; the `keras.layers.core` submodule path is not available in
# newer Keras releases, while `keras.layers.Flatten` exists in old and new ones.
from keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPool2D

In [None]:
# Convolutional classifier: conv/pool feature extractor followed by a small
# dense head ending in a 10-way softmax.
model = Sequential([
    # Two 5x5 convolution stages, each followed by 2x2 max pooling.
    Conv2D(filters=32, padding='Same', kernel_size=(5, 5), activation='relu',
           input_shape=(28, 28, 1)),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(filters=32, padding='Same', kernel_size=(5, 5), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    # Two stacked 2x2 convolutions, pooling, then dropout.
    Conv2D(filters=64, kernel_size=(2, 2), padding='Same', activation='relu'),
    Conv2D(filters=64, kernel_size=(2, 2), padding='Same', activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Final convolution and pooling stage.
    Conv2D(filters=64, kernel_size=(2, 2), activation='relu', padding='Same'),
    MaxPool2D(pool_size=(2, 2)),
    # Dense head.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.4),
    Dense(10, activation='softmax'),
])

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [None]:
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# RMSprop optimizer with an initial learning rate of 0.001 and no decay.
optimizer = RMSprop(
    lr=0.001,
    rho=0.9,
    epsilon=1e-08,
    decay=0.0,
)

In [None]:
# Categorical cross-entropy matches the one-hot targets and softmax output;
# accuracy is tracked as the reporting metric.
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
from keras.callbacks import ReduceLROnPlateau

In [None]:
# Multiply the learning rate by 0.3 when the monitored validation accuracy has
# not improved for 5 epochs, never going below 1e-5.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_acc',
    patience=5,
    verbose=1,
    factor=0.3,
    min_lr=0.00001,
)

In [None]:
# Training hyperparameters: number of epochs and mini-batch size.
epoch, batch_size = 22, 50

In [None]:
# Augmentation settings: no normalisation or whitening, no flips; random
# rotations up to 30 degrees, 10% zoom and 10% shifts in both directions.
augmentation_options = dict(
    featurewise_center=False,
    featurewise_std_normalization=False,
    samplewise_center=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    rotation_range=30,
    zoom_range=0.1,
    horizontal_flip=False,
    vertical_flip=False,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
datagen = ImageDataGenerator(**augmentation_options)
datagen.fit(X_train)

In [None]:
# Train on augmented batches and evaluate on the untouched validation split
# after every epoch.
train_batches = datagen.flow(X_train, Y_train, batch_size=batch_size)
steps = X_train.shape[0] // batch_size  # whole batches per epoch
history = model.fit_generator(
    train_batches,
    epochs=epoch,
    verbose=1,
    steps_per_epoch=steps,
    validation_data=(X_val, Y_val),
    callbacks=[learning_rate_reduction],
)

In [None]:
import matplotlib.pyplot as plt

**Evaluate the model**


Now that the model has been trained and has achieved a good validation accuracy, we need to evaluate it in order to check its performance.
Some complex models, and neural networks in particular, have a tendency to overfit the training data due to the complex calculations carried out on the data during training.
Overfitting is a curse for predictive models: they learn everything about the training data but fail to perform on unseen data. Hence the concept of a validation set, a dataset that the model is never trained on and that is only used for evaluation at the end of each epoch.
If the training accuracy and the validation accuracy coincide, the model is not overfit, and if the accuracy is also high, it is ready to predict on the test dataset.
If the training accuracy is high and the validation accuracy is low, it is a case of overfitting.

In [None]:
# Plot per-epoch training vs. validation curves: loss on top, accuracy below.
f, ax = plt.subplots(2, 1)

ax[0].plot(history.history['loss'], color='b', label='Training Loss')
# No `axes=` keyword here: calling ax[0].plot already attaches the line to ax[0].
ax[0].plot(history.history['val_loss'], color='r', label='Validation Loss')
ax[0].set_ylabel('Loss')
legend = ax[0].legend(loc='best', shadow=True)

# Keras records the accuracy metric as 'acc' in older releases and as
# 'accuracy' in newer ones; use whichever key this run produced.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
ax[1].plot(history.history[acc_key], color='b', label='Accuracy')
ax[1].plot(history.history['val_' + acc_key], color='r', label='Validation Accuracy')
ax[1].set_xlabel('Epoch')
ax[1].set_ylabel('Accuracy')
legend = ax[1].legend(loc='best', shadow=True)

Clearly this is not a case of overfitting.

In [None]:
from sklearn.metrics import confusion_matrix

# Turn the softmax outputs and the one-hot validation targets back into digit labels.
Y_pred = model.predict(X_val)
Y_pred = np.argmax(Y_pred, axis=1)
Y_true = np.argmax(Y_val, axis=1)

# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are the
# true labels and columns the predicted labels. Passing them the other way
# round yields the transposed matrix.
cm = confusion_matrix(Y_true, Y_pred)

plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion_matrix')
plt.xlabel('Predicted label')
plt.ylabel('True label');


In [None]:
# Predict a digit for every test image and write the submission file.
results = model.predict(test_matrix)
results = np.argmax(results, axis=1)  # index of the highest softmax output = predicted digit
results = pd.Series(results, name="Label")

# ImageId is 1-based. Its length is derived from the predictions instead of
# hard-coding 28000 rows, so the frame always lines up with `results`.
image_ids = pd.Series(range(1, len(results) + 1), name="ImageId")
submission = pd.concat([image_ids, results], axis=1)

# Write relative to the current working directory rather than reaching back
# into ../input, which is where the source data lives.
submission.to_csv("submission.csv", index=False)