In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [None]:
# Read the digit-recognizer training CSV into a DataFrame.
train = pd.read_csv("../input/digit-recognizer/train.csv")

In [None]:
from tensorflow import keras
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten,Dropout, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# (rows, columns) of the raw training frame.
train.shape

In [None]:
# Preview the first rows of the training frame.
train.head()

In [None]:
# Separate the target column ('label') from the remaining feature columns.
X_train = train.drop(columns='label')
Y_train = train['label']

In [None]:
# Hold out 10% of the labelled rows for validation; the fixed random_state
# keeps the split repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X_train,
    Y_train,
    test_size=0.1,
    random_state=48,
)

In [None]:
# Render the first training row as a 28x28 image, then print its label.
plt.imshow(x_train.iloc[0].to_numpy().reshape(28, 28))
print(y_train.iloc[0])

In [None]:
# Shape of the training features before they are reshaped into images.
x_train.shape

In [None]:
# Turn every flat pixel row into a 28x28 single-channel image; the leading
# dimension (-1) is inferred and equals the number of rows.
x_train = np.array(x_train).reshape(-1, 28, 28, 1)
x_test = np.array(x_test).reshape(-1, 28, 28, 1)

In [None]:
# Divide the pixel values by 255 (maps them into [0, 1], assuming the CSV
# stores 0-255 intensities). Both names are rebound to the scaled arrays, so
# running this cell a second time would divide the data again.
x_train, x_test = x_train / 255, x_test / 255

In [None]:
# Convert the label Series to a plain ndarray so that `y_train[0]` in the
# following cells indexes by position.
y_train = np.array(y_train)

In [None]:
# Sanity check after reshaping/scaling: show the first image and its label.
plt.imshow(x_train[0])
print("label:", y_train[0])

In [None]:
# One-hot encode the digit labels. num_classes is pinned to 10 (digits 0-9)
# instead of being inferred from the largest label present, so the training
# and validation targets always have the same width as the 10-unit softmax
# output layer, even if a split happens to miss the highest digit.
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

In [None]:
# One-hot encoded vector for the first training label.
y_train[0]

# creating a model

* Model type: Sequential - allows us to build a model layer by layer.
* The first 2 layers are 2D convolutional layers - the input images are treated as 2D matrices.
* 64 and 32 are the number of filters (nodes) in each of these layers. They are chosen based on the size of the data and have to be tuned.
* Kernel size: the size of the filter matrix - in our case 3, i.e. a 3x3 filter matrix is used.
* Activation function: ReLU (rectified linear activation) - a common default that works well for neural networks.
* The first layer takes input images of shape (28,28,1), where 1 signifies that the images are greyscale.
* We have 10 nodes in our output layer, one for each possible outcome (0-9).
* The activation in the output layer is softmax (it makes the outputs sum to 1) so that we can interpret the output as probabilities. The prediction is made based on these probabilities.


In [None]:
# Assemble the CNN in one go: the layer list is applied top to bottom.
model = Sequential([
    # Two 3x3 convolution stages (64 filters, then 32) on 28x28x1 inputs.
    Conv2D(64, kernel_size=3, activation='relu', input_shape=(28, 28, 1)),
    Conv2D(32, kernel_size=3, activation='relu'),
    # Downsample, flatten, and classify through a small dense head.
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    # One softmax output per digit class.
    Dense(10, activation='softmax'),
])

# Compiling the model
Compiling the model takes three parameters: optimizer, loss and metrics
* Optimizer: controls the learning rate - adam in our case
* Loss function: categorical_crossentropy - a lower score indicates that the model is performing better.
* Metrics: accuracy - reported for the training and validation data while the model trains.

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss for the
# one-hot targets, and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

* training data: x_train, y_train
* validation data: x_test, y_test
* epochs: the number of times the model will cycle through the data. More epochs improve the accuracy of the model up to a certain point, after which there is no further improvement - 35 in our case.

In [None]:
# Fit the network for 35 epochs, using the held-out split as validation data.
model.fit(
    x_train,
    y_train,
    epochs=35,
    validation_data=(x_test, y_test),
)

In [None]:
# Read the digit-recognizer test CSV (the rows to predict) into a DataFrame.
test = pd.read_csv("../input/digit-recognizer/test.csv")

In [None]:
# Preview the first rows of the test frame.
test.head()

In [None]:
# (rows, columns) of the raw test frame.
test.shape

In [None]:
# Reshape the flat test rows into 28x28 single-channel images; the leading
# dimension (-1) is inferred and equals the number of rows.
test = np.array(test).reshape(-1, 28, 28, 1)

In [None]:
# Confirm the reshaped test array is (n_images, 28, 28, 1).
test.shape

In [None]:
# Apply the same /255 scaling that was used on the training images.
# NOTE: `test` is rebound to its own scaled value, so re-running this cell
# divides the data by 255 again.
test = test/ 255

In [None]:
# Run the trained network on the test images; each output row holds the
# 10 softmax values produced by the final Dense layer.
prediction_probabilities = model.predict(test)

In [None]:
# Number of prediction rows (one per test image).
len(prediction_probabilities)

In [None]:
# Pick, for every test image, the class index with the highest predicted
# probability. np.argmax works on the whole (n_images, n_classes) array at
# once and returns the first index on ties, like list.index(max(...)).
# .tolist() keeps `prediction` a plain list of Python ints.
prediction = np.argmax(prediction_probabilities, axis=1).tolist()

In [None]:
# Build the submission table: 1-based image ids paired with predicted labels.
df = pd.DataFrame(
    {
        'ImageId': range(1, len(prediction) + 1),
        'Label': prediction,
    }
)

In [None]:
# Display the submission frame for a visual check.
df

In [None]:
# Write the submission file without the DataFrame index column.
df.to_csv('submission.csv',index=False)