In [0]:
# Upload the CSV files from the local machine into the Colab runtime
from google.colab import files

uploaded = files.upload()

# `uploaded` maps each file name to its content; report the size of every upload
for fn, content in uploaded.items():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(content)))

Saving test.csv to test.csv
Saving train.csv to train.csv
User uploaded file "test.csv" with length 51118296 bytes
User uploaded file "train.csv" with length 76775041 bytes


In [0]:
# Importing the libraries
import tensorflow as tf
import pandas as pd

In [0]:
# Reading the training data into a DataFrame.
# The cells below expect a 'label' column; the remaining columns are used as features.
data = pd.read_csv('./train.csv') 

In [0]:
# Separate the labels from the model inputs
y = data['label']                  # Target vector: the 'label' column
X = data.drop(columns=['label'])   # Feature matrix: every column except 'label'

In [0]:
# Normalization of the data: divide every feature value by 255
# (presumably maps 8-bit pixel intensities into [0, 1] — confirm the CSV value range).
# NOTE(review): this rebinds X, so re-running only this cell divides by 255 a second time.
X = X/255.0

In [0]:
# Reshaping the values into images of dimensions 28 x 28 with one channel.
# -1 lets the row count be inferred; the trailing 1 matches the
# input_shape=(28,28,1) declared on the model's first layer.
# NOTE(review): X stops being a DataFrame here, so re-running only this cell
# fails on `.values`; re-run from the feature-matrix cell instead.
X = X.values.reshape(-1,28,28,1)

In [0]:
# One-hot encode the labels into 10 class columns.
# The encoding is derived from the raw 'label' column rather than from `y` itself,
# so re-running this cell does not one-hot encode an already encoded array.
# Note: one-hot targets require a categorical (non-sparse) cross-entropy loss.
y = tf.keras.utils.to_categorical(data['label'], num_classes=10)

In [0]:
# Train / validation split
from sklearn.model_selection import train_test_split

# Hold out 10% of the labelled data. Despite the *_test names, this held-out part
# is what model.fit receives as validation_data; test.csv is loaded separately.
# random_state pins the shuffle so the split, and therefore the reported
# validation metrics, are reproducible from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42)

In [0]:
# Building the model: two (conv, conv, max-pool, dropout) stages followed by a
# dense classifier. 'same' padding keeps the spatial size through each
# convolution, so only the 2x2 pooling layers shrink it (28 -> 14 -> 7).
model = tf.keras.Sequential([
    # Stage 1: 32 filters of 5x5. Only the first layer declares the input shape;
    # later layers infer theirs from the preceding layer's output.
    tf.keras.layers.Conv2D(filters=32, kernel_size=(5, 5), padding='same',
                           activation=tf.nn.relu, input_shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(filters=32, kernel_size=(5, 5), padding='same',
                           activation=tf.nn.relu),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2)),  # Max pooling layer
    tf.keras.layers.Dropout(0.25),  # Dropout layer

    # Stage 2: 64 filters of 3x3.
    tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), padding='same',
                           activation=tf.nn.relu),
    tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), padding='same',
                           activation=tf.nn.relu),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),  # Max pooling layer
    tf.keras.layers.Dropout(0.25),  # Dropout layer

    # Classifier head.
    tf.keras.layers.Flatten(),  # Flatten the feature maps into one vector per image
    tf.keras.layers.Dense(256, activation=tf.nn.relu),  # Dense layer with 256 neurons
    tf.keras.layers.Dropout(0.5),  # Dropout layer
    tf.keras.layers.Dense(10, activation=tf.nn.softmax),  # Output layer: one probability per class
])

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [0]:
# Compiling the model
# The targets are one-hot encoded with to_categorical, so the loss must be
# 'categorical_crossentropy'; the 'sparse_' variant expects integer class ids.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [0]:
# Fit the network on the training split and evaluate on the held-out split
# after every epoch
model.fit(
    X_train,
    y_train,
    batch_size=86,
    epochs=30,
    validation_data=(X_test, y_test),
)

Train on 37800 samples, validate on 4200 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7fea276fb3c8>

In [0]:
# Reading the test data; unlike the training frame, no column is dropped
# before the values are reshaped into images below.
test = pd.read_csv('./test.csv')

In [0]:
# Normalizing with the same 1/255 scaling that was applied to the training features.
# NOTE(review): this rebinds `test`, so re-running only this cell scales it twice.
test = test / 255.0

In [0]:
# Reshaping into the same (-1, 28, 28, 1) layout used for the training features.
# NOTE(review): `test` stops being a DataFrame here, so re-running only this cell
# fails on `.values`.
test = test.values.reshape(-1,28,28,1)

In [0]:
# Predicting the test data with the trained model.
# The final layer is a 10-unit softmax, so each row holds 10 class probabilities.
result = model.predict(test)

In [0]:
# Inspect the raw predictions (one row per test image, one column per class)
result

array([[9.74710696e-14, 6.39215584e-16, 1.00000000e+00, ...,
        1.16675333e-14, 2.74620406e-15, 1.54960232e-18],
       [1.00000000e+00, 1.55401403e-16, 3.28144087e-14, ...,
        3.78229137e-15, 1.17474305e-14, 2.65533359e-11],
       [2.44779322e-13, 7.08130386e-13, 3.67062596e-11, ...,
        1.66286296e-09, 1.46644390e-06, 9.99998450e-01],
       ...,
       [1.47136931e-28, 2.62452530e-17, 4.59114450e-20, ...,
        2.02870373e-16, 1.90770282e-17, 1.76883744e-18],
       [2.36041672e-11, 2.39834819e-10, 8.48705273e-10, ...,
        3.19096330e-06, 1.89266217e-07, 9.99983668e-01],
       [1.05618590e-16, 3.87853078e-17, 1.00000000e+00, ...,
        5.85026954e-14, 7.97544133e-14, 2.23306118e-19]], dtype=float32)

In [0]:
# Predicting the data for kaggle submission
# Got 0.99357 on kaggle with rank 723
import numpy as np

# Collapse each row of class probabilities to the index of the most likely class.
# NOTE: `result` is rebound from probabilities to labels here, so re-run the
# predict cell before re-running this one.
result = pd.Series(np.argmax(result, axis=1), name="Label")
# ImageId is 1-based and sized from the predictions rather than a hard-coded
# 28000 rows, so the frame stays aligned if the test set size changes.
image_ids = pd.Series(range(1, len(result) + 1), name="ImageId")
submission = pd.concat([image_ids, result], axis=1)

In [0]:
# Preview the first rows only instead of displaying the whole submission frame
submission.head(10)

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3
5,6,7
6,7,0
7,8,3
8,9,0
9,10,3


In [0]:
# Write the submission file; index=False keeps the DataFrame index out of the CSV,
# leaving only the ImageId and Label columns.
submission.to_csv('submission.csv', index=False)

In [0]:
# List the working directory to confirm submission.csv was written
!ls

sample_data  submission.csv  test.csv  train.csv


In [0]:
# Download the generated submission file from the Colab runtime to the local machine
from google.colab import files

files.download('submission.csv')