Digit Recognizer Kaggle

In [125]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout 
from tensorflow.keras import utils

from sklearn.model_selection import train_test_split
from google.colab import files
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline 

In [3]:
file = files.upload()

Saving kaggle.json to kaggle.json


In [4]:
!mkdir ~/.kaggle
!mv kaggle.json ~/.kaggle

In [5]:
!kaggle competitions download -c digit-recognizer

Downloading digit-recognizer.zip to /content
 85% 13.0M/15.3M [00:01<00:00, 13.4MB/s]
100% 15.3M/15.3M [00:01<00:00, 9.37MB/s]


In [71]:
!ls

digit-recognizer.zip  sample_submission.csv  test.csv
sample_data	      submission.csv	     train.csv


In [126]:
train_dataset = np.loadtxt('train.csv', skiprows=1, delimiter=',')

In [127]:
train_dataset[0:5]

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [4., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [134]:
x_train = train_dataset[:, 1:]
x_train = x_train.reshape(x_train.shape[0], 28, 28)
input_shape = (28, 28)

In [135]:
x_train = x_train / 255.0

In [136]:
x_train[1].shape

(28, 28)

In [137]:
y_train = train_dataset[:, 0]

In [138]:
y_train[:5]

array([1., 0., 1., 4., 0.])

In [140]:
x_train, x_test, y_train, y_val = train_test_split(x_train, y_train, test_size = 0.2, random_state=42)

In [141]:
x_train.shape

(26880, 28, 28)

In [142]:
model = keras.Sequential([
                        keras.layers.Flatten(input_shape=(28,28)),
                        keras.layers.Dense(128, activation="relu"), 
                        keras.layers.Dense(10, activation="softmax")
])

In [143]:
model.compile(optimizer=tf.keras.optimizers.SGD(), 
              loss="sparse_categorical_crossentropy", metrics=["accuracy"])

print(model.summary())

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_5 (Flatten)         (None, 784)               0         
                                                                 
 dense_10 (Dense)            (None, 128)               100480    
                                                                 
 dense_11 (Dense)            (None, 10)                1290      
                                                                 
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________
None


In [144]:
model.fit(x_train, y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f2881e9a4c0>

In [145]:
test_dataset = np.loadtxt('test.csv', skiprows=1, delimiter=",")

In [146]:
test_dataset[:5]

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [147]:
x_test = test_dataset.reshape(test_dataset.shape[0], 28, 28)

In [148]:
x_test = x_test / 255.0

In [149]:
predictions = model.predict(x_test)



In [150]:
predictions[:5]

array([[3.94374765e-05, 6.44337084e-10, 9.99810040e-01, 1.41677156e-04,
        6.56639898e-09, 6.67068321e-07, 2.90839466e-07, 2.04100800e-07,
        7.30003012e-06, 3.23566468e-07],
       [9.95789826e-01, 6.22984830e-10, 3.91878311e-05, 2.99471994e-05,
        2.73097291e-11, 4.11502365e-03, 2.43090699e-06, 7.41492613e-06,
        1.61752050e-05, 1.31594433e-07],
       [1.21889543e-03, 3.68990423e-03, 1.46680828e-02, 7.62688136e-03,
        2.50886619e-01, 4.98206615e-02, 3.36106448e-03, 1.04238736e-02,
        1.03157811e-01, 5.55146158e-01],
       [5.92609355e-03, 1.01756574e-04, 8.37940425e-02, 2.32642720e-04,
        1.69691205e-01, 1.18082413e-03, 4.57473146e-03, 2.87175447e-01,
        5.02925413e-03, 4.42294002e-01],
       [2.45621137e-04, 1.11476122e-03, 2.27337644e-01, 7.58063972e-01,
        1.03981256e-05, 2.90241488e-03, 2.79300660e-03, 4.69533668e-04,
        7.02053634e-03, 4.21484110e-05]], dtype=float32)

In [151]:
predictions = np.argmax(predictions, axis=1)

In [152]:
predictions[:5]

array([2, 0, 9, 9, 3])

In [153]:
out = np.column_stack((range(1, predictions.shape[0]+1), predictions))

In [154]:
out[:5]

array([[1, 2],
       [2, 0],
       [3, 9],
       [4, 9],
       [5, 3]])

In [155]:
np.savetxt('submission.csv', out, header="ImageId,Label", 
            comments="", fmt="%d,%d")

In [156]:
!head submission.csv

ImageId,Label
1,2
2,0
3,9
4,9
5,3
6,7
7,0
8,3
9,0


In [157]:
!kaggle competitions submit -c digit-recognizer -m "Submition from Colab" -f submission.csv

100% 208k/208k [00:02<00:00, 72.2kB/s]
Successfully submitted to Digit Recognizer