<a href="https://colab.research.google.com/github/Gallair/Deep_Learning/blob/main/number_recognition_with_TF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Let's write a CNN model to recognize handwritten digits

### 1. Become one with the data
1. Download a dataset and analyze it

In [None]:
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import zipfile
from sklearn.model_selection import train_test_split


In [None]:
# Unpack the zipped training CSV. The context manager closes the archive even
# if extraction raises, so the file handle is never leaked.
# NOTE(review): extractall() with no path writes into the current working
# directory, while the next cell reads "sample_data/train.csv" -- confirm the
# archive's internal layout puts the CSV where that cell expects it.
with zipfile.ZipFile("sample_data/train.csv.zip") as zip_ref:
    zip_ref.extractall()

In [None]:
# Read the training CSV, then split the "label" column (the target) away from
# the remaining columns (the features).
train_data = pd.read_csv("sample_data/train.csv")
train_label = train_data.pop("label")  # removes the column and returns it

In [None]:
# Hold out 20% of the labelled rows for evaluation; the fixed random_state
# makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    train_data,
    train_label,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Convert both splits to TensorFlow tensors: features are cast to float32,
# labels keep whatever dtype tf.constant infers for them.
X_train, X_test = (
    tf.constant(features, dtype=tf.float32) for features in (X_train, X_test)
)
y_train, y_test = (tf.constant(labels) for labels in (y_train, y_test))

In [None]:
# Scale the feature values by 1/255.
# Caution: this rebinds X_train / X_test, so running the cell a second time
# divides by 255 again.
X_train, X_test = X_train / 255.0, X_test / 255.0

In [None]:
# Turn each flat row into a 28x28 single-channel image. The leading -1 lets
# TensorFlow infer the batch dimension from the number of rows.
X_train_reshaped = tf.reshape(X_train, (-1, 28, 28, 1))
X_test_reshaped = tf.reshape(X_test, (-1, 28, 28, 1))

In [None]:
# Sanity check: the reshaped training features should be (samples, 28, 28, 1).
X_train_reshaped.shape

TensorShape([33600, 28, 28, 1])

In [None]:
# Sanity check: one label per training sample (1-D tensor).
y_train.shape

TensorShape([33600])

In [None]:
# X_train itself was not reshaped, so a single sample is still a flat vector.
X_train[0].shape

TensorShape([784])

In [None]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPool2D, Activation, Dropout

# Seed TensorFlow's global random generator for repeatable runs.
# set_seed is a function and has to be *called*; assigning to it only replaces
# the function with the integer 42 and seeds nothing.
tf.random.set_seed(42)

# Small CNN: two 3x3 convolutions with 10 filters each -> max-pool -> dropout
# -> flatten -> 10-unit softmax output.
model_1 = tf.keras.Sequential([
    # Declare the input shape with an explicit Input object instead of passing
    # `input_shape` to the first layer.
    tf.keras.Input(shape = (28, 28, 1)),
    Conv2D(filters = 10,
           activation = "relu",
           kernel_size = 3,
           strides = 1,
           padding = "valid"),
    Conv2D(10, 3, activation = "relu"),
    MaxPool2D(2, 2),
    Dropout(0.2),
    Flatten(),
    Dense(10, activation = "softmax")
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Configure training. The labels are a 1-D tensor of class ids rather than
# one-hot vectors, hence the sparse variant of categorical cross-entropy.
# Adam is used with its default settings; accuracy is the reported metric.
model_1.compile(
    optimizer=Adam(),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Print the layer-by-layer summary of the model built above.
model_1.summary()

In [None]:
# Train the model for 5 epochs, evaluating on the hold-out split after each one.
# steps_per_epoch / validation_steps count *batches*, not samples. The inputs
# are in-memory tensors, so both are left unset and Keras derives the number of
# batches per epoch from batch_size. Passing the sample count instead asks for
# far more steps than one pass over the data contains.
batch_size = 512
history_1 = model_1.fit(X_train_reshaped,
                        y_train,
                        epochs = 5,
                        batch_size = batch_size,
                        validation_data = (X_test_reshaped, y_test))

Epoch 1/5
[1m33600/33600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 125us/step - accuracy: 0.6766 - loss: 1.2336 - val_accuracy: 0.8764 - val_loss: 0.4232
Epoch 2/5
[1m33600/33600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9us/step - accuracy: 0.8841 - loss: 0.3846 - val_accuracy: 0.9087 - val_loss: 0.3173
Epoch 3/5
[1m33600/33600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9us/step - accuracy: 0.9138 - loss: 0.2919 - val_accuracy: 0.9313 - val_loss: 0.2460
Epoch 4/5
[1m33600/33600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9us/step - accuracy: 0.9363 - loss: 0.2185 - val_accuracy: 0.9487 - val_loss: 0.1825
Epoch 5/5
[1m33600/33600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9us/step - accuracy: 0.9500 - loss: 0.1702 - val_accuracy: 0.9575 - val_loss: 0.1486


In [None]:
# Build a two-column frame (ImageId, Label) from the model's class
# probabilities: the predicted label is the column index of the largest
# probability in each row, and ImageId is the 1-based row number.
# NOTE(review): X_test_reshaped is the 20% hold-out split of train.csv, not a
# separate test file -- confirm this is what the submission should contain.
test_pred = (
    pd.DataFrame(model_1.predict(X_test_reshaped, batch_size=512))
    .idxmax(axis=1)
    .rename("Label")
    .rename_axis("ImageId")
    .reset_index()
    .assign(ImageId=lambda frame: frame["ImageId"] + 1)
)

test_pred.head()


[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step


Unnamed: 0,ImageId,Label
0,1,8
1,2,1
2,3,9
3,4,9
4,5,8


In [None]:
# Write the prediction frame to disk; index=False keeps the row index out of
# the CSV so only the ImageId and Label columns are saved.
test_pred.to_csv("mnist_submission.csv", index=False)

In [None]:
# Show the raw model output for the hold-out images: one row per image, one
# softmax score per output unit.
model_1.predict(X_test_reshaped, batch_size=512)

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


array([[5.4939400e-04, 1.0964385e-03, 1.3723428e-02, ..., 1.7703107e-05,
        9.8110551e-01, 4.0465136e-04],
       [1.4006390e-04, 9.7672087e-01, 4.3659476e-03, ..., 1.1928253e-03,
        1.2601931e-02, 1.5841873e-03],
       [2.0060299e-06, 1.1403600e-06, 5.3219555e-06, ..., 1.3644649e-02,
        1.9242118e-04, 9.7865272e-01],
       ...,
       [6.4586231e-07, 5.9884710e-06, 2.3895795e-05, ..., 6.7737098e-08,
        1.2194085e-05, 2.2335290e-07],
       [9.8013651e-01, 1.4509904e-10, 1.1154104e-03, ..., 1.0572216e-05,
        3.1743806e-05, 1.2942631e-05],
       [2.6731300e-06, 1.7076618e-06, 3.3774177e-04, ..., 1.5018750e-03,
        4.0720133e-04, 9.9638027e-01]], dtype=float32)