# Digit Recognizer
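Learn computer vision fundamentals with the famous MNIST data. This notebook trains an InceptionResNetV2 network from scratch (no pretrained weights) on the Kaggle Digit Recognizer data and writes the test-set predictions to `submission.csv`.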

---
## 1. Bibliotecas e Módulos

In [1]:
import pandas as pd
from tensorflow.keras import Sequential
from tensorflow.keras import layers
from tensorflow.keras import applications
from tensorflow.keras import callbacks

import tensorflow as tf
# Fix the global seed once, up front, so weight initialisation and shuffling are
# repeatable between runs (per the Keras docs this call seeds Python's `random`,
# NumPy and TensorFlow together).
tf.keras.utils.set_random_seed(0)

---
## 2. Dados

In [2]:
# All three competition CSVs live in the same directory; point INPUT_DIR
# somewhere else to run this notebook outside of Kaggle.
INPUT_DIR = '/kaggle/input/digit-recognizer'

# train.csv: a 'label' column followed by pixel0..pixel783.
# test.csv: the pixel columns only.
# sample_submission.csv: the ImageId/Label template that is filled in at the end.
train_data = pd.read_csv(f'{INPUT_DIR}/train.csv')
test_data = pd.read_csv(f'{INPUT_DIR}/test.csv')
submission = pd.read_csv(f'{INPUT_DIR}/sample_submission.csv')

# Last expression of the cell: rich preview of the training frame.
train_data

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41996,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41997,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41998,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# Preview the test frame: same pixel0..pixel783 columns as the training data, but no 'label'.
test_data

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27997,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27998,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
# Preview the sample-submission template (columns: ImageId, Label).
submission

Unnamed: 0,ImageId,Label
0,1,0
1,2,0
2,3,0
3,4,0
4,5,0
...,...,...
27995,27996,0
27996,27997,0
27997,27998,0
27998,27999,0


In [5]:
# Split the target off the pixel columns. `pop` removes 'label' from train_data
# in place, so only the pixel columns remain in the frame afterwards.
y_train = train_data.pop('label')

# Every row is one flattened image; reshape to (N, 28, 28, 1), i.e. a single
# grayscale channel, matching the (28, 28, 1) Input the model declares.
X_train, X_test = (
    frame.values.reshape(-1, 28, 28, 1)
    for frame in (train_data, test_data)
)

# The DataFrame names are not needed past this point; only the arrays are used.
del train_data, test_data

# Sanity check on the resulting shapes.
print(f'X_train shape: {X_train.shape} | y_train shape: {y_train.shape}')
print(f'X_test shape: {X_test.shape}')

X_train shape: (42000, 28, 28, 1) | y_train shape: (42000,)
X_test shape: (28000, 28, 28, 1)


---
## 3. Modelagem

In [6]:
# The pipeline, in the order the layers are applied:
#   1. Resizing          - upsample the 28x28 digits to 84x84 for the backbone.
#   2. RandomRotation    - seeded rotation augmentation (factor=0.1).
#   3. Lambda            - the preprocessing function that ships with InceptionResNetV2.
#   4. InceptionResNetV2 - randomly initialised (weights=None) with a 10-way softmax head.
# Layers are constructed in exactly this order so seeded initialisation is unaffected.
model = Sequential(
    layers=[
        layers.Input(shape=(28, 28, 1)),
        layers.Resizing(height=84, width=84),
        layers.RandomRotation(factor=0.1, seed=0),
        layers.Lambda(applications.inception_resnet_v2.preprocess_input),
        applications.InceptionResNetV2(
            weights=None,
            input_shape=(84, 84, 1),
            # NOTE(review): `pooling` is documented for include_top=False; with the
            # default classification top it is presumably a no-op - confirm.
            pooling='avg',
            classes=10,
            classifier_activation='softmax',
        ),
    ]
)

# The labels are plain integer digits, hence the sparse cross-entropy loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resizing (Resizing)         (None, 84, 84, 1)         0         
                                                                 
 random_rotation (RandomRota  (None, 84, 84, 1)        0         
 tion)                                                           
                                                                 
 lambda (Lambda)             (None, 84, 84, 1)         0         
                                                                 
 inception_resnet_v2 (Functi  (None, 10)               54351530  
 onal)                                                           
                                                                 
Total params: 54,351,530
Trainable params: 54,290,986
Non-trainable params: 60,544
_________________________________________________________________


In [7]:
# Train for at most 50 epochs on batches of 1000 images. `validation_split=0.05`
# holds out 5% of the training data for validation.
#
# EarlyStopping (default monitor) stops the run after 10 epochs without
# improvement and restores the best weights seen. It is wrapped in a list because
# `callbacks` is documented as a list of callback instances.
#
# `use_multiprocessing` is intentionally not passed: Keras documents it for
# generator / Sequence input only, X_train and y_train are in-memory arrays, and
# newer Keras releases no longer accept the argument in fit().
model.fit(
    x=X_train,
    y=y_train,
    batch_size=1000,
    epochs=50,
    callbacks=[callbacks.EarlyStopping(patience=10, restore_best_weights=True)],
    validation_split=0.05,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50


<keras.callbacks.History at 0x7dc6bf5b8d90>

In [8]:
# The model outputs one softmax score per digit class; the predicted digit is the
# index of the largest score. `use_multiprocessing` is not passed because X_test is
# an in-memory array (the flag is documented for generator / Sequence input only,
# and newer Keras releases no longer accept it in predict()).
predicted_labels = model.predict(X_test).argmax(axis=-1)

# Fill the sample-submission template and write the CSV without the index column.
submission['Label'] = predicted_labels
submission.to_csv('submission.csv', index=False)

