<a href="https://colab.research.google.com/github/Mgalvaz/digit-recognizer/blob/main/src/train_classic_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
from keras.losses import CategoricalCrossentropy
from keras import layers, models
from keras.datasets import mnist
from keras.utils import to_categorical
import tensorflow as tf
import json

In [3]:
# Load the MNIST dataset (downloaded on first use)
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Add a trailing channel axis and convert to float32 so the images match the
# (28, 28, 1) input expected by the convolutional layers.
train_images = train_images.reshape((-1, 28, 28, 1)).astype('float32')
test_images = test_images.reshape((-1, 28, 28, 1)).astype('float32')

# One-hot encode the labels. The class count is pinned to the 10 digits so
# that train and test encodings always have the same width, instead of
# letting to_categorical infer it from the largest label in each array.
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [9]:
# Define the two convolutional classifiers.
# Each one rescales pixel values by 1/255 as its first step and finishes with
# a 10-unit linear layer, i.e. it outputs unnormalized scores (no softmax).

# model1: two conv + max-pool stages followed by a small dense head
model1 = models.Sequential([
    layers.Input(shape=(28, 28, 1)),
    layers.Rescaling(scale=1./255),
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(units=64, activation='relu'),
    layers.Dense(units=10, activation='linear'),
])

# model2: 'same'-padded convolutions with shrinking filter counts
# (128 -> 64 -> 32) and dropout before and inside the dense head
model2 = models.Sequential([
    layers.Input(shape=(28, 28, 1)),
    layers.Rescaling(scale=1./255),
    layers.Conv2D(filters=128, kernel_size=(5, 5), strides=(1, 1), padding='same', activation='relu'),
    layers.Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu'),
    layers.MaxPooling2D(strides=(2, 2), padding='valid'),
    layers.Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu'),
    layers.MaxPooling2D(strides=(2, 2), padding='valid'),
    layers.Dropout(rate=0.25),
    layers.Flatten(),
    layers.Dense(units=32, activation='relu'),
    layers.Dropout(rate=0.5),
    layers.Dense(units=10, activation='linear'),
])

In [10]:
# Compile both networks with the same recipe: Adam optimizer and categorical
# cross-entropy computed on logits (from_logits=True), tracking accuracy.
model1.compile(
    optimizer='adam',
    loss=CategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
model2.compile(
    optimizer='adam',
    loss=CategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Fit model1, then model2, with identical settings; validation_split=0.1
# reserves a tenth of the training data for validation.
history1 = model1.fit(
    train_images, train_labels,
    epochs=5, batch_size=64, validation_split=0.1,
)
history2 = model2.fit(
    train_images, train_labels,
    epochs=5, batch_size=64, validation_split=0.1,
)

# Score both trained networks on the test set (loss, accuracy)
test_loss1, test_acc1 = model1.evaluate(test_images, test_labels)
test_loss2, test_acc2 = model2.evaluate(test_images, test_labels)

Epoch 1/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - accuracy: 0.8545 - loss: 0.4687 - val_accuracy: 0.9823 - val_loss: 0.0591
Epoch 2/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9807 - loss: 0.0626 - val_accuracy: 0.9892 - val_loss: 0.0404
Epoch 3/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9869 - loss: 0.0419 - val_accuracy: 0.9888 - val_loss: 0.0369
Epoch 4/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.9906 - loss: 0.0308 - val_accuracy: 0.9888 - val_loss: 0.0369
Epoch 5/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9922 - loss: 0.0246 - val_accuracy: 0.9903 - val_loss: 0.0349
Epoch 1/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 14ms/step - accuracy: 0.7139 - loss: 0.8338 - val_accuracy: 0.9828 - val_loss: 0.0558
Epoch 2/5
[1m844/844[0m [32m━

In [12]:
def save_history(history, test_loss, test_acc, filename):
    """Write a training history and the test metrics to a JSON file.

    Args:
        history: Object exposing a ``history`` mapping from metric name to a
            sequence of numeric values (here, the value returned by ``fit``).
        test_loss: Loss measured on the test set.
        test_acc: Accuracy measured on the test set.
        filename: Path of the JSON file to create or overwrite.

    Raises:
        TypeError, ValueError: If a metric value cannot be converted to float.
        OSError: If the file cannot be opened for writing.
    """
    # Cast every value to a built-in float: NumPy scalars such as np.float32
    # are not JSON serializable and would make json.dump raise TypeError.
    data = {
        'history': {
            metric: [float(value) for value in values]
            for metric, values in history.history.items()
        },
        'test': {'test_loss': float(test_loss), 'test_accuracy': float(test_acc)}
    }
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f)

# Persist each trained network together with its training record

# First CNN: model file plus history/test metrics
model1.save('mnist_cnn1.keras')
save_history(history1, test_loss1, test_acc1, 'history1.json')

# Second CNN: model file plus history/test metrics
model2.save('mnist_cnn2.keras')
save_history(history2, test_loss2, test_acc2, 'history2.json')