<a href="https://colab.research.google.com/github/NicKylis/letter_recognition/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import kagglehub
import shutil
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow import keras
from tensorflow.keras.utils import to_categorical

# Smoke test: this line only runs if every import above succeeded, so seeing
# the message confirms the environment is set up.
print ("hello world!")

In [None]:
def get_MNIST_dataset(train_generator, batch_size=32, val_split=0.2, random_state=42):
    """Load MNIST and wrap it in train / validation / test batch iterators.

    Images are reshaped to ``(N, 28, 28, 1)``, cast to ``float32`` and divided
    by 255. Labels are one-hot encoded over 10 classes. Part of the official
    training set is held out as a validation set.

    Args:
        train_generator: ``ImageDataGenerator`` whose ``flow`` yields the
            training batches; any augmentation it is configured with applies
            to the training split only.
        batch_size: Number of samples per batch for all three iterators.
        val_split: Passed to ``train_test_split`` as ``test_size`` to size the
            validation hold-out.
        random_state: Seed for the train/validation split.

    Returns:
        A ``(train_gen, val_gen, test_gen)`` tuple of iterators. Validation
        and test batches come from an unconfigured ``ImageDataGenerator``;
        the test iterator is not shuffled.
    """
    def as_model_input(images):
        # Add the channel axis and scale the pixel values.
        return images.reshape(-1, 28, 28, 1).astype("float32") / 255.0

    (train_images, train_labels), (test_images, test_labels) = (
        tf.keras.datasets.mnist.load_data()
    )
    train_images = as_model_input(train_images)
    test_images = as_model_input(test_images)

    train_images, val_images, train_labels, val_labels = train_test_split(
        train_images,
        train_labels,
        test_size=val_split,
        random_state=random_state,
    )

    # MNIST has 10 classes (digits 0-9).
    n_classes = 10
    train_labels, val_labels, test_labels = (
        to_categorical(labels, n_classes)
        for labels in (train_labels, val_labels, test_labels)
    )

    # Validation and test data go through a generator with no transforms.
    plain_generator = ImageDataGenerator()
    return (
        train_generator.flow(train_images, train_labels, batch_size=batch_size),
        plain_generator.flow(val_images, val_labels, batch_size=batch_size),
        # Keep test order fixed so predictions can be lined up with labels.
        plain_generator.flow(test_images, test_labels, batch_size=batch_size, shuffle=False),
    )

In [None]:
# Baseline generator: no transforms, training images are fed as-is.
train_generator = ImageDataGenerator()

# Augmenting generator: random rotations, shifts, shear and zoom applied to
# each training batch.
train_generator_aug = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
)

# Same split parameters for both runs, so only the training generator differs.
train_data, val_data, test_data = get_MNIST_dataset(
    train_generator, batch_size=32, val_split=0.2, random_state=42
)
# Fix: pass the augmenting generator here. The original passed
# `train_generator` again, so the "_aug" iterators were never augmented.
train_data_aug, val_data_aug, test_data_aug = get_MNIST_dataset(
    train_generator_aug, batch_size=32, val_split=0.2, random_state=42
)

In [None]:
# Small CNN for 28x28 single-channel images: two conv/pool stages followed by
# a dense classifier head.
model = keras.Sequential([
    # Declare the input shape with an explicit Input object; passing
    # `input_shape=` to the first layer is deprecated in recent Keras.
    keras.Input(shape=(28, 28, 1)),
    keras.layers.Conv2D(32, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D((2, 2)),

    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D((2, 2)),

    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(10, activation='softmax'),  # 10 output neurons for digits 0-9
])

# Labels are one-hot encoded, hence categorical (not sparse) cross-entropy.
# 'mae' is tracked alongside accuracy because the plotting cell reads
# history.history['mae'] / ['val_mae'].
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy', 'mae'],
)

In [None]:
# Stop when the validation loss has not improved for 10 epochs and roll back
# to the best weights seen. Fix: monitor 'val_loss' rather than the training
# 'loss', which cannot signal overfitting.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# The data iterators already yield batches (their batch size is set in
# get_MNIST_dataset), so `batch_size` must not be passed to fit(). `shuffle`
# is left at its default of True.
history = model.fit(
    train_data_aug,
    validation_data=val_data_aug,
    epochs=20,
    callbacks=[callback],
)

In [None]:
# The model is compiled with two metrics (accuracy and mae), so evaluate()
# returns three values; unpacking into two names raised ValueError.
# return_dict=True lets each value be picked by name instead of by position.
results = model.evaluate(test_data_aug, return_dict=True)
loss, acc = results['loss'], results['accuracy']

# Format with a precision specifier; 100*round(acc, 4) can print float
# artefacts such as 98.92999999999999.
print(f"Test Accuracy: {acc * 100:.2f}%")
print(f"Test Loss: {loss:.4f}")

In [None]:
# Per-epoch curves recorded by model.fit for the training and validation sets.
epoch_loss, epoch_val_loss = history.history['loss'], history.history['val_loss']
epoch_mae, epoch_val_mae = history.history['mae'], history.history['val_mae']

# One panel per quantity: loss on the left, MAE on the right.
fig, ax = plt.subplots(1, 2, figsize=(15, 9))
panels = (
    # (axes, train curve, validation curve, name used in title, name used in labels)
    (ax[0], epoch_loss, epoch_val_loss, 'loss', 'Loss'),
    (ax[1], epoch_mae, epoch_val_mae, 'MAE', 'MAE'),
)
for axis, train_curve, val_curve, title_name, label_name in panels:
    axis.plot(range(len(train_curve)), train_curve, 'b-', linewidth=2,
              label=f'Train {label_name}')
    axis.plot(range(len(val_curve)), val_curve, 'r-', linewidth=2,
              label=f'Val {label_name}')
    axis.set_title(f'Evolution of {title_name} on train & validation datasets over epochs')
    axis.set_xlabel('Epoch')
    axis.set_ylabel(label_name)
    axis.legend(loc='best')