In [None]:
import tensorflow as tf

# Global numeric configuration: keep Keras in float32 and switch off
# TensorFloat-32 execution.
tf.keras.backend.set_floatx("float32")
tf.config.experimental.enable_tensor_float_32_execution(False)

# Seed the Python, NumPy and TensorFlow RNGs once so that weight
# initialisation, dropout masks and the random augmentation are repeatable on
# Restart Kernel -> Run All (GPU kernels may still add small nondeterminism).
SEED = 42
tf.keras.utils.set_random_seed(SEED)
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
import pandas as pd
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Conv2D,
    MaxPool2D,
    Dense,
    Flatten,
    Dropout,
    BatchNormalization,
    Input,
)
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
from adjustText import adjust_text

In [None]:
# Load MNIST; the second split returned by load_data() serves as the
# validation set throughout this notebook.
(x_train, labels_train), (x_val, labels_val) = mnist.load_data()

# Images: cast to float32, divide by 255 and add a trailing channel axis so
# each array has shape (N, 28, 28, 1).
x_train, x_val = (
    (images.astype("float32") / 255.0).reshape(-1, 28, 28, 1)
    for images in (x_train, x_val)
)

# Labels: one-hot encode over the 10 digit classes.
y_train, y_val = (
    to_categorical(labels, 10) for labels in (labels_train, labels_val)
)

In [None]:
# Load Nearly-MNIST from CSV.
# The file is expected to hold one flattened 28x28 image per row plus a
# "Labels" column with the digit class.
df_nearly = pd.read_csv("./datasets/nearly_mnist.csv")

# Select the label column by name and treat every *other* column as a pixel.
# Dropping by name (instead of "everything but the last column") keeps the
# features correct even if "Labels" is not the final column in the file.
y_nearly_labels = df_nearly["Labels"].values.astype("int")
x_nearly = df_nearly.drop(columns="Labels").values.astype("float32") / 255.0

# Same layout as the MNIST arrays: (N, 28, 28, 1) images, one-hot labels.
x_nearly = x_nearly.reshape(-1, 28, 28, 1)
y_nearly = to_categorical(y_nearly_labels, 10)

In [None]:
# Custom callback: after every epoch, score the model on the MNIST validation
# set and on Nearly-MNIST, and keep both accuracy histories.
class ValidationAccLogger(Callback):
    """Track per-epoch accuracy on a validation set and on one extra dataset.

    Args:
        x_val: Inputs of the validation set.
        y_val: Targets of the validation set.
        x_extra: Inputs of the additional evaluation set.
        y_extra: Targets of the additional evaluation set.

    Attributes:
        val_accuracies: One value per finished epoch for the validation set.
        extra_accuracies: One value per finished epoch for the extra set.
    """

    def __init__(self, x_val, y_val, x_extra, y_extra):
        super().__init__()
        self.x_val, self.y_val = x_val, y_val
        self.x_extra, self.y_extra = x_extra, y_extra
        self.val_accuracies = []
        self.extra_accuracies = []

    def on_epoch_end(self, epoch, logs=None):
        """Evaluate both datasets and append the results to the histories.

        The recorded value is element [1] of ``model.evaluate``'s return
        value, i.e. the first compiled metric after the loss (accuracy for
        the models compiled in this notebook).
        """
        datasets = (
            (self.x_val, self.y_val),
            (self.x_extra, self.y_extra),
        )
        # Both evaluations finish before either history list is touched.
        val_score, extra_score = (
            self.model.evaluate(features, targets, verbose=0)[1]
            for features, targets in datasets
        )
        self.val_accuracies.append(val_score)
        self.extra_accuracies.append(extra_score)

In [None]:
# Define CNN: two convolutional stages followed by a regularised dense head.
inputs = Input(shape=x_train.shape[1:])

x = inputs
# Each stage: conv -> batch norm -> dropout -> 2x2 max-pool.
for n_filters, kernel_size in ((64, (5, 5)), (128, (3, 3))):
    x = Conv2D(n_filters, kernel_size, activation="relu", padding="same")(x)
    x = BatchNormalization()(x)
    x = Dropout(0.4)(x)
    x = MaxPool2D((2, 2))(x)

# Dense head with L2 weight decay and heavier dropout.
x = Flatten()(x)
x = Dense(256, activation="relu", kernel_regularizer=l2(0.01))(x)
x = BatchNormalization()(x)
x = Dropout(0.6)(x)

# 10-way softmax over the digit classes.
outputs = Dense(10, activation="softmax")(x)

net = Model(inputs=inputs, outputs=outputs)
net.summary()

In [None]:
# Data augmentation: random geometric perturbations of the training images.
datagen = ImageDataGenerator(
    rotation_range=20,  # random rotation, in degrees
    width_shift_range=0.2,  # horizontal shift, fraction of image width
    height_shift_range=0.2,  # vertical shift, fraction of image height
    zoom_range=0.2,  # zoom factor drawn from [0.8, 1.2]
    shear_range=0.1,  # shear intensity
)
# datagen.fit(x_train) is intentionally not called: fitting is only needed for
# featurewise centering / std-normalisation or ZCA whitening, none of which is
# enabled here, so it would just copy the training array for no effect.

In [None]:
# Train the current `net` on augmented MNIST, then plot per-epoch accuracy on
# the training stream, the MNIST validation set and Nearly-MNIST.
val_logger = ValidationAccLogger(x_val, y_val, x_nearly, y_nearly)
early_stopping = EarlyStopping(
    monitor="val_loss", patience=5, restore_best_weights=True
)

net.compile(
    loss="categorical_crossentropy",
    optimizer=Adam(learning_rate=0.0001),
    metrics=["accuracy"],
)

history = net.fit(
    datagen.flow(x_train, y_train, batch_size=128),
    validation_data=(x_val, y_val),
    epochs=30,
    # The logger runs before EarlyStopping on purpose: with
    # restore_best_weights=True some Keras versions roll the weights back
    # inside EarlyStopping.on_epoch_end on the stopping epoch, which would make
    # the last Nearly-MNIST point describe different weights than the last
    # History entry.
    callbacks=[val_logger, early_stopping],
)

train_acc = history.history["accuracy"]
val_acc = history.history["val_accuracy"]
nearly_acc = val_logger.extra_accuracies

# Epoch index at which each curve peaks
max_train_epoch = np.argmax(train_acc)
max_val_epoch = np.argmax(val_acc)
max_nearly_epoch = np.argmax(nearly_acc)

plt.figure(figsize=(10, 6))

# Draw each curve, mark its maximum and label it with the value. Marker and
# label reuse the curve's own colour so they always match the legend.
texts = []
for label, acc, best_epoch in (
    ("Train Accuracy", train_acc, max_train_epoch),
    ("Validation Accuracy (MNIST)", val_acc, max_val_epoch),
    ("Validation Accuracy (Nearly-MNIST)", nearly_acc, max_nearly_epoch),
):
    (curve,) = plt.plot(acc, label=label)
    colour = curve.get_color()
    plt.scatter(best_epoch, acc[best_epoch], color=colour)
    texts.append(
        plt.text(
            best_epoch,
            acc[best_epoch],
            f"{acc[best_epoch]:.4f}",
            color=colour,
        )
    )

# Adjust text to avoid overlap, with arrow
adjust_text(texts, arrowprops=dict(arrowstyle="->", color="gray"))

plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Training vs Validation Accuracy")
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Second architecture: two conv stages with two conv/batch-norm pairs each,
# then a dense head.
inputs = Input(shape=x_train.shape[1:])

x = inputs
# Conv Block 1 (64 filters) and Conv Block 2 (128 filters)
for n_filters in (64, 128):
    for conv_idx in range(2):
        x = Conv2D(
            n_filters, (3, 3), activation="leaky_relu", padding="same"
        )(x)
        x = BatchNormalization()(x)
    # Downsample, then regularise, at the end of every stage.
    x = MaxPool2D(pool_size=(2, 2))(x)
    x = Dropout(0.4)(x)

# Dense Block
x = Flatten()(x)
x = Dense(256, activation="leaky_relu", kernel_regularizer=l2(1e-4))(x)
x = BatchNormalization()(x)
x = Dropout(0.4)(x)

# 10-way softmax over the digit classes.
outputs = Dense(10, activation="softmax")(x)

net = Model(inputs=inputs, outputs=outputs)
net.summary()

In [None]:
# Data augmentation: random geometric perturbations of the training images.
datagen = ImageDataGenerator(
    rotation_range=20,  # random rotation, in degrees
    width_shift_range=0.2,  # horizontal shift, fraction of image width
    height_shift_range=0.2,  # vertical shift, fraction of image height
    zoom_range=0.2,  # zoom factor drawn from [0.8, 1.2]
    shear_range=0.1,  # shear intensity
)
# datagen.fit(x_train) is intentionally not called: fitting is only needed for
# featurewise centering / std-normalisation or ZCA whitening, none of which is
# enabled here, so it would just copy the training array for no effect.

In [None]:
# Train the current `net` on augmented MNIST, then plot per-epoch accuracy on
# the training stream, the MNIST validation set and Nearly-MNIST.
val_logger = ValidationAccLogger(x_val, y_val, x_nearly, y_nearly)
early_stopping = EarlyStopping(
    monitor="val_loss", patience=5, restore_best_weights=True
)

net.compile(
    loss="categorical_crossentropy",
    optimizer=Adam(learning_rate=0.0001),
    metrics=["accuracy"],
)

history = net.fit(
    datagen.flow(x_train, y_train, batch_size=128),
    validation_data=(x_val, y_val),
    epochs=30,
    # The logger runs before EarlyStopping on purpose: with
    # restore_best_weights=True some Keras versions roll the weights back
    # inside EarlyStopping.on_epoch_end on the stopping epoch, which would make
    # the last Nearly-MNIST point describe different weights than the last
    # History entry.
    callbacks=[val_logger, early_stopping],
)

train_acc = history.history["accuracy"]
val_acc = history.history["val_accuracy"]
nearly_acc = val_logger.extra_accuracies

# Epoch index at which each curve peaks
max_train_epoch = np.argmax(train_acc)
max_val_epoch = np.argmax(val_acc)
max_nearly_epoch = np.argmax(nearly_acc)

plt.figure(figsize=(10, 6))

# Draw each curve, mark its maximum and label it with the value. Marker and
# label reuse the curve's own colour so they always match the legend.
texts = []
for label, acc, best_epoch in (
    ("Train Accuracy", train_acc, max_train_epoch),
    ("Validation Accuracy (MNIST)", val_acc, max_val_epoch),
    ("Validation Accuracy (Nearly-MNIST)", nearly_acc, max_nearly_epoch),
):
    (curve,) = plt.plot(acc, label=label)
    colour = curve.get_color()
    plt.scatter(best_epoch, acc[best_epoch], color=colour)
    texts.append(
        plt.text(
            best_epoch,
            acc[best_epoch],
            f"{acc[best_epoch]:.4f}",
            color=colour,
        )
    )

# Adjust text to avoid overlap, with arrow
adjust_text(texts, arrowprops=dict(arrowstyle="->", color="gray"))

plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Training vs Validation Accuracy")
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Third architecture: two conv stages with three conv/batch-norm pairs each,
# then a dense head.
inputs = Input(shape=x_train.shape[1:])

x = inputs
# Conv Block 1 (64 filters) and Conv Block 2 (128 filters)
for n_filters in (64, 128):
    for conv_idx in range(3):
        x = Conv2D(
            n_filters, (3, 3), activation="leaky_relu", padding="same"
        )(x)
        x = BatchNormalization()(x)
    # Downsample, then regularise, at the end of every stage.
    x = MaxPool2D(pool_size=(2, 2))(x)
    x = Dropout(0.4)(x)

# Dense Block
x = Flatten()(x)
x = Dense(256, activation="leaky_relu", kernel_regularizer=l2(1e-4))(x)
x = BatchNormalization()(x)
x = Dropout(0.4)(x)

# 10-way softmax over the digit classes.
outputs = Dense(10, activation="softmax")(x)

net = Model(inputs=inputs, outputs=outputs)
net.summary()

In [None]:
# Data augmentation: stronger rotation and shear than the earlier runs.
datagen = ImageDataGenerator(
    rotation_range=35,  # random rotation, in degrees
    width_shift_range=0.2,  # horizontal shift, fraction of image width
    height_shift_range=0.2,  # vertical shift, fraction of image height
    zoom_range=0.2,  # zoom factor drawn from [0.8, 1.2]
    shear_range=0.2,  # shear intensity
)
# datagen.fit(x_train) is intentionally not called: fitting is only needed for
# featurewise centering / std-normalisation or ZCA whitening, none of which is
# enabled here, so it would just copy the training array for no effect.

In [None]:
# Train the current `net` on augmented MNIST, then plot per-epoch accuracy on
# the training stream, the MNIST validation set and Nearly-MNIST.
val_logger = ValidationAccLogger(x_val, y_val, x_nearly, y_nearly)
early_stopping = EarlyStopping(
    monitor="val_loss", patience=5, restore_best_weights=True
)

net.compile(
    loss="categorical_crossentropy",
    optimizer=Adam(learning_rate=0.0001),
    metrics=["accuracy"],
)

history = net.fit(
    datagen.flow(x_train, y_train, batch_size=128),
    validation_data=(x_val, y_val),
    epochs=30,
    # The logger runs before EarlyStopping on purpose: with
    # restore_best_weights=True some Keras versions roll the weights back
    # inside EarlyStopping.on_epoch_end on the stopping epoch, which would make
    # the last Nearly-MNIST point describe different weights than the last
    # History entry.
    callbacks=[val_logger, early_stopping],
)

train_acc = history.history["accuracy"]
val_acc = history.history["val_accuracy"]
nearly_acc = val_logger.extra_accuracies

# Epoch index at which each curve peaks
max_train_epoch = np.argmax(train_acc)
max_val_epoch = np.argmax(val_acc)
max_nearly_epoch = np.argmax(nearly_acc)

plt.figure(figsize=(10, 6))

# Draw each curve, mark its maximum and label it with the value. Marker and
# label reuse the curve's own colour so they always match the legend.
texts = []
for label, acc, best_epoch in (
    ("Train Accuracy", train_acc, max_train_epoch),
    ("Validation Accuracy (MNIST)", val_acc, max_val_epoch),
    ("Validation Accuracy (Nearly-MNIST)", nearly_acc, max_nearly_epoch),
):
    (curve,) = plt.plot(acc, label=label)
    colour = curve.get_color()
    plt.scatter(best_epoch, acc[best_epoch], color=colour)
    texts.append(
        plt.text(
            best_epoch,
            acc[best_epoch],
            f"{acc[best_epoch]:.4f}",
            color=colour,
        )
    )

# Adjust text to avoid overlap, with arrow
adjust_text(texts, arrowprops=dict(arrowstyle="->", color="gray"))

plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Training vs Validation Accuracy")
plt.legend()
plt.grid(True)
plt.show()