In [1]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Model

# Load the MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Add a trailing channel axis so the images match the (28, 28, 1) input the
# model declares, then convert to float32 and scale the pixel values by 1/255.
X_train = X_train.reshape(-1, 28, 28, 1).astype('float32') / 255
X_test = X_test.reshape(-1, 28, 28, 1).astype('float32') / 255

# One-hot encode the targets (10 classes)
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

# Split the held-out set in half: one half for validation during training,
# the other half for the final evaluation. A fixed random_state keeps the
# partition identical between runs, so reported accuracies are comparable.
X_val, X_test, y_val, y_test = train_test_split(
    X_test, y_test, test_size=0.5, random_state=42
)

# Constants read by the dynamic margin loss to convert optimizer steps into
# epochs. batch_size must match the batch size actually used for training.
batch_size = 32
n_samples = len(X_train)

def get_dynamic_margin_loss(optimizer, margin=0.25, scale=50.0, steps_per_epoch=None):
    """Build a margin-based cross-entropy loss whose margin shrinks during training.

    The returned loss subtracts a margin from the prediction of the true
    class, multiplies all predictions by ``scale`` and passes the result to
    categorical cross-entropy as logits. The margin starts at ``margin`` and
    is reduced by 1% of that value per (fractional) epoch, where the epoch is
    derived from ``optimizer.iterations``. It never drops below zero.

    NOTE(review): the cross-entropy is computed with ``from_logits=True``, so
    ``y_pred`` is treated as raw scores. If the model's last layer already
    applies softmax, softmax is effectively applied twice -- confirm that
    this is intended.

    Args:
        optimizer: Optimizer used to train the model. Only its ``iterations``
            counter is read, to work out how far training has progressed.
        margin: Initial margin subtracted from the true-class prediction.
        scale: Factor applied to the margin-adjusted predictions before the
            cross-entropy.
        steps_per_epoch: Number of optimizer steps that make up one epoch.
            When ``None``, falls back to the module-level
            ``n_samples / batch_size``.

    Returns:
        A ``tf.keras.losses.Loss`` instance suitable for ``model.compile``.

    Raises:
        ValueError: If ``steps_per_epoch`` is not positive.
    """
    if steps_per_epoch is None:
        # Backward-compatible default: derive the value from the module-level
        # training constants, as the loss has always done.
        steps_per_epoch = n_samples / batch_size
    if steps_per_epoch <= 0:
        raise ValueError(
            f"steps_per_epoch must be positive, got {steps_per_epoch!r}"
        )

    class DynamicMarginLoss(tf.keras.losses.Loss):
        """Cross-entropy with an epoch-dependent margin on the true class."""

        def __init__(self, margin, scale):
            super().__init__()
            self.margin = margin
            self.scale = scale

        def call(self, y_true, y_pred):
            # Labels may arrive as integers or float64; align them with the
            # predictions so the arithmetic below cannot fail on dtypes.
            y_true = tf.cast(y_true, y_pred.dtype)

            # Fractional epoch reached so far, from the optimizer step count.
            epoch = tf.cast(optimizer.iterations, tf.float32) / tf.cast(
                steps_per_epoch, tf.float32
            )

            # Linear decay of 1% per epoch, clamped so that very long runs
            # (more than 100 epochs) cannot flip the margin's sign and start
            # rewarding the true class instead of penalising it.
            decay = tf.maximum(1.0 - 0.01 * epoch, 0.0)
            dynamic_margin = tf.cast(self.margin * decay, y_pred.dtype)

            # Only the true-class entry (y_true == 1) is reduced by the
            # margin; all other entries pass through unchanged.
            adjusted = (y_pred - y_true * dynamic_margin) * self.scale
            return tf.keras.losses.categorical_crossentropy(
                y_true, adjusted, from_logits=True
            )

    return DynamicMarginLoss(margin, scale)

# Define the model architecture: one convolution + pooling stage followed by
# a small dense classifier over the 10 digit classes.
inputs = Input(shape=(28, 28, 1))
x = Conv2D(32, kernel_size=(3, 3), activation='relu')(inputs)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
# NOTE(review): this layer emits softmax probabilities, while the dynamic
# margin loss computes its cross-entropy with from_logits=True, so softmax is
# effectively applied twice during training -- confirm that this is intended.
outputs = Dense(10, activation='softmax')(x)

model = Model(inputs, outputs)

# Compile the model
# NOTE(review): with staircase=True the learning rate only changes once every
# decay_steps (100000) optimizer steps. Assuming the standard 60,000-image
# MNIST training set, 10 epochs at batch size 32 take 18,750 steps, so the
# rate stays at initial_learning_rate for the whole run -- confirm decay_steps.
initial_learning_rate = 0.01
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True)

optimizer = Adam(learning_rate=lr_schedule)
# The loss reads optimizer.iterations to track training progress, so it must
# be given the same optimizer instance that is passed to compile().
loss = get_dynamic_margin_loss(optimizer, margin=0.25, scale=50.0)

model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

# Train the model. batch_size is passed explicitly because the loss converts
# optimizer steps into epochs using this same constant; relying on the Keras
# default would silently break that conversion if either value changed.
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=10,
    validation_data=(X_val, y_val),
)

# Evaluate the model. With a single compiled metric, evaluate() returns
# [loss, accuracy], so index 1 is the accuracy.
results = model.evaluate(X_test, y_test)
accuracy = results[1]

print(f"Accuracy on test set: {accuracy}")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy on test set: 0.9685999751091003
