In [26]:
import numpy as np
import sklearn
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras import layers, activations

In [27]:
# to make this notebook’s output stable across runs
np.random.seed(42)
tf.random.set_seed(42)

(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()
X_train_full = X_train_full.astype(np.float32) / 255.
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]
X_test = X_test.astype(np.float32) / 255


In [28]:
# Defining the model
# Input layer
# Lower layer with ReLu activation function
# Upper layer with softmax activation function
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(128, activation='relu'), #lower layer
  tf.keras.layers.Dense(10, activation='softmax') #upper layer
])

In [29]:
n_epochs = 5 
batch_size = 32

# Function to generate random batches
def random_batch(X, y, batch_size):     
    idx = np.random.randint(len(X), size=batch_size)     
    return X[idx], y[idx]

# Function to print the training status
def print_status_bar(step, total, train_loss, train_accuracy, val_loss, val_accuracy):
    metrics_str = f"accuracy: {train_accuracy.result():.4f} - loss: {train_loss.result():.4f} - val_accuracy: {val_accuracy.result():.4f} - val_loss: {val_loss.result():.4f}"
    end = "" if step < total else "\n"
    print(f"\r{step}/{total} - " + metrics_str, end=end)

n_steps = len(X_train) // batch_size

# Define the optimizers with specified learning rates
first_opt = tf.keras.optimizers.SGD(learning_rate=1e-4)
second_opt = tf.keras.optimizers.Nadam(learning_rate=1e-3)


loss_fn = tf.keras.losses.SparseCategoricalCrossentropy() #using sparse categorical cross entropy as a loss function
train_loss = tf.keras.metrics.Mean(name="loss")
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy")
val_loss = tf.keras.metrics.Mean(name="val_loss")
val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name="val_accuracy")

# Custom Training Loop
for epoch in range(1, n_epochs + 1):
    print("Epoch {}/{}".format(epoch, n_epochs))
    for step in range(1, n_steps + 1):
        X_batch, y_batch = random_batch(X_train, y_train, batch_size)
        with tf.GradientTape(persistent=True) as tape:
            y_pred = model(X_batch, training=True)
            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
            loss = tf.add_n([main_loss] + model.losses)


        train_accuracy.update_state(y_batch, y_pred)
        gradients=tape.gradient(loss,model.trainable_variables)

        first_opt.apply_gradients(zip(gradients[:2],model.layers[1].trainable_variables)) #lower layer
        second_opt.apply_gradients(zip(gradients[2:],model.layers[2].trainable_variables)) #upper layer
        train_loss(loss)

        # Print summary statistics and update them until the epoch end
        if step % 100 == 0:
            y_val_pred = model(X_valid, training=False)
            val_loss.update_state(loss_fn(y_valid, y_val_pred))
            val_accuracy.update_state(y_valid, y_val_pred)
            print_status_bar(step, n_steps, train_loss, train_accuracy, val_loss, val_accuracy) #recalling the print_status_bar function

    for metric in [train_loss, train_accuracy, val_loss, val_accuracy]:
        metric.reset_state()

Epoch 1/5
1700/1718 - accuracy: 0.7034 - loss: 1.0097 - val_accuracy: 0.7320 - val_loss: 0.9479Epoch 2/5
1700/1718 - accuracy: 0.7917 - loss: 0.6258 - val_accuracy: 0.7972 - val_loss: 0.6108Epoch 3/5
1700/1718 - accuracy: 0.8053 - loss: 0.5586 - val_accuracy: 0.8112 - val_loss: 0.5518Epoch 4/5
1700/1718 - accuracy: 0.8184 - loss: 0.5227 - val_accuracy: 0.8186 - val_loss: 0.5214Epoch 5/5
1700/1718 - accuracy: 0.8209 - loss: 0.5098 - val_accuracy: 0.8251 - val_loss: 0.5014