In [2]:
# Callbacks + model training: Fashion-MNIST dense classifier with TensorBoard, checkpointing, early stopping and LR reduction

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from tensorflow.keras.datasets import fashion_mnist
import os, datetime

# ------------------------------
# Load and preprocess data
# ------------------------------
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Cast to float32, divide by 255 (presumably 8-bit pixel intensities, giving
# values in [0, 1]) and flatten every image into a 784-element row vector.
X_train = (X_train.astype("float32") / 255.0).reshape(-1, 28 * 28)
X_test = (X_test.astype("float32") / 255.0).reshape(-1, 28 * 28)

# The first 9000 training samples become the validation set; everything after
# them stays in the training set. Labels are split at the same index so that
# images and labels remain aligned.
X_val, X_train = X_train[:9000], X_train[9000:]
y_val, y_train = y_train[:9000], y_train[9000:]

# ------------------------------
# Define the model
# ------------------------------
# Fully-connected classifier over flattened images (28 * 28 = 784 features).
# The input shape is declared explicitly so the model is built (its weights
# are created) at construction time. Without it, a Sequential model stays
# unbuilt until the first batch reaches `fit`, which means `model.summary()`
# and weight inspection are unavailable before training starts.
model = keras.Sequential([
    keras.Input(shape=(28 * 28,)),

    # Hidden block 1: L2-regularized dense layer, then batch norm and dropout.
    layers.Dense(256, activation="relu", kernel_regularizer=regularizers.l2(0.001)),
    layers.BatchNormalization(),
    layers.Dropout(0.4),

    # Hidden block 2: same pattern, narrower layer and lighter dropout.
    layers.Dense(128, activation="relu", kernel_regularizer=regularizers.l2(0.001)),
    layers.BatchNormalization(),
    layers.Dropout(0.3),

    # 10-way softmax output.
    layers.Dense(10, activation="softmax"),
])

# The sparse variant of categorical cross-entropy takes integer class ids, so
# the labels are passed to `fit` as loaded, without one-hot encoding.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# ------------------------------
# Create directories for logs/checkpoints
# ------------------------------
# Everything produced by a run lives under one root directory.
base_dir = "training_logs"
os.makedirs(base_dir, exist_ok=True)

# A per-run timestamp keeps TensorBoard logs and checkpoints from different
# runs from overwriting each other.
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# TensorBoard event files: <base_dir>/tensorboard/<timestamp>
log_dir = os.path.join(base_dir, "tensorboard", timestamp)

# Best-model checkpoint: <base_dir>/checkpoints/best_model_<timestamp>.keras
checkpoint_path = os.path.join(
    base_dir,
    "checkpoints",
    f"best_model_{timestamp}.keras",
)

# Only the checkpoint's parent directory is created here; the TensorBoard
# log directory is not created by this section.
os.makedirs(os.path.split(checkpoint_path)[0], exist_ok=True)

# ------------------------------
# Define callbacks
# ------------------------------
# TensorBoard logging into the per-run log directory, with histograms computed
# every epoch and both the graph and weight images written out.
tensorboard_cb = keras.callbacks.TensorBoard(
    log_dir=log_dir,
    write_graph=True,
    write_images=True,
    histogram_freq=1,
)

# Save the model to `checkpoint_path` only when validation loss improves
# (lower is better), so the file always holds the best model seen so far.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    verbose=1,
)

# Stop after 5 epochs without a validation-loss improvement and roll the
# model back to the best weights observed.
earlystop_cb = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 3 stagnant epochs, never going below 1e-6.
# The patience is shorter than early stopping's, so the learning rate is
# reduced at least once before training is abandoned.
reduce_lr_cb = keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    patience=3,
    factor=0.5,
    min_lr=1e-6,
    verbose=1,
)

# Order is kept as in the original list.
callbacks = [
    tensorboard_cb,
    checkpoint_cb,
    earlystop_cb,
    reduce_lr_cb,
]

# ------------------------------
# Train the model with callbacks
# ------------------------------
# Train for at most 50 epochs in mini-batches of 128, validating on the
# held-out split after every epoch. The callbacks may end training early,
# lower the learning rate, and write checkpoints/TensorBoard logs.
# verbose=2 prints one summary line per epoch instead of a progress bar.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=128,
    epochs=50,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
    verbose=2,
)



Epoch 1/50

Epoch 1: val_loss improved from None to 0.83790, saving model to training_logs\checkpoints\best_model_20251028-113151.keras
399/399 - 6s - 16ms/step - accuracy: 0.7804 - loss: 1.0732 - val_accuracy: 0.8267 - val_loss: 0.8379 - learning_rate: 1.0000e-03
Epoch 2/50

Epoch 2: val_loss improved from 0.83790 to 0.81287, saving model to training_logs\checkpoints\best_model_20251028-113151.keras
399/399 - 3s - 8ms/step - accuracy: 0.8265 - loss: 0.7903 - val_accuracy: 0.8012 - val_loss: 0.8129 - learning_rate: 1.0000e-03
Epoch 3/50

Epoch 3: val_loss improved from 0.81287 to 0.66862, saving model to training_logs\checkpoints\best_model_20251028-113151.keras
399/399 - 3s - 8ms/step - accuracy: 0.8348 - loss: 0.6818 - val_accuracy: 0.8303 - val_loss: 0.6686 - learning_rate: 1.0000e-03
Epoch 4/50

Epoch 4: val_loss improved from 0.66862 to 0.55919, saving model to training_logs\checkpoints\best_model_20251028-113151.keras
399/399 - 3s - 7ms/step - accuracy: 0.8377 - loss: 0.6135 - va