# Fashion MNIST

## Neural network tests



In [2]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras.datasets import fashion_mnist #to import our dataset
from tensorflow.keras.models import Sequential, Model # imports our type of network
from tensorflow.keras.layers import Dense, Flatten, Dropout, Input, BatchNormalization # imports our layers we want to use

from tensorflow.python.keras.losses import categorical_crossentropy #loss function
from tensorflow.keras.optimizers import Adam, SGD #optimisers
from tensorflow.keras.utils import to_categorical #some function for data preparation

from tensorflow.keras.callbacks import ModelCheckpoint #checkpoints used to keep track of best model

from tensorflow.keras.mixed_precision import set_global_policy
set_global_policy('float32') 

In [3]:
batch_size = 128
num_classes = 10
epochs = 20

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()


x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
x_train shape: (60000, 28, 28)
60000 train samples
10000 test samples


In [4]:
input_network = Input(shape=(28,28))
x = Flatten()(input_network)
x = Dense(200, activation='relu')(x)
x = Dense(200, activation='relu')(x)
y = Dense(num_classes, activation='softmax')(x)

model= Model(input_network,outputs=y)
opt = Adam(learning_rate=0.001)
model.compile(loss='categorical_crossentropy',optimizer=opt, metrics=['accuracy'])
model.summary()

I0000 00:00:1763328926.196557  284168 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3537 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4050 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


In [5]:
history1=model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))

Epoch 1/20


2025-11-16 21:36:11.916033: I external/local_xla/xla/service/service.cc:163] XLA service 0x77cd28005990 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-11-16 21:36:11.916527: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): NVIDIA GeForce RTX 4050 Laptop GPU, Compute Capability 8.9
2025-11-16 21:36:11.986577: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-11-16 21:36:12.181664: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91002



[1m 56/469[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 3ms/step - accuracy: 0.5724 - loss: 1.2741

I0000 00:00:1763328974.402232  284705 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m454/469[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.7583 - loss: 0.6987

2025-11-16 21:36:15.599534: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.





[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 0.8172 - loss: 0.5169 - val_accuracy: 0.8478 - val_loss: 0.4265
Epoch 2/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.8666 - loss: 0.3698 - val_accuracy: 0.8528 - val_loss: 0.4052
Epoch 3/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.8790 - loss: 0.3341 - val_accuracy: 0.8723 - val_loss: 0.3598
Epoch 4/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.8857 - loss: 0.3080 - val_accuracy: 0.8772 - val_loss: 0.3405
Epoch 5/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.8926 - loss: 0.2896 - val_accuracy: 0.8746 - val_loss: 0.3492
Epoch 6/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.8989 - loss: 0.2719 - val_accuracy: 0.8795 - val_loss: 0.3357
Epoch 7/20
[1m469/469[0m [32m━━━━━━

In [6]:
# Let's add some regularisation:

from tensorflow.keras.regularizers import l1, l2

#dropout_rate = 0.2

input_network = Input(shape=(28,28))
x = Flatten()(input_network)
x = Dense(200, activation='relu',activity_regularizer=l1(0.1))(x)
x = Dense(200, activation='relu',activity_regularizer=l1(0.2))(x)
y = Dense(num_classes, activation='softmax')(x)

model= Model(input_network,outputs=y)
opt = Adam(learning_rate=0.001)
model.compile(loss='categorical_crossentropy',optimizer=opt, metrics=['accuracy'])
model.summary()

In [7]:
# Dropout

dropout_rate = 0.2

input_network = Input(shape=(28,28))
x = Flatten()(input_network)
x = Dense(200, activation='relu',activity_regularizer=l1(0.1))(x)
x = Dropout(rate=dropout_rate)(x)
x = Dense(200, activation='relu',activity_regularizer=l1(0.2))(x)
x = Dropout(rate=dropout_rate)(x)
y = Dense(num_classes, activation='softmax')(x)

model_dropout= Model(input_network,outputs=y)
opt = Adam(learning_rate=0.001)
model_dropout.compile(loss='categorical_crossentropy',optimizer=opt, metrics=['accuracy'])
model_dropout.summary()

In [8]:
history4=model_dropout.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))

Epoch 1/20
[1m454/469[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.0961 - loss: 54.3599

2025-11-16 21:37:38.662570: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.



[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 21ms/step - accuracy: 0.0974 - loss: 10.9867 - val_accuracy: 0.1000 - val_loss: 2.3097
Epoch 2/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.0989 - loss: 2.3074 - val_accuracy: 0.1000 - val_loss: 2.3071
Epoch 3/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.0971 - loss: 2.3048 - val_accuracy: 0.1000 - val_loss: 2.3059
Epoch 4/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.0975 - loss: 2.3039 - val_accuracy: 0.1000 - val_loss: 2.3053
Epoch 5/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.0976 - loss: 2.3034 - val_accuracy: 0.1000 - val_loss: 2.3050
Epoch 6/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.0973 - loss: 2.3032 - val_accuracy: 0.1000 - val_loss: 2.3048
Epoch 7/20
[1m469/469[0m [32m━━━━

In [None]:
# Batch-Normalisation

dropout_rate = 0.2

input_network = Input(shape=(28,28))
x = Flatten()(input_network)
x = Dense(200, activation='relu')(x)
x = BatchNormalization()(x)
x = Dense(200, activation='relu')(x)
x = Dropout(rate=dropout_rate)(x)
y = Dense(num_classes, activation='softmax')(x)

model_batch_normalisation= Model(input_network,outputs=y)
opt = Adam(learning_rate=0.001)
model_batch_normalisation.compile(loss='categorical_crossentropy',optimizer=opt, metrics=['accuracy'])
model_batch_normalisation.summary()

In [None]:
history4=model_batch_normalisation.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))

In [None]:
# Saving a model

# Save the model
model_batch_normalisation.save('mnist_model_batch_normalisation.keras')


# Load the model
loaded_model = tf.keras.models.load_model('mnist_model_batch_normalisation.keras')
print("Model loaded from 'mnist_model_batch_normalisation.keras")

# Evaluate the loaded model
loss, accuracy = loaded_model.evaluate(x_test, y_test)

In [None]:
# keeping the best model on the test/validation data.

# Define a ModelCheckpoint callback
checkpoint = ModelCheckpoint('best_model.keras', 
                             monitor='val_accuracy',  # You can monitor 'val_loss' or any other metric
                             save_best_only=True, 
                             mode='max',  # Use 'max' if monitoring accuracy; 'min' if monitoring loss
                             verbose=1)

history4=model_batch_normalisation.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          callbacks=[checkpoint],
          validation_data=(x_test, y_test))

best_model = tf.keras.models.load_model('best_model.keras')

In [None]:
import optuna

# Optuna objective function
def objective(trial):
    # Suggest hyperparameters
    num_layers = trial.suggest_int('num_layers', 1, 3)
    activation = trial.suggest_categorical('activation', ['relu', 'sigmoid'])
    dropout_rate = trial.suggest_float('dropout_rate', 0.0, 0.5)
    units = trial.suggest_int('units', 32, 128)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    
    # Build the model
    model = Sequential()
    model.add(Flatten(input_shape=(28, 28)))
    
    for _ in range(num_layers):
        model.add(Dense(units=units, activation=activation))
        model.add(Dropout(rate=dropout_rate))
    
    model.add(Dense(10, activation='softmax'))
    
    # Compile the model
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    
    # Train the model
    history = model.fit(x_train, y_train, 
                        validation_split=0.2,
                        epochs=5,  # To keep it quick for demonstration; you can increase it
                        batch_size=128, 
                        verbose=0)
    
    # Evaluate the model
    score = model.evaluate(x_test, y_test, verbose=0)
    accuracy = score[1]
    return accuracy

# Create an Optuna study and optimize the objective function
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)  # Set the number of trials

# Print the best trial
print('Best trial:')
print(f' Value: {study.best_trial.value}')
print(' Params: ')
for key, value in study.best_trial.params.items():
    print(f'    {key}: {value}')