# Fashion MNIST

In [22]:
import keras
from keras.datasets import fashion_mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import HeNormal

# Seed the random generators up front so that weight initialisation, dropout
# masks and batch shuffling repeat when the notebook is re-run from a fresh
# kernel.
SEED = 42
keras.utils.set_random_seed(SEED)

# Load the Fashion MNIST dataset and flatten every image into one float32
# feature vector, divided by 255 (assumes 8-bit pixel values — TODO confirm).
# The number of samples is read from the arrays instead of being hard-coded,
# so the cell also works on a subset of the data.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_train = x_train.reshape(len(x_train), -1).astype('float32') / 255
x_test = x_test.reshape(len(x_test), -1).astype('float32') / 255

# The models below declare a 784-feature input; fail early if the data differs.
assert x_train.shape[1] == 784 and x_test.shape[1] == 784, 'expected 784 features per image'

# Convert labels to one-hot encoding
num_classes = 10
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Set batch size and number of epochs
batch_size = 128
epochs = 25

## 1: Deep MLP with Increased Dropout
This setup uses three hidden layers (512, 512 and 256 units), each followed by a dropout rate of 0.3 to reduce overfitting, along with the HeNormal initialiser to stabilise training.

In [14]:
# Define the model architecture: three ReLU hidden layers (512, 512, 256 units),
# each followed by 30% dropout, and a softmax output over the classes.
# The input shape is declared with an explicit keras.Input object instead of
# the `input_shape=` layer argument, which Keras 3 flags as deprecated for
# Sequential models.
model = Sequential([
    keras.Input(shape=(784,)),
    Dense(512, activation='relu', kernel_initializer=HeNormal()),
    Dropout(0.3),
    Dense(512, activation='relu', kernel_initializer=HeNormal()),
    Dropout(0.3),
    Dense(256, activation='relu', kernel_initializer=HeNormal()),
    Dropout(0.3),
    Dense(num_classes, activation='softmax')
])

# Compile and train the model
# NOTE(review): the test split is also passed as validation data, so the
# per-epoch val_* values and the final test values come from the same samples.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.001),
    metrics=['accuracy', 'Precision', 'Recall', 'top_k_categorical_accuracy', 'mean_squared_error']
)
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_test, y_test), verbose=1)

# Evaluate with return_dict=True and look every metric up by name. The order
# of the plain list returned by evaluate() is not guaranteed to follow the
# order given to compile() in every Keras version, so positional indexing can
# attach a value to the wrong label. Keys are lower-cased because a string
# identifier such as 'Precision' may be reported with different casing.
test_metrics = model.evaluate(x_test, y_test, verbose=0, return_dict=True)
test_metrics = {name.lower(): value for name, value in test_metrics.items()}

# Printed label -> metric key, in the order the results are reported.
report = {
    'loss': 'loss',
    'accuracy': 'accuracy',
    'precision': 'precision',
    'recall': 'recall',
    'top-K categorical accuracy': 'top_k_categorical_accuracy',
    'mean squared error': 'mean_squared_error',
}

# `score` stays a list, now guaranteed to be in the order of `report`.
score = [test_metrics[key] for key in report.values()]

for label, value in zip(report, score):
    print(f'Test {label}:', value)

Epoch 1/25
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - Precision: 0.8092 - Recall: 0.6192 - accuracy: 0.7173 - loss: 0.7896 - mean_squared_error: 0.0381 - top_k_categorical_accuracy: 0.9738 - val_Precision: 0.8632 - val_Recall: 0.7857 - val_accuracy: 0.8248 - val_loss: 0.4811 - val_mean_squared_error: 0.0250 - val_top_k_categorical_accuracy: 0.9963
Epoch 2/25
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - Precision: 0.8816 - Recall: 0.7986 - accuracy: 0.8403 - loss: 0.4402 - mean_squared_error: 0.0225 - top_k_categorical_accuracy: 0.9959 - val_Precision: 0.8908 - val_Recall: 0.8177 - val_accuracy: 0.8530 - val_loss: 0.3992 - val_mean_squared_error: 0.0206 - val_top_k_categorical_accuracy: 0.9969
Epoch 3/25
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - Precision: 0.8916 - Recall: 0.8238 - accuracy: 0.8568 - loss: 0.3910 - mean_squared_error: 0.0203 - top_k_categorical_accuracy: 0.9972 - val_Pre

## 2: Simplified MLP with ELU Activation and RMSprop Optimizer
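This smaller architecture (512, 256 and 128 units) uses the ELU activation function, which can often improve gradient flow because negative inputs are not clamped to zero, and the RMSprop optimiser (with its default settings), which sometimes performs better on classification tasks.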

In [23]:
# Define the model architecture: three ELU hidden layers (512, 256, 128 units),
# each followed by 25% dropout, and a softmax output over the classes.
# Only the first hidden layer uses the HeNormal initialiser; the others keep
# the layer default. The input shape is declared with an explicit keras.Input
# object instead of the `input_shape=` layer argument, which Keras 3 flags as
# deprecated for Sequential models.
model = Sequential([
    keras.Input(shape=(784,)),
    Dense(512, activation='elu', kernel_initializer=HeNormal()),
    Dropout(0.25),
    Dense(256, activation='elu'),
    Dropout(0.25),
    Dense(128, activation='elu'),
    Dropout(0.25),
    Dense(num_classes, activation='softmax')
])

# Compile and train the model
# This experiment deliberately uses its own batch size (64) and epoch count
# (30) rather than the notebook-wide `batch_size` / `epochs` settings.
# NOTE(review): the test split is also passed as validation data, so the
# per-epoch val_* values and the final test values come from the same samples.
model.compile(
    loss='categorical_crossentropy',
    optimizer='RMSprop',
    metrics=['accuracy', 'Precision', 'Recall', 'top_k_categorical_accuracy', 'mean_squared_error']
)
model.fit(x_train, y_train, batch_size=64, epochs=30, validation_data=(x_test, y_test), verbose=1)

# Evaluate with return_dict=True and look every metric up by name. The order
# of the plain list returned by evaluate() is not guaranteed to follow the
# order given to compile() in every Keras version, so positional indexing can
# attach a value to the wrong label. Keys are lower-cased because a string
# identifier such as 'Precision' may be reported with different casing.
test_metrics = model.evaluate(x_test, y_test, verbose=0, return_dict=True)
test_metrics = {name.lower(): value for name, value in test_metrics.items()}

# Printed label -> metric key, in the order the results are reported.
report = {
    'loss': 'loss',
    'accuracy': 'accuracy',
    'precision': 'precision',
    'recall': 'recall',
    'top-K categorical accuracy': 'top_k_categorical_accuracy',
    'mean squared error': 'mean_squared_error',
}

# `score` stays a list, now guaranteed to be in the order of `report`.
score = [test_metrics[key] for key in report.values()]

for label, value in zip(report, score):
    print(f'Test {label}:', value)

Epoch 1/30
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - Precision: 0.7707 - Recall: 0.6655 - accuracy: 0.7165 - loss: 0.8437 - mean_squared_error: 0.0393 - top_k_categorical_accuracy: 0.9778 - val_Precision: 0.8579 - val_Recall: 0.8033 - val_accuracy: 0.8260 - val_loss: 0.4923 - val_mean_squared_error: 0.0245 - val_top_k_categorical_accuracy: 0.9954
Epoch 2/30
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - Precision: 0.8598 - Recall: 0.7941 - accuracy: 0.8253 - loss: 0.4805 - mean_squared_error: 0.0244 - top_k_categorical_accuracy: 0.9952 - val_Precision: 0.8710 - val_Recall: 0.8132 - val_accuracy: 0.8389 - val_loss: 0.4575 - val_mean_squared_error: 0.0227 - val_top_k_categorical_accuracy: 0.9950
Epoch 3/30
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - Precision: 0.8783 - Recall: 0.8175 - accuracy: 0.8460 - loss: 0.4249 - mean_squared_error: 0.0217 - top_k_categorical_accuracy: 0.9970 - val_Pre

## 3: Wider MLP with LeakyReLU Activation and SGD Optimizer
This option doubles the width of each hidden layer relative to the previous model (1024, 512 and 256 units) and employs LeakyReLU for better gradient flow in deeper networks. It is combined with the SGD optimiser (learning rate 0.01, momentum 0.9) for controlled learning.

In [25]:
from keras.layers import LeakyReLU
from tensorflow.keras.optimizers import SGD

# Define the model architecture: three linear Dense layers (1024, 512, 256
# units), each followed by a LeakyReLU activation and 40% dropout, and a
# softmax output over the classes.
# - The input shape is declared with an explicit keras.Input object instead of
#   the `input_shape=` layer argument, which Keras 3 flags as deprecated for
#   Sequential models.
# - LeakyReLU takes `negative_slope` in Keras 3; the former `alpha` spelling is
#   deprecated there. NOTE(review): on a Keras 2 install this argument is still
#   called `alpha`.
model = Sequential([
    keras.Input(shape=(784,)),
    Dense(1024, kernel_initializer=HeNormal()),
    LeakyReLU(negative_slope=0.1),
    Dropout(0.4),
    Dense(512, kernel_initializer=HeNormal()),
    LeakyReLU(negative_slope=0.1),
    Dropout(0.4),
    Dense(256, kernel_initializer=HeNormal()),
    LeakyReLU(negative_slope=0.1),
    Dropout(0.4),
    Dense(num_classes, activation='softmax')
])

# Compile and train the model
# This experiment uses the notebook-wide `batch_size` but its own epoch count (35).
# NOTE(review): the test split is also passed as validation data, so the
# per-epoch val_* values and the final test values come from the same samples.
model.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(learning_rate=0.01, momentum=0.9),
    metrics=['accuracy', 'Precision', 'Recall', 'top_k_categorical_accuracy', 'mean_squared_error']
)
model.fit(x_train, y_train, batch_size=batch_size, epochs=35, validation_data=(x_test, y_test), verbose=1)

# Evaluate with return_dict=True and look every metric up by name. The order
# of the plain list returned by evaluate() is not guaranteed to follow the
# order given to compile() in every Keras version, so positional indexing can
# attach a value to the wrong label. Keys are lower-cased because a string
# identifier such as 'Precision' may be reported with different casing.
test_metrics = model.evaluate(x_test, y_test, verbose=0, return_dict=True)
test_metrics = {name.lower(): value for name, value in test_metrics.items()}

# Printed label -> metric key, in the order the results are reported.
report = {
    'loss': 'loss',
    'accuracy': 'accuracy',
    'precision': 'precision',
    'recall': 'recall',
    'top-K categorical accuracy': 'top_k_categorical_accuracy',
    'mean squared error': 'mean_squared_error',
}

# `score` stays a list, now guaranteed to be in the order of `report`.
score = [test_metrics[key] for key in report.values()]

for label, value in zip(report, score):
    print(f'Test {label}:', value)

Epoch 1/35
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - Precision: 0.7685 - Recall: 0.4937 - accuracy: 0.6267 - loss: 1.0437 - mean_squared_error: 0.0482 - top_k_categorical_accuracy: 0.9432 - val_Precision: 0.8821 - val_Recall: 0.7679 - val_accuracy: 0.8270 - val_loss: 0.4764 - val_mean_squared_error: 0.0246 - val_top_k_categorical_accuracy: 0.9958
Epoch 2/35
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - Precision: 0.8675 - Recall: 0.7592 - accuracy: 0.8157 - loss: 0.5206 - mean_squared_error: 0.0262 - top_k_categorical_accuracy: 0.9941 - val_Precision: 0.8874 - val_Recall: 0.8019 - val_accuracy: 0.8450 - val_loss: 0.4284 - val_mean_squared_error: 0.0221 - val_top_k_categorical_accuracy: 0.9966
Epoch 3/35
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - Precision: 0.8773 - Recall: 0.7949 - accuracy: 0.8374 - loss: 0.4528 - mean_squared_error: 0.0231 - top_k_categorical_accuracy: 0.9960 - val_Pre

# CIFAR 10

In [16]:
import keras
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import HeNormal

# Seed the random generators up front so that weight initialisation, dropout
# masks and batch shuffling repeat when the notebook is re-run from a fresh
# kernel.
SEED = 42
keras.utils.set_random_seed(SEED)

# Load the CIFAR-10 dataset and flatten every image into one float32 feature
# vector, divided by 255 (assumes 8-bit pixel values — TODO confirm).
# The number of samples is read from the arrays instead of being hard-coded,
# so the cell also works on a subset of the data.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.reshape(len(x_train), -1).astype('float32') / 255
x_test = x_test.reshape(len(x_test), -1).astype('float32') / 255

# The models below declare a 3072-feature (32 * 32 * 3) input; fail early if
# the data differs.
assert x_train.shape[1] == 32 * 32 * 3 and x_test.shape[1] == 32 * 32 * 3, 'expected 3072 features per image'

# Convert labels to one-hot encoding
num_classes = 10
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Set batch size and number of epochs
batch_size = 128
epochs = 25

In [17]:
# Define the model architecture for CIFAR-10: three ReLU hidden layers
# (512, 512, 256 units), each followed by 30% dropout, and a softmax output
# over the classes.
# The input shape is declared with an explicit keras.Input object instead of
# the `input_shape=` layer argument, which Keras 3 flags as deprecated for
# Sequential models.
model = Sequential([
    keras.Input(shape=(3072,)),
    Dense(512, activation='relu', kernel_initializer=HeNormal()),
    Dropout(0.3),
    Dense(512, activation='relu', kernel_initializer=HeNormal()),
    Dropout(0.3),
    Dense(256, activation='relu', kernel_initializer=HeNormal()),
    Dropout(0.3),
    Dense(num_classes, activation='softmax')
])

# Compile and train the model
# NOTE(review): the test split is also passed as validation data, so the
# per-epoch val_* values and the final test values come from the same samples.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.001),
    metrics=['accuracy', 'Precision', 'Recall', 'top_k_categorical_accuracy', 'mean_squared_error']
)
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_test, y_test), verbose=1)

# Evaluate with return_dict=True and look every metric up by name. The order
# of the plain list returned by evaluate() is not guaranteed to follow the
# order given to compile() in every Keras version, so positional indexing can
# attach a value to the wrong label. Keys are lower-cased because a string
# identifier such as 'Precision' may be reported with different casing.
test_metrics = model.evaluate(x_test, y_test, verbose=0, return_dict=True)
test_metrics = {name.lower(): value for name, value in test_metrics.items()}

# Printed label -> metric key, in the order the results are reported.
report = {
    'loss': 'loss',
    'accuracy': 'accuracy',
    'precision': 'precision',
    'recall': 'recall',
    'top-K categorical accuracy': 'top_k_categorical_accuracy',
    'mean squared error': 'mean_squared_error',
}

# `score` stays a list, now guaranteed to be in the order of `report`.
score = [test_metrics[key] for key in report.values()]

for label, value in zip(report, score):
    print(f'Test {label}:', value)

Epoch 1/25
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - Precision: 0.2795 - Recall: 0.0129 - accuracy: 0.1860 - loss: 2.2772 - mean_squared_error: 0.0893 - top_k_categorical_accuracy: 0.6825 - val_Precision: 0.7064 - val_Recall: 0.0166 - val_accuracy: 0.3203 - val_loss: 1.8684 - val_mean_squared_error: 0.0797 - val_top_k_categorical_accuracy: 0.8319
Epoch 2/25
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - Precision: 0.5370 - Recall: 0.0426 - accuracy: 0.2742 - loss: 1.9440 - mean_squared_error: 0.0818 - top_k_categorical_accuracy: 0.7980 - val_Precision: 0.7075 - val_Recall: 0.0237 - val_accuracy: 0.3446 - val_loss: 1.8206 - val_mean_squared_error: 0.0783 - val_top_k_categorical_accuracy: 0.8497
Epoch 3/25
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - Precision: 0.5342 - Recall: 0.0492 - accuracy: 0.2954 - loss: 1.9028 - mean_squared_error: 0.0806 - top_k_categorical_accuracy: 0.8112 - val_Pre

In [18]:
# Define the model architecture for CIFAR-10: three ELU hidden layers
# (512, 256, 128 units), each followed by 25% dropout, and a softmax output
# over the classes. Only the first hidden layer uses the HeNormal initialiser;
# the others keep the layer default.
# The input shape is declared with an explicit keras.Input object instead of
# the `input_shape=` layer argument, which Keras 3 flags as deprecated for
# Sequential models.
model = Sequential([
    keras.Input(shape=(3072,)),
    Dense(512, activation='elu', kernel_initializer=HeNormal()),
    Dropout(0.25),
    Dense(256, activation='elu'),
    Dropout(0.25),
    Dense(128, activation='elu'),
    Dropout(0.25),
    Dense(num_classes, activation='softmax')
])

# Compile and train the model
# This experiment deliberately uses its own batch size (64) and epoch count
# (30) rather than the notebook-wide `batch_size` / `epochs` settings.
# NOTE(review): the test split is also passed as validation data, so the
# per-epoch val_* values and the final test values come from the same samples.
model.compile(
    loss='categorical_crossentropy',
    optimizer='RMSprop',
    metrics=['accuracy', 'Precision', 'Recall', 'top_k_categorical_accuracy', 'mean_squared_error']
)
model.fit(x_train, y_train, batch_size=64, epochs=30, validation_data=(x_test, y_test), verbose=1)

# Evaluate with return_dict=True and look every metric up by name. The order
# of the plain list returned by evaluate() is not guaranteed to follow the
# order given to compile() in every Keras version, so positional indexing can
# attach a value to the wrong label. Keys are lower-cased because a string
# identifier such as 'Precision' may be reported with different casing.
test_metrics = model.evaluate(x_test, y_test, verbose=0, return_dict=True)
test_metrics = {name.lower(): value for name, value in test_metrics.items()}

# Printed label -> metric key, in the order the results are reported.
report = {
    'loss': 'loss',
    'accuracy': 'accuracy',
    'precision': 'precision',
    'recall': 'recall',
    'top-K categorical accuracy': 'top_k_categorical_accuracy',
    'mean squared error': 'mean_squared_error',
}

# `score` stays a list, now guaranteed to be in the order of `report`.
score = [test_metrics[key] for key in report.values()]

for label, value in zip(report, score):
    print(f'Test {label}:', value)

Epoch 1/30
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - Precision: 0.2310 - Recall: 0.0364 - accuracy: 0.1702 - loss: 2.7169 - mean_squared_error: 0.0960 - top_k_categorical_accuracy: 0.6485 - val_Precision: 0.5878 - val_Recall: 0.0763 - val_accuracy: 0.2987 - val_loss: 1.8835 - val_mean_squared_error: 0.0798 - val_top_k_categorical_accuracy: 0.8174
Epoch 2/30
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - Precision: 0.4845 - Recall: 0.0827 - accuracy: 0.2906 - loss: 1.9644 - mean_squared_error: 0.0823 - top_k_categorical_accuracy: 0.8014 - val_Precision: 0.5163 - val_Recall: 0.0698 - val_accuracy: 0.2934 - val_loss: 1.9488 - val_mean_squared_error: 0.0816 - val_top_k_categorical_accuracy: 0.8053
Epoch 3/30
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - Precision: 0.5529 - Recall: 0.1110 - accuracy: 0.3462 - loss: 1.8092 - mean_squared_error: 0.0777 - top_k_categorical_accuracy: 0.8483 - val_Pre

In [19]:
from keras.layers import LeakyReLU
from tensorflow.keras.optimizers import SGD

# Define the model architecture for CIFAR-10: three linear Dense layers
# (1024, 512, 256 units), each followed by a LeakyReLU activation and 40%
# dropout, and a softmax output over the classes.
# - The input shape is declared with an explicit keras.Input object instead of
#   the `input_shape=` layer argument, which Keras 3 flags as deprecated for
#   Sequential models.
# - LeakyReLU takes `negative_slope` in Keras 3; the former `alpha` spelling is
#   deprecated there. NOTE(review): on a Keras 2 install this argument is still
#   called `alpha`.
model = Sequential([
    keras.Input(shape=(3072,)),
    Dense(1024, kernel_initializer=HeNormal()),
    LeakyReLU(negative_slope=0.1),
    Dropout(0.4),
    Dense(512, kernel_initializer=HeNormal()),
    LeakyReLU(negative_slope=0.1),
    Dropout(0.4),
    Dense(256, kernel_initializer=HeNormal()),
    LeakyReLU(negative_slope=0.1),
    Dropout(0.4),
    Dense(num_classes, activation='softmax')
])

# Compile and train the model
# This experiment uses the notebook-wide `batch_size` but its own epoch count (35).
# NOTE(review): the test split is also passed as validation data, so the
# per-epoch val_* values and the final test values come from the same samples.
model.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(learning_rate=0.01, momentum=0.9),
    metrics=['accuracy', 'Precision', 'Recall', 'top_k_categorical_accuracy', 'mean_squared_error']
)
model.fit(x_train, y_train, batch_size=batch_size, epochs=35, validation_data=(x_test, y_test), verbose=1)

# Evaluate with return_dict=True and look every metric up by name. The order
# of the plain list returned by evaluate() is not guaranteed to follow the
# order given to compile() in every Keras version, so positional indexing can
# attach a value to the wrong label. Keys are lower-cased because a string
# identifier such as 'Precision' may be reported with different casing.
test_metrics = model.evaluate(x_test, y_test, verbose=0, return_dict=True)
test_metrics = {name.lower(): value for name, value in test_metrics.items()}

# Printed label -> metric key, in the order the results are reported.
report = {
    'loss': 'loss',
    'accuracy': 'accuracy',
    'precision': 'precision',
    'recall': 'recall',
    'top-K categorical accuracy': 'top_k_categorical_accuracy',
    'mean squared error': 'mean_squared_error',
}

# `score` stays a list, now guaranteed to be in the order of `report`.
score = [test_metrics[key] for key in report.values()]

for label, value in zip(report, score):
    print(f'Test {label}:', value)

Epoch 1/35






[1m385/391[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - Precision: 0.3235 - Recall: 0.0119 - accuracy: 0.1966 - loss: 2.1994 - mean_squared_error: 0.0879 - top_k_categorical_accuracy: 0.6875




















[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 9ms/step - Precision: 0.3265 - Recall: 0.0122 - accuracy: 0.1974 - loss: 2.1978 - mean_squared_error: 0.0879 - top_k_categorical_accuracy: 0.6878 - val_Precision: 0.6535 - val_Recall: 0.0398 - val_accuracy: 0.3400 - val_loss: 1.8502 - val_mean_squared_error: 0.0790 - val_top_k_categorical_accuracy: 0.8416
Epoch 2/35
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - Precision: 0.5401 - Recall: 0.0525 - accuracy: 0.3088 - loss: 1.9093 - mean_squared_error: 0.0805 - top_k_categorical_accuracy: 0.8178 - val_Precision: 0.6611 - val_Recall: 0.0595 - val_accuracy: 0.3745 - val_loss: 1.7600 - val_mean_squared_error: 0.0762 - val_top_k_categorical_accuracy: 0.8665
Epoch 3/35
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - Precision: 0.5646 - Recall: 0.0754 - accuracy: 0.3432 - loss: 1.8296 - mean_squared_error: 0.0782 - top_k_categorical_accuracy: 0.8442 - val_Precision: 0.6