In [None]:
import tensorflow as tf
from tensorflow.keras import layers, regularizers, Model, Input
from tensorflow.keras.callbacks import EarlyStopping
import numpy as np

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
x_train = x_train.reshape(-1, 784).astype("float32") / 255.0
x_test = x_test.reshape(-1, 784).astype("float32") / 255.0

noise = np.random.normal(0, 0.05, x_train.shape)
x_train_aug = np.clip(x_train + noise, 0.0, 1.0)

# Residual block definition
def residual_block(x, units, dropout_rate=0.5):
    shortcut = x
    if x.shape[-1] != units:
        shortcut = layers.Dense(units, kernel_regularizer=regularizers.l2(1e-4))(shortcut)

    x = layers.Dense(units, kernel_regularizer=regularizers.l2(1e-4))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.Dropout(dropout_rate)(x)

    x = layers.Dense(units, kernel_regularizer=regularizers.l2(1e-4))(x)
    x = layers.BatchNormalization()(x)

    x = layers.Add()([x, shortcut])
    x = layers.Activation("relu")(x)
    return x

# Model with >10 layers using residuals
def create_deep_resnet():
    inputs = Input(shape=(784,))
    x = layers.BatchNormalization()(inputs)

    x = layers.Dense(1024, kernel_regularizer=regularizers.l2(1e-4))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    # Add 6 residual blocks with varied units
    x = residual_block(x, 1024, 0.4)
    x = residual_block(x, 512, 0.4)
    x = residual_block(x, 512, 0.4)
    x = residual_block(x, 256, 0.3)
    x = residual_block(x, 256, 0.3)
    x = residual_block(x, 128, 0.2)

    x = layers.Dense(64, kernel_regularizer=regularizers.l2(1e-4))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    x = layers.Dropout(0.2)(x)
    outputs = layers.Dense(10, activation='softmax')(x)

    return Model(inputs=inputs, outputs=outputs)

model = create_deep_resnet()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0007),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

early_stop = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)
history = model.fit(
    x_train_aug, y_train,
    validation_split=0.1,
    epochs=60,
    batch_size=128,
    callbacks=[early_stop],
    verbose=2
)

test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
print(f"Final Test Accuracy: {test_acc * 100:.2f}%")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Epoch 1/60
422/422 - 95s - 226ms/step - accuracy: 0.7994 - loss: 1.3572 - val_accuracy: 0.8545 - val_loss: 1.1443
Epoch 2/60
422/422 - 81s - 192ms/step - accuracy: 0.8609 - loss: 1.0639 - val_accuracy: 0.8632 - val_loss: 0.9699
Epoch 3/60
422

In [2]:
import tensorflow as tf
from tensorflow.keras import layers, regularizers, Model, Input
from tensorflow.keras.callbacks import EarlyStopping
import numpy as np

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
x_train = x_train.reshape(-1, 784).astype("float32") / 255.0
x_test = x_test.reshape(-1, 784).astype("float32") / 255.0

noise = np.random.normal(0, 0.05, x_train.shape)
x_train_aug = np.clip(x_train + noise, 0.0, 1.0)

def residual_block(x, units, dropout_rate=0.5):
    shortcut = x
    if x.shape[-1] != units:
        shortcut = layers.Dense(units, kernel_regularizer=regularizers.l2(1e-4))(shortcut)

    x = layers.Dense(units, kernel_regularizer=regularizers.l2(1e-4))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.Dropout(dropout_rate)(x)

    x = layers.Dense(units, kernel_regularizer=regularizers.l2(1e-4))(x)
    x = layers.BatchNormalization()(x)

    x = layers.Add()([x, shortcut])
    x = layers.Activation("relu")(x)
    return x

# Model with >10 layers using residuals
def create_deep_resnet():
    inputs = Input(shape=(784,))
    x = layers.BatchNormalization()(inputs)

    x = layers.Dense(1024, kernel_regularizer=regularizers.l2(1e-4))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    # Add 6 residual blocks with varied units
    x = residual_block(x, 1024, 0.4)
    x = residual_block(x, 512, 0.4)
    x = residual_block(x, 512, 0.4)
    x = residual_block(x, 256, 0.3)
    x = residual_block(x, 256, 0.3)
    x = residual_block(x, 128, 0.2)

    x = layers.Dense(64, kernel_regularizer=regularizers.l2(1e-4))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    x = layers.Dropout(0.2)(x)
    outputs = layers.Dense(10, activation='softmax')(x)

    return Model(inputs=inputs, outputs=outputs)

model = create_deep_resnet()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0007),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

early_stop = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)
history = model.fit(
    x_train_aug, y_train,
    validation_split=0.1,
    epochs=100,
    batch_size=128,
    callbacks=[early_stop],
    verbose=2
)

test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
print(f"Final Test Accuracy: {test_acc * 100:.2f}%")


Epoch 1/100
422/422 - 92s - 218ms/step - accuracy: 0.7991 - loss: 1.3567 - val_accuracy: 0.8548 - val_loss: 1.1295
Epoch 2/100
422/422 - 79s - 187ms/step - accuracy: 0.8589 - loss: 1.0662 - val_accuracy: 0.8577 - val_loss: 1.0151
Epoch 3/100
422/422 - 85s - 201ms/step - accuracy: 0.8722 - loss: 0.9071 - val_accuracy: 0.8602 - val_loss: 0.8846
Epoch 4/100
422/422 - 70s - 166ms/step - accuracy: 0.8816 - loss: 0.7714 - val_accuracy: 0.8745 - val_loss: 0.7288
Epoch 5/100
422/422 - 72s - 171ms/step - accuracy: 0.8878 - loss: 0.6620 - val_accuracy: 0.8783 - val_loss: 0.6556
Epoch 6/100
422/422 - 71s - 167ms/step - accuracy: 0.8911 - loss: 0.5797 - val_accuracy: 0.8713 - val_loss: 0.5969
Epoch 7/100
422/422 - 82s - 195ms/step - accuracy: 0.8957 - loss: 0.5205 - val_accuracy: 0.8733 - val_loss: 0.5895
Epoch 8/100
422/422 - 71s - 168ms/step - accuracy: 0.8989 - loss: 0.4765 - val_accuracy: 0.8772 - val_loss: 0.5160
Epoch 9/100
422/422 - 71s - 168ms/step - accuracy: 0.9052 - loss: 0.4408 - val_a

**Reference Model: 87-88%,   Model 1: 89.11%,   Model 2: 88.36%**



*Conclusion:*

1.   The above 2 models have deeper architecture with over 10 layers, including 6 residual blocks than the Reference code.
2.   Residual blocks help prevent vanishing gradients and allow deeper models to train effectively.
1.   This model includes L2 regularization, dropout, and batch normalization for better generalization.
2.   Data augmentation using Gaussian noise makes the model more robust to input variations.
1.   EarlyStopping prevents overfitting by stopping training when validation accuracy stops improving.
2.   A smaller learning rate with the Adam optimizer ensures stable and smooth convergence.
1.   The combination of depth, regularization, augmentation, and residuals leads to improved accuracy (Model 1: 89.11% and Model 2: 88.36%) over Reference model (~87–88%).







