#                 Internship Task

---

###  Task Title: **Comparative Study of Deep Learning Models on MNIST Dataset**



##  Task Overview
**Section 1**:   Dataset Loading & Preprocessing   
**Section 2**:   LeNet Model – Training & Evaluation on MNIST   
**Section 3**:   ResNet Model – Training & Evaluation on MNIST  
**Section 4**:   VGG16 Model – Training & Evaluation on MNIST  
**Section 5**:   Transformer Model – Training & Evaluation on MNIST  


In [1]:
# Mount Google Drive inside the Colab VM; the MNIST IDX files are later read
# from a folder under /content/drive/MyDrive. This cell only works on Colab.
from google.colab import drive
drive.mount('/content/drive',force_remount=True)

Mounted at /content/drive


# Section 1: Dataset Loading & Preprocessing

In [4]:
import numpy as np
import struct

def load_images(filename):
    """Read an uncompressed IDX3 image file (the raw MNIST format).

    Args:
        filename: Path to the IDX3-ubyte file.

    Returns:
        A float32 array of shape (num, rows, cols, 1) with pixel values
        scaled from 0..255 to 0..1.

    Raises:
        ValueError: If the header is truncated, the magic number is not the
            IDX3 image marker (2051), or the pixel payload does not contain
            exactly num * rows * cols bytes.
    """
    with open(filename, 'rb') as f:
        header = f.read(16)
        if len(header) != 16:
            raise ValueError(f"{filename}: truncated IDX3 header")

        # Header layout: four big-endian unsigned 32-bit integers.
        magic, num, rows, cols = struct.unpack('>IIII', header)
        if magic != 2051:
            # Catches e.g. a label file or a still-gzipped file passed by mistake.
            raise ValueError(
                f"{filename}: bad magic number {magic}, expected 2051 (IDX3 images)"
            )
        pixels = np.frombuffer(f.read(), dtype=np.uint8)

    expected = num * rows * cols
    if pixels.size != expected:
        raise ValueError(
            f"{filename}: expected {expected} pixel bytes, found {pixels.size}"
        )

    # Trailing axis of size 1 is the (single) grayscale channel.
    images = pixels.reshape(num, rows, cols, 1)
    return images.astype(np.float32) / 255.0


In [5]:
def load_labels(filename):
    """Read an uncompressed IDX1 label file (the raw MNIST format).

    Args:
        filename: Path to the IDX1-ubyte file.

    Returns:
        A 1-D uint8 array holding one label per item.

    Raises:
        ValueError: If the header is truncated, the magic number is not the
            IDX1 label marker (2049), or the number of label bytes differs
            from the count declared in the header.
    """
    with open(filename, 'rb') as f:
        header = f.read(8)
        if len(header) != 8:
            raise ValueError(f"{filename}: truncated IDX1 header")

        # Header layout: two big-endian unsigned 32-bit integers.
        magic, num = struct.unpack('>II', header)
        if magic != 2049:
            # Catches e.g. an image file or a still-gzipped file passed by mistake.
            raise ValueError(
                f"{filename}: bad magic number {magic}, expected 2049 (IDX1 labels)"
            )
        labels = np.frombuffer(f.read(), dtype=np.uint8)

    if labels.size != num:
        raise ValueError(
            f"{filename}: header declares {num} labels, found {labels.size}"
        )
    return labels

In [6]:
# Root folder on the mounted Drive; each IDX file sits inside a sub-folder
# that carries the same name as the file itself.
base_path = '/content/drive/MyDrive/mnist_data'

# Training split: images, then labels.
x_train, y_train = (
    load_images(f'{base_path}/train-images-idx3-ubyte/train-images-idx3-ubyte'),
    load_labels(f'{base_path}/train-labels-idx1-ubyte/train-labels-idx1-ubyte'),
)

# Test split: images, then labels.
x_test, y_test = (
    load_images(f'{base_path}/t10k-images-idx3-ubyte/t10k-images-idx3-ubyte'),
    load_labels(f'{base_path}/t10k-labels-idx1-ubyte/t10k-labels-idx1-ubyte'),
)


In [7]:
# Sanity check: report (images, labels) shapes for each split.
print("Train shapes:", *(arr.shape for arr in (x_train, y_train)))
print("Test shapes:", *(arr.shape for arr in (x_test, y_test)))

Train shapes: (60000, 28, 28, 1) (60000,)
Test shapes: (10000, 28, 28, 1) (10000,)


# Section 2: LeNet Model – Training & Evaluation on MNIST

In [8]:
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score

In [9]:
#LeNet model architecture
def create_lenet():
    """Assemble the LeNet-style CNN used for 28x28 single-channel images.

    Returns:
        An uncompiled Keras Sequential model ending in a 10-way softmax.
    """
    net = models.Sequential()
    net.add(layers.Input(shape=(28,28,1)))

    # Convolutional feature extractor: two conv + average-pool stages.
    net.add(layers.Conv2D(6, kernel_size=5, activation='relu', padding='same'))
    net.add(layers.AveragePooling2D(pool_size=2))
    net.add(layers.Conv2D(16, kernel_size=5, activation='relu'))
    net.add(layers.AveragePooling2D(pool_size=2))

    # Fully connected classifier head.
    net.add(layers.Flatten())
    net.add(layers.Dense(120, activation='relu'))
    net.add(layers.Dense(84, activation='relu'))
    net.add(layers.Dense(10, activation='softmax'))
    return net

In [10]:
# Fix the Python, NumPy and TensorFlow seeds before the weights are created so
# that weight initialisation and batch shuffling are repeatable when this
# section is re-run (GPU kernels may still add small run-to-run differences).
tf.keras.utils.set_random_seed(42)

lenet = create_lenet()

# Labels are integer class ids (not one-hot), hence the sparse loss.
lenet.compile(optimizer = 'adam',
              loss = 'sparse_categorical_crossentropy',
              metrics = ['accuracy'])


In [11]:
# Train LeNet; 10% of the training data is held out for validation.
lenet.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=25,
    validation_split=0.1,
)

# Score on the untouched test split (progress bar silenced).
test_loss, test_acc = lenet.evaluate(x_test, y_test, verbose=0)
print(f"\n LeNet Test Accuracy: {test_acc:.4f}")

Epoch 1/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - accuracy: 0.8211 - loss: 0.6184 - val_accuracy: 0.9755 - val_loss: 0.0825
Epoch 2/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9724 - loss: 0.0916 - val_accuracy: 0.9808 - val_loss: 0.0640
Epoch 3/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9809 - loss: 0.0612 - val_accuracy: 0.9848 - val_loss: 0.0521
Epoch 4/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9854 - loss: 0.0447 - val_accuracy: 0.9845 - val_loss: 0.0513
Epoch 5/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9881 - loss: 0.0366 - val_accuracy: 0.9880 - val_loss: 0.0410
Epoch 6/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9903 - loss: 0.0313 - val_accuracy: 0.9870 - val_loss: 0.0437
Epoch 7/25
[1m844/844[0m 

In [12]:
# Predicted class = index of the largest softmax output for each test image.
lenet_probs = lenet.predict(x_test)
y_pred = lenet_probs.argmax(axis=1)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


In [13]:
print("\n LeNet Classification Report:")
print(classification_report(y_test, y_pred, digits=4))

# Store metrics in the same form as the other models so all four can be
# compared side by side. Relies on y_pred still holding LeNet's predictions.
lenet_metrics = {
    "accuracy": accuracy_score(y_test, y_pred),
    "precision": precision_score(y_test, y_pred, average='weighted'),
    "recall": recall_score(y_test, y_pred, average='weighted')
}


 LeNet Classification Report:
              precision    recall  f1-score   support

           0     0.9889    0.9980    0.9934       980
           1     0.9956    0.9947    0.9952      1135
           2     0.9942    0.9903    0.9922      1032
           3     0.9844    0.9980    0.9912      1010
           4     0.9928    0.9878    0.9903       982
           5     0.9877    0.9877    0.9877       892
           6     0.9937    0.9854    0.9895       958
           7     0.9922    0.9903    0.9912      1028
           8     0.9938    0.9867    0.9902       974
           9     0.9891    0.9931    0.9911      1009

    accuracy                         0.9913     10000
   macro avg     0.9912    0.9912    0.9912     10000
weighted avg     0.9913    0.9913    0.9913     10000



# Section 3: ResNet Model – Training & Evaluation on MNIST

In [14]:
def residual_block(x, filters, kernel_size=3):
    """Apply a two-convolution residual block with a skip connection.

    Args:
        x: Input feature map (Keras tensor); assumes channels-last layout.
        filters: Number of output channels of both convolutions.
        kernel_size: Convolution kernel size (default 3).

    Returns:
        The ReLU-activated sum of the convolutional branch and the shortcut.
        Spatial size is unchanged because both convolutions use 'same' padding.
    """
    shortcut = x

    # Main branch: conv -> BN -> conv -> BN (final ReLU is applied after the add).
    x = layers.Conv2D(filters, kernel_size, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(filters, kernel_size, padding='same')(x)
    x = layers.BatchNormalization()(x)

    # The element-wise add needs matching channel counts. When the block changes
    # the width, project the shortcut with a 1x1 convolution; when the widths
    # already match (as in every existing call) the shortcut stays an identity.
    if shortcut.shape[-1] != filters:
        shortcut = layers.Conv2D(filters, 1, padding='same')(shortcut)

    # Add skip connection
    x = layers.add([x, shortcut])
    x = layers.Activation('relu')(x)
    return x


In [15]:
def create_resnet_mnist(input_shape=(28, 28, 1), num_classes=10):
    """Build a small residual CNN classifier.

    Args:
        input_shape: Shape of one input image (without the batch axis).
        num_classes: Width of the final softmax layer.

    Returns:
        An uncompiled Keras functional Model.
    """
    image_in = layers.Input(shape=input_shape)

    # Stem convolution lifts the input to 32 channels so the residual adds line up.
    features = layers.Conv2D(32, 3, padding='same', activation='relu')(image_in)

    # Two identical stages: residual block followed by 2x2 max-pooling.
    for _ in range(2):
        features = residual_block(features, 32)
        features = layers.MaxPooling2D()(features)

    # Classifier head.
    flat = layers.Flatten()(features)
    hidden = layers.Dense(128, activation='relu')(flat)
    class_probs = layers.Dense(num_classes, activation='softmax')(hidden)

    return models.Model(image_in, class_probs)

In [16]:
# Fix the Python, NumPy and TensorFlow seeds before the weights are created so
# that weight initialisation and batch shuffling are repeatable when this
# section is re-run (GPU kernels may still add small run-to-run differences).
tf.keras.utils.set_random_seed(42)

resnet = create_resnet_mnist()

# Labels are integer class ids (not one-hot), hence the sparse loss.
resnet.compile(optimizer='adam',
               loss='sparse_categorical_crossentropy',
               metrics=['accuracy'])


In [17]:
# Train ResNet; 10% of the training data is held out for validation.
resnet.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=25,
    validation_split=0.1,
)

# Score on the untouched test split (progress bar silenced).
test_loss, test_acc = resnet.evaluate(x_test, y_test, verbose=0)
print(f"\nResNet Test Accuracy: {test_acc:.4f}")

Epoch 1/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 12ms/step - accuracy: 0.8871 - loss: 0.4113 - val_accuracy: 0.9638 - val_loss: 0.1203
Epoch 2/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 6ms/step - accuracy: 0.9837 - loss: 0.0532 - val_accuracy: 0.9900 - val_loss: 0.0404
Epoch 3/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.9887 - loss: 0.0372 - val_accuracy: 0.9913 - val_loss: 0.0308
Epoch 4/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.9906 - loss: 0.0288 - val_accuracy: 0.9895 - val_loss: 0.0383
Epoch 5/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.9928 - loss: 0.0226 - val_accuracy: 0.9882 - val_loss: 0.0468
Epoch 6/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - accuracy: 0.9940 - loss: 0.0191 - val_accuracy: 0.9933 - val_loss: 0.0284
Epoch 7/25
[1m844/844

In [18]:
# Predicted class = index of the largest softmax output for each test image.
resnet_probs = resnet.predict(x_test)
y_pred = resnet_probs.argmax(axis=1)

print("\nResNet Classification Report:")
print(classification_report(y_test, y_pred, digits=4))

# Headline numbers kept for the cross-model comparison; precision and recall
# are support-weighted averages over the ten classes.
resnet_accuracy = accuracy_score(y_test, y_pred)
resnet_precision = precision_score(y_test, y_pred, average='weighted')
resnet_recall = recall_score(y_test, y_pred, average='weighted')
resnet_metrics = {
    "accuracy": resnet_accuracy,
    "precision": resnet_precision,
    "recall": resnet_recall,
}

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step

ResNet Classification Report:
              precision    recall  f1-score   support

           0     0.9959    0.9949    0.9954       980
           1     0.9887    1.0000    0.9943      1135
           2     0.9971    0.9903    0.9937      1032
           3     0.9796    0.9970    0.9882      1010
           4     0.9939    0.9898    0.9918       982
           5     0.9866    0.9888    0.9877       892
           6     0.9958    0.9906    0.9932       958
           7     0.9913    0.9961    0.9937      1028
           8     0.9858    0.9959    0.9908       974
           9     0.9969    0.9663    0.9814      1009

    accuracy                         0.9911     10000
   macro avg     0.9911    0.9910    0.9910     10000
weighted avg     0.9912    0.9911    0.9911     10000



# Section 4: VGG16 Model – Training & Evaluation on MNIST

In [19]:
def create_vgg_mnist(input_shape=(32, 32, 3), num_classes=10):
    """Build a compact VGG-style CNN (two double-conv blocks plus a dense head).

    This follows the VGG pattern of stacked 3x3 convolutions followed by
    max-pooling, but with only two blocks rather than the full VGG16 depth.

    Args:
        input_shape: Shape of one input image (without the batch axis).
        num_classes: Width of the final softmax layer.

    Returns:
        An uncompiled Keras Sequential model.
    """
    return models.Sequential([
        # An explicit Input layer replaces `input_shape=` on the first Conv2D,
        # which recent Keras versions warn against for Sequential models.
        layers.Input(shape=input_shape),

        # Block 1: 64 filters.
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D(pool_size=(2, 2)),

        # Block 2: 128 filters.
        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D(pool_size=(2, 2)),

        # Classifier head with dropout for regularisation.
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ])


In [20]:
# The VGG-style model takes 32x32 three-channel input, so upscale the 28x28
# grayscale images (default interpolation) and replicate the single channel.
x_train_vgg = tf.image.grayscale_to_rgb(tf.image.resize(x_train, [32, 32]))
x_test_vgg = tf.image.grayscale_to_rgb(tf.image.resize(x_test, [32, 32]))

In [21]:
# Fix the Python, NumPy and TensorFlow seeds before the weights are created so
# that weight initialisation, dropout masks and batch shuffling are repeatable
# when this section is re-run (GPU kernels may still add small differences).
tf.keras.utils.set_random_seed(42)

# Create and compile VGG model
vgg = create_vgg_mnist()
vgg.compile(optimizer='adam',
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'])

# Train on the resized RGB images; 10% of the training data is held out for validation.
vgg.fit(x_train_vgg, y_train, epochs=25, batch_size=64, validation_split=0.1)

# Score on the untouched test split (progress bar silenced).
test_loss, test_acc = vgg.evaluate(x_test_vgg, y_test, verbose=0)
print(f"\nVGG16-style Test Accuracy: {test_acc:.4f}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 16ms/step - accuracy: 0.9062 - loss: 0.2923 - val_accuracy: 0.9865 - val_loss: 0.0469
Epoch 2/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 12ms/step - accuracy: 0.9861 - loss: 0.0481 - val_accuracy: 0.9920 - val_loss: 0.0323
Epoch 3/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 12ms/step - accuracy: 0.9909 - loss: 0.0308 - val_accuracy: 0.9923 - val_loss: 0.0306
Epoch 4/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 12ms/step - accuracy: 0.9922 - loss: 0.0231 - val_accuracy: 0.9927 - val_loss: 0.0294
Epoch 5/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 11ms/step - accuracy: 0.9933 - loss: 0.0196 - val_accuracy: 0.9918 - val_loss: 0.0365
Epoch 6/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 12ms/step - accuracy: 0.9951 - loss: 0.0153 - val_accuracy: 0.9918 - val_loss: 0.0407
Epoch 7/25
[1m8

In [22]:
# Predicted class = index of the largest softmax output for each test image.
vgg_probs = vgg.predict(x_test_vgg)
y_pred = vgg_probs.argmax(axis=1)

print("\n VGG16-style Classification Report:")
print(classification_report(y_test, y_pred, digits=4))

# Headline numbers kept for the cross-model comparison; precision and recall
# are support-weighted averages over the ten classes.
vgg_accuracy = accuracy_score(y_test, y_pred)
vgg_precision = precision_score(y_test, y_pred, average='weighted')
vgg_recall = recall_score(y_test, y_pred, average='weighted')
vgg_metrics = {
    "accuracy": vgg_accuracy,
    "precision": vgg_precision,
    "recall": vgg_recall,
}


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step

 VGG16-style Classification Report:
              precision    recall  f1-score   support

           0     0.9969    0.9990    0.9980       980
           1     0.9982    0.9965    0.9974      1135
           2     0.9971    0.9922    0.9947      1032
           3     0.9921    0.9970    0.9946      1010
           4     0.9939    0.9959    0.9949       982
           5     0.9944    0.9933    0.9938       892
           6     0.9937    0.9927    0.9932       958
           7     0.9903    0.9981    0.9942      1028
           8     0.9949    0.9969    0.9959       974
           9     0.9970    0.9871    0.9920      1009

    accuracy                         0.9949     10000
   macro avg     0.9949    0.9949    0.9949     10000
weighted avg     0.9949    0.9949    0.9949     10000



# Section 5: Transformer Model – Training & Evaluation on MNIST


In [23]:
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score

In [24]:
# Transformer hyperparameters (read as module-level globals by create_transformer_model)

NUM_CLASSES = 10  # width of the final softmax layer
D_MODEL = 64  # width of the input projection; also passed as the per-head key_dim
NUM_HEADS = 4  # attention heads in each encoder block
FF_DIM = 128  # hidden width of each encoder block's feed-forward sub-layer
NUM_LAYERS = 4  # number of stacked encoder blocks
SEQ_LENGTH = 28  # sequence length of the model input (first axis of one sample)
FEATURES = 28  # features per sequence step (second axis of one sample)

In [25]:
# Positional Encoding Layer
class PositionalEncoding(layers.Layer):
    """Add a fixed sinusoidal position signal to a (batch, seq, d_model) input.

    The encoding table is computed once at construction time and is not
    trainable.

    Args:
        seq_len: Maximum sequence length the table covers.
        d_model: Feature width of the inputs the layer will receive.
        **kwargs: Standard Keras Layer arguments (e.g. ``name``, ``dtype``).
    """

    def __init__(self, seq_len, d_model, **kwargs):
        # Forward the standard Layer kwargs so the layer can be named and
        # rebuilt from its config.
        super().__init__(**kwargs)
        self.seq_len = seq_len
        self.d_model = d_model
        self.pos_encoding = self.get_positional_encoding(seq_len, d_model)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialised."""
        config = super().get_config()
        config.update({"seq_len": self.seq_len, "d_model": self.d_model})
        return config

    def get_positional_encoding(self, position, d_model):
        """Build the encoding table with shape (1, position, d_model)."""
        angle_rads = self.get_angles(
            pos=tf.range(position, dtype=tf.float32)[:, tf.newaxis],
            i=tf.range(d_model, dtype=tf.float32)[tf.newaxis, :],
            d_model=d_model
        )
        # Sine of the even-indexed columns, cosine of the odd-indexed columns.
        sines = tf.math.sin(angle_rads[:, 0::2])
        cosines = tf.math.cos(angle_rads[:, 1::2])

        # The two halves are concatenated (not interleaved): all sine features
        # come first, followed by all cosine features.
        pos_encoding = tf.concat([sines, cosines], axis=-1)

        # Leading axis of size 1 lets the table broadcast over the batch.
        return pos_encoding[tf.newaxis, ...]

    def get_angles(self, pos, i, d_model):
        """Return pos / 10000^(2*(i//2)/d_model) for every (position, feature) pair."""
        angle_rates = 1 / tf.pow(10000., (2 * (i // 2)) / tf.cast(d_model, tf.float32))
        return pos * angle_rates

    def call(self, x):
        # Trim the table to the actual sequence length of x before adding.
        return x + self.pos_encoding[:, :tf.shape(x)[1], :]


In [26]:
# Transformer Encoder Block
def transformer_encoder(inputs, d_model, num_heads, ff_dim):
    """Apply one pre-norm Transformer encoder block.

    Args:
        inputs: Sequence tensor whose last axis has width ``d_model``.
        d_model: Output width of the block; also used as the per-head
            ``key_dim`` of the attention layer.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward sub-layer.

    Returns:
        A tensor produced by two residual sub-layers: self-attention, then a
        two-layer feed-forward network.
    """
    # Sub-layer 1: layer norm -> self-attention -> residual add.
    normed = layers.LayerNormalization(epsilon=1e-6)(inputs)
    self_attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
    attended = self_attention(normed, normed)
    attn_out = layers.Add()([attended, inputs])

    # Sub-layer 2: layer norm -> feed-forward -> residual add.
    hidden = layers.LayerNormalization(epsilon=1e-6)(attn_out)
    hidden = layers.Dense(ff_dim, activation='relu')(hidden)
    projected = layers.Dense(d_model)(hidden)
    return layers.Add()([projected, attn_out])

In [27]:
# Transformer model for MNIST
def create_transformer_model():
    """Build the Transformer classifier from the module-level hyperparameters.

    Each sample is a (SEQ_LENGTH, FEATURES) sequence that is projected to
    D_MODEL, position-encoded, passed through NUM_LAYERS encoder blocks,
    mean-pooled over the sequence axis and classified.

    Returns:
        An uncompiled Keras functional Model with a NUM_CLASSES-way softmax.
    """
    sequence_in = layers.Input(shape=(SEQ_LENGTH, FEATURES))

    # Embed every sequence step into the model width and add position information.
    tokens = layers.Dense(D_MODEL)(sequence_in)
    tokens = PositionalEncoding(SEQ_LENGTH, D_MODEL)(tokens)

    # Encoder stack.
    for _ in range(NUM_LAYERS):
        tokens = transformer_encoder(tokens, D_MODEL, NUM_HEADS, FF_DIM)

    # Collapse the sequence axis, then classify.
    pooled = layers.GlobalAveragePooling1D()(tokens)
    pooled = layers.Dropout(0.1)(pooled)
    hidden = layers.Dense(128, activation='relu')(pooled)
    class_probs = layers.Dense(NUM_CLASSES, activation='softmax')(hidden)

    return models.Model(inputs=sequence_in, outputs=class_probs)

# NOTE(review): this re-downloads MNIST through Keras and rebinds the global
# x_train / y_train / x_test / y_test. The model input is (SEQ_LENGTH, FEATURES),
# i.e. images without the trailing channel axis used by the earlier sections, so
# re-running those sections after this cell requires re-running Section 1 first.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# Fix the Python, NumPy and TensorFlow seeds before the weights are created so
# that weight initialisation, dropout masks and batch shuffling are repeatable
# when this section is re-run (GPU kernels may still add small differences).
tf.keras.utils.set_random_seed(42)

transformer_model = create_transformer_model()

# Labels are integer class ids (not one-hot), hence the sparse loss.
transformer_model.compile(optimizer='adam',
                          loss='sparse_categorical_crossentropy',
                          metrics=['accuracy'])

# Train; 10% of the training data is held out for validation.
transformer_model.fit(x_train, y_train, epochs=25, batch_size=64, validation_split=0.1)

# Score on the untouched test split (progress bar silenced).
test_loss, test_acc = transformer_model.evaluate(x_test, y_test, verbose=0)
print(f"\nTransformer Test Accuracy: {test_acc:.4f}")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 20ms/step - accuracy: 0.7185 - loss: 0.8005 - val_accuracy: 0.9732 - val_loss: 0.0905
Epoch 2/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 9ms/step - accuracy: 0.9638 - loss: 0.1250 - val_accuracy: 0.9713 - val_loss: 0.0946
Epoch 3/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.9677 - loss: 0.1016 - val_accuracy: 0.9827 - val_loss: 0.0601
Epoch 4/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 9ms/step - accuracy: 0.9757 - loss: 0.0827 - val_accuracy: 0.9770 - val_loss: 0.0760
Epoch 5/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.9770 - loss: 0.0753 - val_accuracy: 0.9772 - val_loss: 0.0674
Epoch 6/25

In [28]:
# Predicted class = index of the largest softmax output for each test image.
transformer_probs = transformer_model.predict(x_test)
y_pred = transformer_probs.argmax(axis=1)

print("\nTransformer Classification Report:")
print(classification_report(y_test, y_pred, digits=4))

# Headline numbers kept for the cross-model comparison; precision and recall
# are support-weighted averages over the ten classes.
transformer_accuracy = accuracy_score(y_test, y_pred)
transformer_precision = precision_score(y_test, y_pred, average='weighted')
transformer_recall = recall_score(y_test, y_pred, average='weighted')
transformer_metrics = {
    "accuracy": transformer_accuracy,
    "precision": transformer_precision,
    "recall": transformer_recall,
}


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step

Transformer Classification Report:
              precision    recall  f1-score   support

           0     0.9948    0.9816    0.9882       980
           1     0.9956    0.9859    0.9907      1135
           2     0.9846    0.9903    0.9874      1032
           3     0.9920    0.9871    0.9896      1010
           4     0.9878    0.9919    0.9898       982
           5     0.9876    0.9821    0.9848       892
           6     0.9753    0.9885    0.9819       958
           7     0.9855    0.9893    0.9874      1028
           8     0.9592    0.9908    0.9747       974
           9     0.9909    0.9663    0.9784      1009

    accuracy                         0.9854     10000
   macro avg     0.9853    0.9854    0.9853     10000
weighted avg     0.9855    0.9854    0.9854     10000

