# NNDL_LAB4_2348045

In [None]:
#1

In [4]:
import warnings
warnings.filterwarnings("ignore")
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras import Input, Model  # Import Input and Model

# Example problem size used by both the Sequential and Functional models.
input_dim = 100  # Example input dimension
output_dim = 10  # Example output dimension (for classification with 10 classes)


def _regularized_dense(units, activation, name):
    """Build a Dense layer with the settings shared by every layer of the model.

    Every layer uses He-normal kernel initialisation, zero-initialised biases
    and an L2 kernel penalty of 0.001; only the width, activation and layer
    name differ.

    Args:
        units: Number of output units of the layer.
        activation: Name of the activation function.
        name: Layer name shown in ``model.summary()``.

    Returns:
        A new, unconnected ``layers.Dense`` instance.
    """
    return layers.Dense(
        units,
        activation=activation,
        name=name,
        kernel_initializer='he_normal',
        bias_initializer='zeros',
        # A fresh regularizer object per layer, as in the hand-written version.
        kernel_regularizer=tf.keras.regularizers.l2(0.001),
    )


# Sequential API
# The input shape is declared with an explicit Input object instead of the
# `input_shape=` keyword on the first Dense layer, which Keras 3 warns against.
model_seq = models.Sequential([
    Input(shape=(input_dim,)),
    _regularized_dense(512, 'relu', 'H-Layer-1'),
    _regularized_dense(512, 'relu', 'H-Layer-2'),
    _regularized_dense(1024, 'relu', 'H-Layer-3'),
    _regularized_dense(output_dim, 'softmax', 'O-Layer'),
])

model_seq.summary()



Sequential API Model
The Sequential API model in the notebook is constructed using TensorFlow's Keras library. It consists of three hidden dense layers (H-Layer-1, H-Layer-2, H-Layer-3) with ReLU activation functions, which introduce non-linearity into the model. The output layer (O-Layer) uses a softmax activation function, suitable for multi-class classification problems; note that the input and output sizes used here are placeholder values (100 features, 10 classes) rather than those of a specific dataset such as MNIST. Every layer uses He-normal weight initialisation, zero-initialised biases and an L2 weight penalty of 0.001. The model summary indicates it has a total of 849,930 trainable parameters. This architecture gives the model sufficient capacity to learn the underlying patterns in the data while keeping its structure simple.

In [5]:
# Functional API
# Same stack as the Sequential model, wired explicitly tensor-by-tensor.
inputs = Input(shape=(input_dim,))

# Chain the three ReLU hidden layers; each one consumes the previous output.
x = inputs
for hidden_units, hidden_name in ((512, 'H-Layer-1'), (512, 'H-Layer-2'), (1024, 'H-Layer-3')):
    hidden_layer = layers.Dense(
        hidden_units,
        activation='relu',
        name=hidden_name,
        kernel_initializer='he_normal',
        bias_initializer='zeros',
        kernel_regularizer=tf.keras.regularizers.l2(0.001),
    )
    x = hidden_layer(x)

# Softmax output layer with the same initialiser/regulariser settings.
output_layer = layers.Dense(
    output_dim,
    activation='softmax',
    name='O-Layer',
    kernel_initializer='he_normal',
    bias_initializer='zeros',
    kernel_regularizer=tf.keras.regularizers.l2(0.001),
)
outputs = output_layer(x)

model_func = Model(inputs, outputs)

model_func.summary()


Functional API Model
The Functional API model follows the same architecture as the Sequential API model but uses TensorFlow's Input, Model, and layers.Dense classes to define the model more flexibly. This approach allows for more complex architectures where layers can be reused or connected in non-sequential ways (for example branches, shared layers, or multiple inputs and outputs). Despite this flexibility, the model here keeps the same structure: three hidden dense layers with ReLU activations and an output layer with a softmax activation. The summary of this model also reports 849,930 trainable parameters, confirming that both models have the same capacity and complexity.

In [None]:
#2

In [10]:
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


# Load data
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every image into a 784-long float32 vector and divide by 255.
x_train, x_test = (
    images.reshape(-1, 784).astype('float32') / 255.0
    for images in (x_train, x_test)
)

# One-hot encode the labels over 10 classes.
y_train, y_test = (
    to_categorical(labels, 10)
    for labels in (y_train, y_test)
)

In [11]:
# Simple Dense Layers
# Baseline network: two 512-unit ReLU hidden layers and a 10-way softmax output.
# The input shape is declared with an explicit Input object instead of the
# `input_shape=` keyword on the first Dense layer, which Keras 3 warns against.
model1 = Sequential([
    Input(shape=(784,)),
    Dense(512, activation='relu'),
    Dense(512, activation='relu'),
    Dense(10, activation='softmax')
])

model1.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# 20% of the training data is reserved for validation during fitting.
history1 = model1.fit(x_train, y_train, validation_split=0.2, epochs=10, batch_size=32)

# Final score on the held-out test set.
test_loss1, test_acc1 = model1.evaluate(x_test, y_test)
print(f'Architecture 1 Test accuracy: {test_acc1}')


Epoch 1/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.8958 - loss: 0.3482 - val_accuracy: 0.9601 - val_loss: 0.1356
Epoch 2/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.9724 - loss: 0.0902 - val_accuracy: 0.9711 - val_loss: 0.0949
Epoch 3/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.9828 - loss: 0.0586 - val_accuracy: 0.9686 - val_loss: 0.1096
Epoch 4/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - accuracy: 0.9865 - loss: 0.0435 - val_accuracy: 0.9717 - val_loss: 0.1034
Epoch 5/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.9898 - loss: 0.0316 - val_accuracy: 0.9743 - val_loss: 0.1001
Epoch 6/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.9904 - loss: 0.0276 - val_accuracy: 0.9753 - val_loss: 0.1069
Epoch 7/10

In [12]:
from tensorflow.keras.layers import Dropout

# Adding Dropout
# Same layout as the baseline, with a Dropout(0.2) after each hidden layer.
# The input shape is declared with an explicit Input object instead of the
# `input_shape=` keyword on the first Dense layer, which Keras 3 warns against.
model2 = Sequential([
    Input(shape=(784,)),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(10, activation='softmax')
])

model2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# 20% of the training data is reserved for validation during fitting.
history2 = model2.fit(x_train, y_train, validation_split=0.2, epochs=10, batch_size=32)

# Final score on the held-out test set.
test_loss2, test_acc2 = model2.evaluate(x_test, y_test)
print(f'Architecture 2 Test accuracy: {test_acc2}')


Epoch 1/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 8ms/step - accuracy: 0.8855 - loss: 0.3778 - val_accuracy: 0.9654 - val_loss: 0.1162
Epoch 2/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.9670 - loss: 0.1077 - val_accuracy: 0.9694 - val_loss: 0.0989
Epoch 3/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.9731 - loss: 0.0826 - val_accuracy: 0.9737 - val_loss: 0.0855
Epoch 4/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.9805 - loss: 0.0636 - val_accuracy: 0.9759 - val_loss: 0.0833
Epoch 5/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.9842 - loss: 0.0492 - val_accuracy: 0.9767 - val_loss: 0.0846
Epoch 6/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.9842 - loss: 0.0499 - val_accuracy: 0.9739 - val_loss: 0.1091
Epoch 7/10

In [13]:
from tensorflow.keras.layers import BatchNormalization

# Adding Batch Normalization
# Same layout as the baseline, with BatchNormalization after each hidden layer.
# The input shape is declared with an explicit Input object instead of the
# `input_shape=` keyword on the first Dense layer, which Keras 3 warns against.
model3 = Sequential([
    Input(shape=(784,)),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dense(10, activation='softmax')
])

model3.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# 20% of the training data is reserved for validation during fitting.
history3 = model3.fit(x_train, y_train, validation_split=0.2, epochs=10, batch_size=32)

# Final score on the held-out test set.
test_loss3, test_acc3 = model3.evaluate(x_test, y_test)
print(f'Architecture 3 Test accuracy: {test_acc3}')


Epoch 1/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 8ms/step - accuracy: 0.8970 - loss: 0.3470 - val_accuracy: 0.9625 - val_loss: 0.1259
Epoch 2/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.9649 - loss: 0.1097 - val_accuracy: 0.9719 - val_loss: 0.0937
Epoch 3/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.9732 - loss: 0.0855 - val_accuracy: 0.9694 - val_loss: 0.1014
Epoch 4/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.9770 - loss: 0.0719 - val_accuracy: 0.9735 - val_loss: 0.0936
Epoch 5/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.9816 - loss: 0.0577 - val_accuracy: 0.9747 - val_loss: 0.0862
Epoch 6/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.9854 - loss: 0.0468 - val_accuracy: 0.9762 - val_loss: 0.0889
Epoch 7/10

Simple Dense Layers Model: 

This model, without any additional regularization techniques, achieves a test accuracy of approximately 97.83%. It serves as a baseline to compare the effects of dropout and batch normalization.

Model with Dropout: 

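Dropout layers are added after each hidden dense layer to reduce overfitting by randomly dropping units during training. This model achieves a slightly higher test accuracy of around 97.91%, which is consistent with dropout improving generalization, although a difference this small (under 0.1 percentage points, with no fixed random seed) may also fall within run-to-run variation.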

Model with Batch Normalization: 

Batch normalization layers are added after each hidden dense layer to stabilize and speed up training by normalizing the output of the preceding activation. This model achieves a test accuracy of approximately 97.90%, marginally above the baseline; as with dropout, the gain is small enough that repeated runs would be needed to confirm that batch normalization improves performance here.

In [None]:
#3

In [14]:
# Common model
def create_model():
    """Return a fresh, uncompiled classifier for flattened 784-feature inputs.

    The network has two 512-unit ReLU hidden layers followed by a 10-unit
    softmax output. A new model with newly initialised weights is built on
    every call, so each optimizer/loss experiment starts from scratch.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    # The input shape is declared with an explicit Input object instead of the
    # `input_shape=` keyword on the first Dense layer, which Keras 3 warns against.
    model = Sequential([
        Input(shape=(784,)),
        Dense(512, activation='relu'),
        Dense(512, activation='relu'),
        Dense(10, activation='softmax')
    ])
    return model

In [16]:
# Compile with adam optimizer and categorical_crossentropy loss
model_adam = create_model()
model_adam.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# Train for 10 epochs, reserving 20% of the training data for validation.
history_adam = model_adam.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)

Epoch 1/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 8ms/step - accuracy: 0.8953 - loss: 0.3355 - val_accuracy: 0.9649 - val_loss: 0.1124
Epoch 2/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - accuracy: 0.9744 - loss: 0.0822 - val_accuracy: 0.9682 - val_loss: 0.1063
Epoch 3/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.9831 - loss: 0.0536 - val_accuracy: 0.9722 - val_loss: 0.0977
Epoch 4/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.9864 - loss: 0.0421 - val_accuracy: 0.9758 - val_loss: 0.0878
Epoch 5/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - accuracy: 0.9909 - loss: 0.0283 - val_accuracy: 0.9773 - val_loss: 0.0825
Epoch 6/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - accuracy: 0.9919 - loss: 0.0228 - val_accuracy: 0.9735 - val_loss: 0.1119
Epoch 7/10

In [17]:
from tensorflow.keras.metrics import Precision

# Compile with sgd optimizer and binary_crossentropy loss
# NOTE(review): binary_crossentropy scores each of the 10 softmax outputs as an
# independent yes/no target, so its loss values are not comparable with the
# categorical_crossentropy run; presumably intentional for this exercise.
model_sgd = create_model()
model_sgd.compile(
    optimizer='sgd',
    loss='binary_crossentropy',
    metrics=[Precision()],
)
# Train for 10 epochs, reserving 20% of the training data for validation.
history_sgd = model_sgd.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)

Epoch 1/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 0.3635 - precision: 0.6356 - val_loss: 0.2153 - val_precision: 0.9939
Epoch 2/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.1966 - precision: 0.9781 - val_loss: 0.1427 - val_precision: 0.9539
Epoch 3/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.1391 - precision: 0.9438 - val_loss: 0.1126 - val_precision: 0.9364
Epoch 4/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.1128 - precision: 0.9304 - val_loss: 0.0961 - val_precision: 0.9328
Epoch 5/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.0985 - precision: 0.9261 - val_loss: 0.0855 - val_precision: 0.9314
Epoch 6/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.0888 - precision: 0.9242 - val_loss: 0.0783 - val_precision: 0.9315
Epoc

In [18]:
from tensorflow.keras.metrics import Recall

# Compile with rmsprop optimizer and mean_squared_error loss
model_rmsprop = create_model()
model_rmsprop.compile(
    optimizer='rmsprop',
    loss='mean_squared_error',
    metrics=[Recall()],
)
# Train for 10 epochs, reserving 20% of the training data for validation.
history_rmsprop = model_rmsprop.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)

Epoch 1/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 8ms/step - loss: 0.0220 - recall: 0.7772 - val_loss: 0.0063 - val_recall: 0.9517
Epoch 2/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - loss: 0.0061 - recall: 0.9529 - val_loss: 0.0053 - val_recall: 0.9598
Epoch 3/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - loss: 0.0038 - recall: 0.9724 - val_loss: 0.0042 - val_recall: 0.9682
Epoch 4/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - loss: 0.0028 - recall: 0.9804 - val_loss: 0.0039 - val_recall: 0.9711
Epoch 5/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - loss: 0.0023 - recall: 0.9848 - val_loss: 0.0040 - val_recall: 0.9711
Epoch 6/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - loss: 0.0017 - recall: 0.9884 - val_loss: 0.0037 - val_recall: 0.9730
Epoch 7/10
[1m1500/1500[0m [32m