# 1. Feedforward Neural Network (Basic ANN)

In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt

# Load dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize pixel values (0-255 -> 0-1)
x_train, x_test = x_train / 255.0, x_test / 255.0

# Build simple Feedforward Neural Network
model = Sequential([
    Flatten(input_shape=(28, 28)),  # Flatten 28x28 image to 1D array
    Dense(128, activation='relu'),
    Dense(10, activation='softmax')  # 10 output classes for digits 0-9
])

# Compile and train model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))

# Evaluate model
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test accuracy: {test_acc:.4f}")


  super().__init__(**kwargs)


Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - accuracy: 0.8804 - loss: 0.4272 - val_accuracy: 0.9598 - val_loss: 0.1343
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.9648 - loss: 0.1193 - val_accuracy: 0.9713 - val_loss: 0.0907
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.9771 - loss: 0.0740 - val_accuracy: 0.9753 - val_loss: 0.0832
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9843 - loss: 0.0527 - val_accuracy: 0.9761 - val_loss: 0.0769
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.9876 - loss: 0.0404 - val_accuracy: 0.9765 - val_loss: 0.0760
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9721 - loss: 0.0908  
Test accuracy: 0.9765


### Increase Network Depth (More Hidden Layers)

In [2]:
# Increase model depth
model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(256, activation='relu'),  # Increased neurons
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax')  # Output layer
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))

# Evaluate model
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test Accuracy with More Layers: {test_acc:.4f}")


Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 7ms/step - accuracy: 0.8861 - loss: 0.3820 - val_accuracy: 0.9669 - val_loss: 0.1094
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - accuracy: 0.9721 - loss: 0.0874 - val_accuracy: 0.9748 - val_loss: 0.0859
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 7ms/step - accuracy: 0.9803 - loss: 0.0622 - val_accuracy: 0.9761 - val_loss: 0.0808
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7ms/step - accuracy: 0.9865 - loss: 0.0438 - val_accuracy: 0.9729 - val_loss: 0.0964
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - accuracy: 0.9874 - loss: 0.0383 - val_accuracy: 0.9775 - val_loss: 0.0787
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9743 - loss: 0.0865  
Test Accuracy with More Layers: 0.9775


In [3]:
# Load dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize pixel values (0-255 -> 0-1)
x_train, x_test = x_train / 255.0, x_test / 255.0

# Increase model depth
model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(256, activation='relu'),  # Increased neurons
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax')  # Output layer
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))

# Evaluate model
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test Accuracy with More Layers: {test_acc:.4f}")

Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 7ms/step - accuracy: 0.8845 - loss: 0.3893 - val_accuracy: 0.9692 - val_loss: 0.1004
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7ms/step - accuracy: 0.9716 - loss: 0.0906 - val_accuracy: 0.9731 - val_loss: 0.0887
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - accuracy: 0.9787 - loss: 0.0668 - val_accuracy: 0.9732 - val_loss: 0.0867
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - accuracy: 0.9854 - loss: 0.0474 - val_accuracy: 0.9782 - val_loss: 0.0747
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - accuracy: 0.9886 - loss: 0.0359 - val_accuracy: 0.9720 - val_loss: 0.0945
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9694 - loss: 0.1098  
Test Accuracy with More Layers: 0.9720


### Add Dropout to Prevent Overfitting

In [4]:
from tensorflow.keras.layers import Dropout

# Model with Dropout
model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(256, activation='relu'),
    Dropout(0.3),  # Drop 30% of neurons

    Dense(128, activation='relu'),
    Dropout(0.3),

    Dense(64, activation='relu'),
    Dropout(0.3),

    Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))

test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test Accuracy with Dropout: {test_acc:.4f}")


Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 7ms/step - accuracy: 0.7991 - loss: 0.6312 - val_accuracy: 0.9596 - val_loss: 0.1339
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - accuracy: 0.9474 - loss: 0.1829 - val_accuracy: 0.9676 - val_loss: 0.1150
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - accuracy: 0.9600 - loss: 0.1420 - val_accuracy: 0.9699 - val_loss: 0.1018
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 7ms/step - accuracy: 0.9652 - loss: 0.1254 - val_accuracy: 0.9722 - val_loss: 0.0965
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - accuracy: 0.9680 - loss: 0.1155 - val_accuracy: 0.9766 - val_loss: 0.0855
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9725 - loss: 0.0998
Test Accuracy with Dropout: 0.9766


In [5]:
from tensorflow.keras.layers import Dropout

# Load dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize pixel values (0-255 -> 0-1)
x_train, x_test = x_train / 255.0, x_test / 255.0

# Model with Dropout
model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(256, activation='relu'),
    Dropout(0.3),  # Drop 30% of neurons

    Dense(128, activation='relu'),
    Dropout(0.3),

    Dense(64, activation='relu'),
    Dropout(0.3),

    Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))

test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test Accuracy with Dropout: {test_acc:.4f}")

Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 7ms/step - accuracy: 0.7888 - loss: 0.6618 - val_accuracy: 0.9606 - val_loss: 0.1378
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 7ms/step - accuracy: 0.9466 - loss: 0.1878 - val_accuracy: 0.9717 - val_loss: 0.0968
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 7ms/step - accuracy: 0.9600 - loss: 0.1383 - val_accuracy: 0.9730 - val_loss: 0.0878
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 8ms/step - accuracy: 0.9646 - loss: 0.1230 - val_accuracy: 0.9736 - val_loss: 0.0854
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 9ms/step - accuracy: 0.9707 - loss: 0.1046 - val_accuracy: 0.9769 - val_loss: 0.0832
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9724 - loss: 0.0995  
Test Accuracy with Dropout: 0.9769


### Add Batch Normalization for Faster Training

In [6]:
from tensorflow.keras.layers import BatchNormalization

# Model with Batch Normalization
model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(256, activation='relu'),
    BatchNormalization(),  # Normalize activations
    Dropout(0.3),

    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))

test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test Accuracy with Batch Normalization: {test_acc:.4f}")


Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 8ms/step - accuracy: 0.7952 - loss: 0.6723 - val_accuracy: 0.9543 - val_loss: 0.1380
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.9257 - loss: 0.2493 - val_accuracy: 0.9653 - val_loss: 0.1081
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 8ms/step - accuracy: 0.9398 - loss: 0.2042 - val_accuracy: 0.9694 - val_loss: 0.0998
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 9ms/step - accuracy: 0.9498 - loss: 0.1705 - val_accuracy: 0.9740 - val_loss: 0.0885
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.9552 - loss: 0.1523 - val_accuracy: 0.9734 - val_loss: 0.0858
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9677 - loss: 0.0953
Test Accuracy with Batch Normalization: 0.9734


In [7]:
from tensorflow.keras.layers import BatchNormalization

# Load dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize pixel values (0-255 -> 0-1)
x_train, x_test = x_train / 255.0, x_test / 255.0


# Model with Batch Normalization
model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(256, activation='relu'),
    BatchNormalization(),  # Normalize activations
    Dropout(0.3),

    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))

test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test Accuracy with Batch Normalization: {test_acc:.4f}")

Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 8ms/step - accuracy: 0.7957 - loss: 0.6716 - val_accuracy: 0.9553 - val_loss: 0.1412
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.9273 - loss: 0.2491 - val_accuracy: 0.9653 - val_loss: 0.1122
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.9406 - loss: 0.2008 - val_accuracy: 0.9691 - val_loss: 0.0961
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.9488 - loss: 0.1726 - val_accuracy: 0.9725 - val_loss: 0.0868
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 8ms/step - accuracy: 0.9542 - loss: 0.1547 - val_accuracy: 0.9752 - val_loss: 0.0789
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9713 - loss: 0.0884
Test Accuracy with Batch Normalization: 0.9752


### Use Learning Rate Scheduler (Adaptive Learning Rate)

In [8]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Learning rate scheduler callback
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), callbacks=[lr_scheduler])

test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test Accuracy with Learning Rate Scheduler: {test_acc:.4f}")


Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 8ms/step - accuracy: 0.9581 - loss: 0.1422 - val_accuracy: 0.9770 - val_loss: 0.0728 - learning_rate: 0.0010
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.9608 - loss: 0.1275 - val_accuracy: 0.9779 - val_loss: 0.0729 - learning_rate: 0.0010
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.9649 - loss: 0.1170 - val_accuracy: 0.9785 - val_loss: 0.0713 - learning_rate: 0.0010
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 8ms/step - accuracy: 0.9637 - loss: 0.1179 - val_accuracy: 0.9792 - val_loss: 0.0655 - learning_rate: 0.0010
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.9663 - loss: 0.1108 - val_accuracy: 0.9798 - val_loss: 0.0660 - learning_rate: 0.0010
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━

In [9]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Load dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize pixel values (0-255 -> 0-1)
x_train, x_test = x_train / 255.0, x_test / 255.0


# Learning rate scheduler callback
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), callbacks=[lr_scheduler])

test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test Accuracy with Learning Rate Scheduler: {test_acc:.4f}")

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 8ms/step - accuracy: 0.9741 - loss: 0.0838 - val_accuracy: 0.9808 - val_loss: 0.0645 - learning_rate: 0.0010
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 8ms/step - accuracy: 0.9737 - loss: 0.0877 - val_accuracy: 0.9812 - val_loss: 0.0614 - learning_rate: 0.0010
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.9736 - loss: 0.0872 - val_accuracy: 0.9832 - val_loss: 0.0581 - learning_rate: 0.0010
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.9751 - loss: 0.0827 - val_accuracy: 0.9819 - val_loss: 0.0599 - learning_rate: 0.0010
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 8ms/step - accuracy: 0.9770 - loss: 0.0738 - val_accuracy: 0.9842 - val_loss: 0.0558 - learning_rate: 0.0010
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━

### Add Early Stopping to Prevent Overtraining

In [10]:
from tensorflow.keras.callbacks import EarlyStopping

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=20, validation_data=(x_test, y_test), callbacks=[early_stopping])

test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test Accuracy with Early Stopping: {test_acc:.4f}")


Epoch 1/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 9ms/step - accuracy: 0.9811 - loss: 0.0629 - val_accuracy: 0.9839 - val_loss: 0.0538
Epoch 2/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.9807 - loss: 0.0616 - val_accuracy: 0.9829 - val_loss: 0.0564
Epoch 3/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.9813 - loss: 0.0618 - val_accuracy: 0.9836 - val_loss: 0.0594
Epoch 4/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.9812 - loss: 0.0650 - val_accuracy: 0.9843 - val_loss: 0.0520
Epoch 5/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.9808 - loss: 0.0636 - val_accuracy: 0.9856 - val_loss: 0.0548
Epoch 6/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 8ms/step - accuracy: 0.9816 - loss: 0.0620 - val_accuracy: 0.9844 - val_loss: 0.0530
Epoch 7/20

In [11]:
from tensorflow.keras.callbacks import EarlyStopping

# Load dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize pixel values (0-255 -> 0-1)
x_train, x_test = x_train / 255.0, x_test / 255.0


# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=20, validation_data=(x_test, y_test), callbacks=[early_stopping])

test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test Accuracy with Early Stopping: {test_acc:.4f}")

Epoch 1/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 8ms/step - accuracy: 0.9829 - loss: 0.0537 - val_accuracy: 0.9847 - val_loss: 0.0551
Epoch 2/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 8ms/step - accuracy: 0.9835 - loss: 0.0538 - val_accuracy: 0.9844 - val_loss: 0.0575
Epoch 3/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.9824 - loss: 0.0552 - val_accuracy: 0.9855 - val_loss: 0.0530
Epoch 4/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 8ms/step - accuracy: 0.9854 - loss: 0.0474 - val_accuracy: 0.9846 - val_loss: 0.0562
Epoch 5/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.9840 - loss: 0.0503 - val_accuracy: 0.9859 - val_loss: 0.0534
Epoch 6/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.9857 - loss: 0.0470 - val_accuracy: 0.9846 - val_loss: 0.0563
Epoch 7/20

### Optimized Model with All Improvements

In [12]:
from tensorflow.keras.optimizers import Adam

# Define improved ANN model
model = Sequential([
    Flatten(input_shape=(28, 28)),

    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(10, activation='softmax')  # Output layer for classification
])

# Compile model with optimized learning rate
optimizer = Adam(learning_rate=0.001)  
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Define callbacks for adaptive learning rate and early stopping
callbacks = [
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1),
    EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)
]

# Train model with all improvements
model.fit(x_train, y_train, epochs=20, batch_size=128, validation_data=(x_test, y_test), callbacks=callbacks)

# Evaluate model
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Final Test Accuracy: {test_acc:.4f}")


Epoch 1/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - accuracy: 0.7611 - loss: 0.7827 - val_accuracy: 0.9515 - val_loss: 0.1569 - learning_rate: 0.0010
Epoch 2/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 12ms/step - accuracy: 0.9374 - loss: 0.2112 - val_accuracy: 0.9651 - val_loss: 0.1121 - learning_rate: 0.0010
Epoch 3/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - accuracy: 0.9546 - loss: 0.1547 - val_accuracy: 0.9730 - val_loss: 0.0896 - learning_rate: 0.0010
Epoch 4/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - accuracy: 0.9625 - loss: 0.1265 - val_accuracy: 0.9728 - val_loss: 0.0842 - learning_rate: 0.0010
Epoch 5/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - accuracy: 0.9666 - loss: 0.1127 - val_accuracy: 0.9755 - val_loss: 0.0809 - learning_rate: 0.0010
Epoch 6/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[