In [8]:
# 1. Use the Sequential API to build a simple feedforward neural network.
!pip install tensorflow
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# 1. Load and preprocess the MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0

y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# 2. Build a simple feedforward neural network
model = Sequential([
    Flatten(input_shape=(28, 28)),           # Flatten 28x28 image to 1D
    Dense(128, activation='relu'),           # Hidden layer 1
    Dense(64, activation='relu'),            # Hidden layer 2
    Dense(32, activation='relu'),            # Hidden layer 3
    Dense(32, activation='relu'),            # Hidden layer 4
    Dense(16, activation='relu'),            # Hidden layer 5
    Dense(10, activation='softmax')          # Output layer for 10 classes
])

# 3. Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# 4. Train the model
model.fit(x_train, y_train, epochs=10, batch_size=128, validation_split=0.1)

# 5. Evaluate the model
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"\n✅ Test Accuracy: {test_acc * 100:.2f}%")


Epoch 1/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.6671 - loss: 0.9851 - val_accuracy: 0.9553 - val_loss: 0.1683
Epoch 2/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9503 - loss: 0.1746 - val_accuracy: 0.9640 - val_loss: 0.1303
Epoch 3/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.9675 - loss: 0.1161 - val_accuracy: 0.9708 - val_loss: 0.1023
Epoch 4/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.9747 - loss: 0.0859 - val_accuracy: 0.9717 - val_loss: 0.0969
Epoch 5/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9816 - loss: 0.0629 - val_accuracy: 0.9707 - val_loss: 0.1056
Epoch 6/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.9850 - loss: 0.0503 - val_accuracy: 0.9743 - val_loss: 0.0941
Epoch 7/10
[1m422/422[0m 

## Step 2: Add 5 Hidden Layers Using ReLU and a Softmax Output Layer
I construct a Sequential neural network that contains:

- Five hidden layers
- The ReLU activation function in each hidden layer
- The Softmax activation function in the final output layer

In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Input

# Build the model: five ReLU hidden layers followed by a 10-way softmax.
# The input shape is declared with an explicit Input layer; passing
# input_shape= to Flatten is deprecated in Keras 3 and raises a UserWarning.
model = Sequential([
    Input(shape=(28, 28)),                  # Input spec: 28x28 pixels
    Flatten(),                              # Flatten the image to a 1D vector

    Dense(128, activation='relu'),          # Hidden Layer 1
    Dense(64, activation='relu'),           # Hidden Layer 2
    Dense(64, activation='relu'),           # Hidden Layer 3
    Dense(32, activation='relu'),           # Hidden Layer 4
    Dense(32, activation='relu'),           # Hidden Layer 5

    Dense(10, activation='softmax')         # Output Layer: 10 classes (digits 0–9)
])


In [10]:
# 3. Try adding more layers or using a different number of neurons
#  Add More Layers: seven ReLU hidden layers instead of five.
# The input shape is declared with tf.keras.Input; passing input_shape= to
# Flatten is deprecated in Keras 3 and raises a UserWarning.
model = Sequential([
    tf.keras.Input(shape=(28, 28)),
    Flatten(),

    Dense(128, activation='relu'),   # Hidden Layer 1
    Dense(128, activation='relu'),   # Hidden Layer 2
    Dense(64, activation='relu'),    # Hidden Layer 3
    Dense(64, activation='relu'),    # Hidden Layer 4
    Dense(32, activation='relu'),    # Hidden Layer 5
    Dense(32, activation='relu'),    # Hidden Layer 6
    Dense(16, activation='relu'),    # Hidden Layer 7

    Dense(10, activation='softmax')  # Output Layer
])


In [11]:
# Changing Neurons (Fewer or More): wider variant, starting at 256 units.
# The input shape is declared with tf.keras.Input; passing input_shape= to
# Flatten is deprecated in Keras 3 and raises a UserWarning.
model = Sequential([
    tf.keras.Input(shape=(28, 28)),
    Flatten(),

    Dense(256, activation='relu'),   # More neurons
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),

    Dense(10, activation='softmax')
])


In [12]:
# For smaller network: five narrow ReLU hidden layers (64 down to 8 units).
# The input shape is declared with tf.keras.Input; passing input_shape= to
# Flatten is deprecated in Keras 3 and raises a UserWarning.
model = Sequential([
    tf.keras.Input(shape=(28, 28)),
    Flatten(),

    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(16, activation='relu'),
    Dense(8, activation='relu'),     # narrower than the 10-unit output layer

    Dense(10, activation='softmax')
])


In [13]:
# 4. Experiment with different activation functions (e.g., tanh, sigmoid).
#  Using tanh in all hidden layers
# The input shape is declared with tf.keras.Input; passing input_shape= to
# Flatten is deprecated in Keras 3 and raises a UserWarning.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(28, 28)),
    Flatten(),

    Dense(128, activation='tanh'),
    Dense(64, activation='tanh'),
    Dense(64, activation='tanh'),
    Dense(32, activation='tanh'),
    Dense(32, activation='tanh'),

    Dense(10, activation='softmax')  # output stays softmax
])


In [14]:
 #  Using sigmoid in all hidden layers
# The statement below starts at column 0 so the cell is still valid Python
# when the notebook is exported to a plain .py script.
# The input shape is declared with tf.keras.Input; passing input_shape= to
# Flatten is deprecated in Keras 3 and raises a UserWarning.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(28, 28)),
    Flatten(),

    Dense(128, activation='sigmoid'),
    Dense(64, activation='sigmoid'),
    Dense(64, activation='sigmoid'),
    Dense(32, activation='sigmoid'),
    Dense(32, activation='sigmoid'),

    Dense(10, activation='softmax')
])


In [15]:
#  Mix activations: a different activation per hidden layer.
# The input shape is declared with tf.keras.Input; passing input_shape= to
# Flatten is deprecated in Keras 3 and raises a UserWarning.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(28, 28)),
    Flatten(),

    Dense(128, activation='tanh'),
    Dense(64, activation='relu'),
    Dense(64, activation='sigmoid'),
    Dense(32, activation='relu'),
    Dense(32, activation='tanh'),

    Dense(10, activation='softmax')
])


In [16]:
# Using LeakyReLU (advanced)
from tensorflow.keras.layers import LeakyReLU

# Each Dense layer is created without an activation and is followed by a
# separate LeakyReLU layer.
# `negative_slope` is the Keras 3 name of the slope argument; the older
# `alpha=` spelling is deprecated there and triggers a warning.
# The input shape is declared with tf.keras.Input; passing input_shape= to
# Flatten is deprecated in Keras 3 and raises a UserWarning.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(28, 28)),
    Flatten(),

    Dense(128), LeakyReLU(negative_slope=0.1),
    Dense(64), LeakyReLU(negative_slope=0.1),
    Dense(64), LeakyReLU(negative_slope=0.1),
    Dense(32), LeakyReLU(negative_slope=0.1),
    Dense(32), LeakyReLU(negative_slope=0.1),

    Dense(10, activation='softmax')
])




In [18]:
#5. Compare results with different optimizers like sgd, rmsprop, etc.
# Compile the current `model` once per optimizer name, in the order
# SGD -> RMSprop -> Adam. Loss and metric are identical for every call, and
# the last compile() issued here is the Adam one.
# NOTE(review): presumably each compile() supersedes the previous one, so only
# the Adam configuration is still in effect after this cell — confirm.
for optimizer_name in ('sgd', 'rmsprop', 'adam'):
    model.compile(
        optimizer=optimizer_name,
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )


In [19]:
# Full Example: Compare Optimizers Automatically
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

SEED = 42  # shared seed so every optimizer is trained from the same starting point

# Load and preprocess data
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0   # rescale pixel intensities by 1/255
y_train = to_categorical(y_train, 10)               # one-hot labels for categorical_crossentropy
y_test = to_categorical(y_test, 10)

optimizers = ['sgd', 'rmsprop', 'adam']

for opt_name in optimizers:
    print(f"\nTraining with optimizer: {opt_name}")

    # Re-seed before building each model so the runs differ only in the
    # optimizer, not in the random weight initialisation or shuffling.
    tf.keras.utils.set_random_seed(SEED)

    # A fresh model per optimizer: training never continues from weights
    # fitted by a previous iteration.
    model = Sequential([
        Input(shape=(28, 28)),     # explicit input spec (replaces input_shape= on Flatten)
        Flatten(),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(32, activation='relu'),
        Dense(10, activation='softmax')
    ])

    model.compile(
        optimizer=opt_name,
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    # verbose=0 silences the per-epoch log; only the summary line below is printed.
    model.fit(x_train, y_train, epochs=5, batch_size=128, verbose=0)
    loss, acc = model.evaluate(x_test, y_test, verbose=0)
    print(f"Test accuracy with {opt_name}: {acc * 100:.2f}%")



Training with optimizer: sgd
Test accuracy with sgd: 91.93%

Training with optimizer: rmsprop
Test accuracy with rmsprop: 97.24%

Training with optimizer: adam
Test accuracy with adam: 97.39%


In [20]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, Input
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam

# Seed Python, NumPy and TensorFlow so initialisation, dropout masks and
# shuffling repeat from one Restart & Run All to the next.
tf.keras.utils.set_random_seed(42)

# 1. Load and normalize data
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# 2. One-hot encode targets
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# 3. Build model with 5 hidden layers + Dropout for regularization
#    (Dropout follows only the two widest hidden layers.)
model = Sequential([
    Input(shape=(28, 28)),     # explicit input spec (replaces input_shape= on Flatten)
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(10, activation='softmax')
])

# 4. Compile model with an explicitly configured Adam optimizer.
#    Note: 0.001 is Adam's default learning rate, so this matches
#    optimizer='adam'; lower the value here to actually train more slowly.
adam = Adam(learning_rate=0.001)
model.compile(optimizer=adam,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# 5. Train model for 25 epochs with a batch size of 64
#    (the last 10% of the training data is held out for validation)
model.fit(x_train, y_train, epochs=25, batch_size=64, validation_split=0.1)

# 6. Evaluate on test set
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"\n✅ Test Accuracy: {test_acc * 100:.2f}%")


Epoch 1/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - accuracy: 0.7531 - loss: 0.7436 - val_accuracy: 0.9608 - val_loss: 0.1262
Epoch 2/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - accuracy: 0.9456 - loss: 0.1849 - val_accuracy: 0.9720 - val_loss: 0.0934
Epoch 3/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - accuracy: 0.9591 - loss: 0.1410 - val_accuracy: 0.9783 - val_loss: 0.0707
Epoch 4/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.9643 - loss: 0.1153 - val_accuracy: 0.9738 - val_loss: 0.0853
Epoch 5/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.9696 - loss: 0.0997 - val_accuracy: 0.9797 - val_loss: 0.0655
Epoch 6/25
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.9713 - loss: 0.0945 - val_accuracy: 0.9808 - val_loss: 0.0703
Epoch 7/25
[1m844/844[0m