In [1]:
# Step 1: Dataset
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Load MNIST dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
print("Train shape:", x_train.shape, y_train.shape)
print("Test shape:", x_test.shape, y_test.shape)

# Normalize (0-255 → 0-1)
x_train = x_train / 255.0
x_test = x_test / 255.0
# Why normalize? → To make training faster & stable by keeping values small.

Train shape: (60000, 28, 28) (60000,)
Test shape: (10000, 28, 28) (10000,)


In [2]:
# Step 2: Model
model = keras.Sequential([
    layers.Flatten(input_shape=(28, 28)),   # 784 → input layer
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])
model.summary()   # Prints architecture

  super().__init__(**kwargs)


In [3]:
# Step 3: Compile
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# Why sparse_categorical_crossentropy? → Labels are integers, not one-hot encoded.

In [4]:
# Step 4: Train
print("\nTraining for 4 epochs:")
history_4 = model.fit(x_train, y_train, epochs=4, validation_split=0.1)

print("\nTraining for 10 epochs:")
history_10 = model.fit(x_train, y_train, epochs=10, validation_split=0.1)
# Accuracy may not always improve → model can overfit.


Training for 4 epochs:
Epoch 1/4
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 9ms/step - accuracy: 0.9213 - loss: 0.2654 - val_accuracy: 0.9665 - val_loss: 0.1164
Epoch 2/4
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.9650 - loss: 0.1122 - val_accuracy: 0.9747 - val_loss: 0.0940
Epoch 3/4
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - accuracy: 0.9765 - loss: 0.0763 - val_accuracy: 0.9780 - val_loss: 0.0819
Epoch 4/4
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.9817 - loss: 0.0594 - val_accuracy: 0.9783 - val_loss: 0.0763

Training for 10 epochs:
Epoch 1/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.9853 - loss: 0.0457 - val_accuracy: 0.9777 - val_loss: 0.0784
Epoch 2/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 7ms/step - accuracy: 0.9881 - loss: 0.0362 - val_a

In [5]:
# Step 5: Evaluate
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
print("\nTest accuracy:", test_acc)
# Compare train vs test accuracy → gap means overfitting.


Test accuracy: 0.9765999913215637


In [6]:
# Step 6: Predict
pred_1 = model.predict(x_test[:1])
print("\nPredicted label (1st image):", pred_1.argmax())
print("Actual label:", y_test[0])

pred_5 = model.predict(x_test[4:5])
print("\nPredicted label (5th image):", pred_5.argmax())
print("Actual label:", y_test[4])


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 375ms/step

Predicted label (1st image): 7
Actual label: 7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step

Predicted label (5th image): 4
Actual label: 4


In [7]:
# Step 7: Experiments
# (a) Add Dense(32, ReLU)
model2 = keras.Sequential([
    layers.Flatten(input_shape=(28, 28)),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(10, activation='softmax')
])
model2.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model2.fit(x_train, y_train, epochs=5, verbose=0)
print("\nExtra Dense(32): Test acc =", model2.evaluate(x_test, y_test, verbose=0)[1])


Extra Dense(32): Test acc = 0.972000002861023


In [8]:
# (b) Optimizer = SGD
model3 = keras.Sequential([
    layers.Flatten(input_shape=(28, 28)),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])
model3.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model3.fit(x_train, y_train, epochs=5, verbose=0)
print("SGD Optimizer: Test acc =", model3.evaluate(x_test, y_test, verbose=0)[1])

SGD Optimizer: Test acc = 0.9498999714851379


In [9]:
# (c) Replace ReLU with Tanh
model4 = keras.Sequential([
    layers.Flatten(input_shape=(28, 28)),
    layers.Dense(128, activation='tanh'),
    layers.Dense(64, activation='tanh'),
    layers.Dense(10, activation='softmax')
])
model4.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model4.fit(x_train, y_train, epochs=5, verbose=0)
print("Tanh Activation: Test acc =", model4.evaluate(x_test, y_test, verbose=0)[1])

Tanh Activation: Test acc = 0.9765999913215637
