In [7]:
# Step 1: Dataset
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten, Input

# Load MNIST dataset
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

print("Training set shape:", X_train.shape, y_train.shape)
print("Test set shape:", X_test.shape, y_test.shape)

# Normalize pixel values (0-255 → 0-1)
X_train = X_train / 255.0
X_test = X_test / 255.0

Training set shape: (60000, 28, 28) (60000,)
Test set shape: (10000, 28, 28) (10000,)


In [8]:
# Step 2: Model
model = Sequential([
    Input(shape=(28, 28)),     # Use Input layer here
    Flatten(),                  # Flatten no longer needs input_shape
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax')
])

model.summary()

In [9]:
# Step 3: Compile
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [10]:
# Step 4: Train
history_4 = model.fit(X_train, y_train, epochs=4, validation_data=(X_test, y_test), verbose=2)
history_10 = model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test), verbose=2)

Epoch 1/4
1875/1875 - 6s - 3ms/step - accuracy: 0.9296 - loss: 0.2404 - val_accuracy: 0.9641 - val_loss: 0.1204
Epoch 2/4
1875/1875 - 3s - 2ms/step - accuracy: 0.9701 - loss: 0.0992 - val_accuracy: 0.9684 - val_loss: 0.1021
Epoch 3/4
1875/1875 - 4s - 2ms/step - accuracy: 0.9773 - loss: 0.0713 - val_accuracy: 0.9729 - val_loss: 0.0835
Epoch 4/4
1875/1875 - 5s - 2ms/step - accuracy: 0.9831 - loss: 0.0535 - val_accuracy: 0.9709 - val_loss: 0.0996
Epoch 1/10
1875/1875 - 5s - 3ms/step - accuracy: 0.9862 - loss: 0.0423 - val_accuracy: 0.9739 - val_loss: 0.0856
Epoch 2/10
1875/1875 - 5s - 3ms/step - accuracy: 0.9890 - loss: 0.0348 - val_accuracy: 0.9774 - val_loss: 0.0762
Epoch 3/10
1875/1875 - 5s - 2ms/step - accuracy: 0.9896 - loss: 0.0312 - val_accuracy: 0.9737 - val_loss: 0.0945
Epoch 4/10
1875/1875 - 5s - 2ms/step - accuracy: 0.9920 - loss: 0.0239 - val_accuracy: 0.9756 - val_loss: 0.0893
Epoch 5/10
1875/1875 - 5s - 2ms/step - accuracy: 0.9923 - loss: 0.0231 - val_accuracy: 0.9789 - val_

In [14]:
# Step 5: Evaluate
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print("Test accuracy:", test_acc)

313/313 - 1s - 2ms/step - accuracy: 0.9780 - loss: 0.1027
Test accuracy: 0.9779999852180481


In [15]:
# Step 6: Predict
import numpy as np
import matplotlib.pyplot as plt

# Predict first test image
pred1 = model.predict(X_test[:1])
print("Predicted label (first image):", np.argmax(pred1))
print("Actual label:", y_test[0])

# Predict 5th test image
pred5 = model.predict(X_test[4:5])
print("Predicted label (fifth image):", np.argmax(pred5))
print("Actual label:", y_test[4])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step
Predicted label (first image): 7
Actual label: 7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
Predicted label (fifth image): 4
Actual label: 4


In [21]:
# Step 7: Experiment
# (a) Add another Dense(32, ReLU)
model2 = Sequential([
    Input(shape=(28, 28)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(10, activation='softmax')
])
model2.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
history_exp1 = model2.fit(X_train, y_train, epochs=5, validation_data=(X_test, y_test), verbose=2)

# (b) Change optimizer to SGD
model3 = Sequential([
    Input(shape=(28, 28)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax')
])
model3.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
history_exp2 = model3.fit(X_train, y_train, epochs=5, validation_data=(X_test, y_test), verbose=2)

# (c) Replace ReLU with tanh
model4 = Sequential([
    Input(shape=(28, 28)),
    Flatten(),
    Dense(128, activation='tanh'),
    Dense(64, activation='tanh'),
    Dense(10, activation='softmax')
])
model4.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
history_exp3 = model4.fit(X_train, y_train, epochs=5, validation_data=(X_test, y_test), verbose=2)


Epoch 1/5
1875/1875 - 7s - 4ms/step - accuracy: 0.9265 - loss: 0.2467 - val_accuracy: 0.9626 - val_loss: 0.1227
Epoch 2/5
1875/1875 - 5s - 3ms/step - accuracy: 0.9678 - loss: 0.1038 - val_accuracy: 0.9711 - val_loss: 0.0937
Epoch 3/5
1875/1875 - 4s - 2ms/step - accuracy: 0.9773 - loss: 0.0733 - val_accuracy: 0.9720 - val_loss: 0.0913
Epoch 4/5
1875/1875 - 5s - 3ms/step - accuracy: 0.9816 - loss: 0.0578 - val_accuracy: 0.9766 - val_loss: 0.0829
Epoch 5/5
1875/1875 - 5s - 3ms/step - accuracy: 0.9857 - loss: 0.0463 - val_accuracy: 0.9681 - val_loss: 0.1051
Epoch 1/5
1875/1875 - 3s - 2ms/step - accuracy: 0.8208 - loss: 0.6724 - val_accuracy: 0.9115 - val_loss: 0.3158
Epoch 2/5
1875/1875 - 3s - 2ms/step - accuracy: 0.9170 - loss: 0.2928 - val_accuracy: 0.9258 - val_loss: 0.2490
Epoch 3/5
1875/1875 - 4s - 2ms/step - accuracy: 0.9312 - loss: 0.2402 - val_accuracy: 0.9389 - val_loss: 0.2155
Epoch 4/5
1875/1875 - 4s - 2ms/step - accuracy: 0.9410 - loss: 0.2067 - val_accuracy: 0.9445 - val_loss: