In [1]:
# Lab 4 - MNIST Shallow Neural Network
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Load the MNIST train/test splits (images plus one label per image)
(xTrain, yTrain), (xTest, yTest) = tf.keras.datasets.mnist.load_data()

# Rescale pixel values to float32 in [0, 1]; the divisor is the maximum
# 8-bit intensity.
PIXEL_MAX = 255.0
xTrain = xTrain.astype(np.float32) / PIXEL_MAX
xTest = xTest.astype(np.float32) / PIXEL_MAX

# Q1: Report the array shapes of every split
print(
    "Q1 - Data shapes:",
    f"xTrain: {xTrain.shape}, yTrain: {yTrain.shape}",
    f"xTest: {xTest.shape}, yTest: {yTest.shape}",
    sep="\n",
)

Q1 - Data shapes:
xTrain: (60000, 28, 28), yTrain: (60000,)
xTest: (10000, 28, 28), yTest: (10000,)


In [2]:
# Q2: Create 80/20 train/validation split
TRAIN_FRACTION = 0.8

# This cell rebinds xTrain/yTrain to the training slice, so a naive re-run
# would split the already-reduced arrays a second time (48000 -> 38400) and
# silently change the validation set. To stay idempotent we keep a reference
# to the full arrays and always split from that.
# np.may_share_memory distinguishes a re-run (xTrain is a slice view of
# xTrainFull) from freshly reloaded data (a new, unrelated array), in which
# case the full-array reference is refreshed.
alreadySplit = "xTrainFull" in globals() and np.may_share_memory(xTrain, xTrainFull)
if not alreadySplit:
    xTrainFull, yTrainFull = xTrain, yTrain

# First TRAIN_FRACTION of the samples -> training, remainder -> validation.
splitIdx = int(TRAIN_FRACTION * len(xTrainFull))
xTrain, xVal = xTrainFull[:splitIdx], xTrainFull[splitIdx:]
yTrain, yVal = yTrainFull[:splitIdx], yTrainFull[splitIdx:]

# Invariants: images and labels stay paired, and no sample is lost or duplicated.
assert len(xTrain) == len(yTrain) and len(xVal) == len(yVal)
assert len(xTrain) + len(xVal) == len(xTrainFull)

print(f"Q2 - Split sizes: Train {xTrain.shape[0]}, Val {xVal.shape[0]}")

Q2 - Split sizes: Train 48000, Val 12000


In [4]:
# Q3: Layer parameter documentation
"""
Input(shape): Declares the expected per-sample input dimensions (replaces passing input_shape to Flatten)
Flatten(): Reshapes each input sample to 1D
Dense(units, activation): Fully connected layer, units=number of neurons, activation=activation function
Dropout(rate): Randomly sets rate fraction of inputs to 0 during training for regularization
"""

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation, dropout
# masks and batch shuffling are repeatable across Restart & Run All.
# (Some GPU kernels may still introduce small run-to-run differences.)
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Model A: one hidden layer with dropout. The input shape is declared with an
# explicit Input object; Keras 3 emits a warning when input_shape is passed to
# the first layer of a Sequential model instead.
modelA = tf.keras.Sequential([
    tf.keras.Input(shape=(28, 28)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10)  # raw logits, one per class; no softmax here
])

# from_logits=True because the last Dense layer has no activation.
lossFn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
modelA.compile(optimizer='adam', loss=lossFn, metrics=['accuracy'])

# Q4: Print output shape (the Input layer already builds the model, so no
# explicit build() call is needed)
print(f"Q4 - Model A output shape: {modelA.output_shape}")

Q4 - Model A output shape: (None, 10)


In [5]:
# Train Model A for 5 epochs, scoring the held-out validation split after
# each epoch.
historyA = modelA.fit(
    x=xTrain,
    y=yTrain,
    validation_data=(xVal, yVal),
    epochs=5,
    verbose=1,
)

# Q5: Final validation accuracy (value recorded after the last epoch)
valAccPerEpochA = historyA.history['val_accuracy']
finalValAccA = valAccPerEpochA[-1]
print("Q5 - Model A final validation accuracy: {:.4f}".format(finalValAccA))

# Q6: Test accuracy; evaluate returns the loss followed by the compiled metric
testLossA, testAccA = modelA.evaluate(x=xTest, y=yTest, verbose=0)
print("Q6 - Model A test accuracy: {:.4f}".format(testAccA))

Epoch 1/5
1500/1500 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - accuracy: 0.9038 - loss: 0.3325 - val_accuracy: 0.9550 - val_loss: 0.1655
Epoch 2/5
1500/1500 ━━━━━━━━━━━━━━━━━━━━ 2s 2ms/step - accuracy: 0.9526 - loss: 0.1616 - val_accuracy: 0.9663 - val_loss: 0.1190
Epoch 3/5
1500/1500 ━━━━━━━━━━━━━━━━━━━━ 2s 2ms/step - accuracy: 0.9647 - loss: 0.1180 - val_accuracy: 0.9687 - val_loss: 0.1023
Epoch 4/5
1500/1500 ━━━━━━━━━━━━━━━━━━━━ 2s 2ms/step - accuracy: 0.9704 - loss: 0.0958 - val_accuracy: 0.9720 - val_loss: 0.0944
Epoch 5/5
1500/1500 ━━━━━━━━━━━━━━━━━━━━ 2s 2ms/step - accuracy: 0.9754 - loss: 0.0801 - val_accuracy: 0.9743 - val_loss: 0.0869
Q5 - Model A final validation accuracy: 0.9743
Q6 - Model A test accuracy: 0.9756


In [6]:
# Seed the Python, NumPy and TensorFlow RNGs right before building Model B so
# its initial weights and batch order are repeatable regardless of how much
# randomness earlier cells consumed.
# (Some GPU kernels may still introduce small run-to-run differences.)
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Model B: two hidden layers of 200 units and no dropout. The input shape is
# declared with an explicit Input object; Keras 3 emits a warning when
# input_shape is passed to the first layer of a Sequential model instead.
modelB = tf.keras.Sequential([
    tf.keras.Input(shape=(28, 28)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(200, activation='relu'),
    tf.keras.layers.Dense(200, activation='relu'),
    tf.keras.layers.Dense(10)  # raw logits, one per class; no softmax here
])

# Reuses lossFn from the Model A cell so both models are trained and scored
# with the same objective.
modelB.compile(optimizer='adam', loss=lossFn, metrics=['accuracy'])

historyB = modelB.fit(xTrain, yTrain, epochs=5, validation_data=(xVal, yVal), verbose=1)

# Q7: Final validation and test accuracy
finalValAccB = historyB.history['val_accuracy'][-1]
testLossB, testAccB = modelB.evaluate(xTest, yTest, verbose=0)

print(f"Q7 - Model B final validation accuracy: {finalValAccB:.4f}")
print(f"Q7 - Model B test accuracy: {testAccB:.4f}")

Epoch 1/5
1500/1500 ━━━━━━━━━━━━━━━━━━━━ 4s 2ms/step - accuracy: 0.9315 - loss: 0.2307 - val_accuracy: 0.9606 - val_loss: 0.1271
Epoch 2/5
1500/1500 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - accuracy: 0.9706 - loss: 0.0973 - val_accuracy: 0.9684 - val_loss: 0.1024
Epoch 3/5
1500/1500 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - accuracy: 0.9791 - loss: 0.0660 - val_accuracy: 0.9698 - val_loss: 0.0991
Epoch 4/5
1500/1500 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - accuracy: 0.9857 - loss: 0.0465 - val_accuracy: 0.9678 - val_loss: 0.1135
Epoch 5/5
1500/1500 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - accuracy: 0.9877 - loss: 0.0366 - val_accuracy: 0.9693 - val_loss: 0.1103
Q7 - Model B final validation accuracy: 0.9693
Q7 - Model B test accuracy: 0.9724
