# Lab 03: TensorFlow vs. PyTorch
- Train a model on MNIST in both TensorFlow and PyTorch, convert to TFLite and ONNX.  
- Use tf.GradientTape for Tensorflow custom training loop.



## TensorFlow Implementation

In [15]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
import time

# Load the MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize pixel values to range [0, 1]
x_train = x_train / 255.0
x_test = x_test / 255.0

# One-hot encode the labels
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

# Build a simple feedforward neural network
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(28, 28)),             # Input shape matching MNIST image size
    tf.keras.layers.Flatten(),                         # Flatten 28x28 images to 1D vectors
    tf.keras.layers.Dense(64, activation='relu'),      # Hidden layer with 64 neurons and ReLU activation
    tf.keras.layers.Dense(10, activation='softmax')    # Output layer with 10 neurons (one per digit class)
])

# Compile the model with Adam optimizer and categorical crossentropy loss
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model and measure training time
start = time.time()
model.fit(x_train, y_train, epochs=5)
end = time.time()
print(f"TF Training time: {end-start:.2f} seconds")     # Print the training duration

# Evaluate the model on the test set
model.evaluate(x_test, y_test)


Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.8584 - loss: 0.4946
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9541 - loss: 0.1580
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9685 - loss: 0.1083
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9749 - loss: 0.0824
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9792 - loss: 0.0673
TF Training time: 25.27 seconds
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9675 - loss: 0.1047


[0.09060510993003845, 0.9714000225067139]

## Convert TensorFlow model to TFLite

In [16]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

with open("model.tflite", "wb") as f:
    f.write(tflite_model)

Saved artifact at '/tmp/tmpsf7f1y9u'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 28, 28), dtype=tf.float32, name='keras_tensor_27')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  132705840588048: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132705840593040: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132705840585936: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132705840587856: TensorSpec(shape=(), dtype=tf.resource, name=None)


## PyTorch Implementation

In [17]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import time  # For measuring training time

# 1. Define a transform to convert PIL images to tensors and flatten them (28x28 → 784)
transform = transforms.Compose([
    transforms.ToTensor(),                      # Convert image to PyTorch tensor [1, 28, 28]
    transforms.Lambda(lambda x: x.view(-1))     # Flatten the tensor to shape [784]
])

# 2. Load the MNIST dataset with training and test data
train_loader = DataLoader(
    datasets.MNIST(root='./data', train=True, transform=transform, download=True),
    batch_size=32, shuffle=True                 # Shuffle training data for better generalization
)
test_loader = DataLoader(
    datasets.MNIST(root='./data', train=False, transform=transform, download=True),
    batch_size=1000                             # Larger batch size for faster testing
)

# 3. Define the neural network architecture
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 128)          # Fully connected layer: input 784 → hidden 128 neurons
        self.fc2 = nn.Linear(128, 10)           # Output layer: hidden 128 → output 10 classes (digits 0–9)

    def forward(self, x):
        x = F.relu(self.fc1(x))                 # Apply ReLU activation after first layer
        return self.fc2(x)                      # Return raw output logits for each class

# 4. Initialize model, loss function, and optimizer
model = Net()                                   # Instantiate the model
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer with learning rate 0.001
loss_fn = nn.CrossEntropyLoss()                # Cross-entropy loss for multi-class classification

# 5. Training loop for 5 epochs
start = time.time()
for epoch in range(5):
    for x, y in train_loader:
        optimizer.zero_grad()                  # Reset gradients from previous step
        pred = model(x)                        # Forward pass: compute predictions
        loss = loss_fn(pred, y)                # Compute loss between predictions and ground truth
        loss.backward()                        # Backward pass: compute gradients
        optimizer.step()                       # Update model weights
end = time.time()

print(f"PyTorch Training time: {end - start:.2f} seconds")  # Print total training time

# 6. Evaluation on the test dataset
model.eval()                                   # Set model to evaluation mode
correct = 0
with torch.no_grad():                          # Disable gradient computation for inference
    for x, y in test_loader:
        output = model(x)                      # Compute model outputs
        pred = output.argmax(1)                # Choose class with highest predicted score
        correct += (pred == y).sum().item()    # Count number of correct predictions

# 7. Calculate and display overall test accuracy
accuracy = correct / len(test_loader.dataset)
print(f"Test accuracy: {accuracy:.4f}")


PyTorch Training time: 54.20 seconds
Test accuracy: 0.9723


## Convert PyTorch model to ONNX

In [18]:
# Install ONNX
!pip install onnx



In [19]:
dummy_input = torch.randn(1, 784)
torch.onnx.export(model, dummy_input, "model.onnx",
                  input_names=["input"], output_names=["output"])

## TensorFlow custom training loop using tf.GradientTape

In [20]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
import time

# Load the MNIST dataset and normalize pixel values to the [0, 1] range
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train / 255.0           # Normalize training images
x_test = x_test / 255.0             # Normalize test images
y_train = to_categorical(y_train)   # Convert labels to one-hot encoded vectors
y_test = to_categorical(y_test)

# Create TensorFlow dataset objects for efficient data loading and batching
batch_size = 32
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) \
                              .shuffle(10000) \
                              .batch(batch_size)          # Shuffle and batch training data
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)) \
                             .batch(batch_size)             # Batch test data (no shuffle)

# Build a simple feedforward neural network model
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(28, 28)),        # Input layer for 28x28 pixel images
    tf.keras.layers.Flatten(),                      # Flatten 2D images to 1D vectors (784 elements)
    tf.keras.layers.Dense(128, activation='relu'), # Fully connected hidden layer with ReLU activation
    tf.keras.layers.Dense(10, activation='softmax')# Output layer with softmax activation for 10 classes
])

# Specify the loss function, optimizer, and accuracy metric
loss_fn = tf.keras.losses.CategoricalCrossentropy()    # Suitable for one-hot encoded labels
optimizer = tf.keras.optimizers.Adam()                 # Adam optimizer with default parameters
train_acc_metric = tf.keras.metrics.CategoricalAccuracy()  # Track accuracy on training data
test_acc_metric = tf.keras.metrics.CategoricalAccuracy()   # Track accuracy on test data

# Training loop over specified number of epochs
epochs = 5
start = time.time()
for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    for step, (x_batch, y_batch) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            logits = model(x_batch, training=True)           # Forward pass with training=True
            loss = loss_fn(y_batch, logits)                   # Calculate loss
        grads = tape.gradient(loss, model.trainable_variables)  # Compute gradients
        optimizer.apply_gradients(zip(grads, model.trainable_variables))  # Update weights

        train_acc_metric.update_state(y_batch, logits)      # Update training accuracy metric

        # Print loss and accuracy every 100 batches
        if step % 100 == 0:
            print(f"Step {step}, Loss: {loss.numpy():.4f}, Accuracy: {train_acc_metric.result().numpy():.4f}")

    # Output training accuracy at the end of each epoch
    print(f"Training Accuracy for epoch {epoch+1}: {train_acc_metric.result().numpy():.4f}")
    train_acc_metric.reset_state()    # Reset metric for next epoch
end = time.time()

print(f"\nTF Training time: {end - start:.2f} seconds")   # Print total training time

# Evaluate the trained model on the test dataset
for x_batch, y_batch in test_dataset:
    test_logits = model(x_batch, training=False)          # Forward pass with training=False
    test_acc_metric.update_state(y_batch, test_logits)   # Update test accuracy metric

print(f"Test Accuracy: {test_acc_metric.result().numpy():.4f}")  # Print final test accuracy



Epoch 1/5
Step 0, Loss: 2.2792, Accuracy: 0.1250
Step 100, Loss: 0.5291, Accuracy: 0.7580
Step 200, Loss: 0.3346, Accuracy: 0.8212
Step 300, Loss: 0.3449, Accuracy: 0.8495
Step 400, Loss: 0.2041, Accuracy: 0.8678
Step 500, Loss: 0.1524, Accuracy: 0.8788
Step 600, Loss: 0.3299, Accuracy: 0.8866
Step 700, Loss: 0.1363, Accuracy: 0.8921
Step 800, Loss: 0.4981, Accuracy: 0.8967
Step 900, Loss: 0.1114, Accuracy: 0.9016
Step 1000, Loss: 0.2660, Accuracy: 0.9055
Step 1100, Loss: 0.0968, Accuracy: 0.9086
Step 1200, Loss: 0.1511, Accuracy: 0.9115
Step 1300, Loss: 0.1934, Accuracy: 0.9139
Step 1400, Loss: 0.2290, Accuracy: 0.9159
Step 1500, Loss: 0.4602, Accuracy: 0.9182
Step 1600, Loss: 0.2161, Accuracy: 0.9208
Step 1700, Loss: 0.1816, Accuracy: 0.9227
Step 1800, Loss: 0.0491, Accuracy: 0.9247
Training Accuracy for epoch 1: 0.9259

Epoch 2/5
Step 0, Loss: 0.0989, Accuracy: 0.9688
Step 100, Loss: 0.0830, Accuracy: 0.9576
Step 200, Loss: 0.0113, Accuracy: 0.9579
Step 300, Loss: 0.1109, Accuracy:

## Performance Otimization with Graph Execution using @tf.function

In [21]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
import time

# Load MNIST dataset and normalize pixel values to [0, 1]
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train / 255.0                        # Scale training images
x_test = x_test / 255.0                          # Scale test images
y_train = to_categorical(y_train, num_classes=10)  # One-hot encode training labels
y_test = to_categorical(y_test, num_classes=10)    # One-hot encode test labels

# Create TensorFlow datasets for efficient data loading and batching
batch_size = 32
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) \
                              .shuffle(10000) \
                              .batch(batch_size)          # Shuffle and batch training data
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)) \
                             .batch(batch_size)             # Batch test data without shuffling

# Define a simple feedforward neural network model
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(28, 28)),        # Input layer for 28x28 grayscale images
    tf.keras.layers.Flatten(),                      # Flatten 2D images to 1D vectors (784 elements)
    tf.keras.layers.Dense(128, activation='relu'), # Fully connected hidden layer with ReLU activation
    tf.keras.layers.Dense(10, activation='softmax')# Output layer with 10 neurons for classification
])

# Specify loss function, optimizer, and accuracy metrics
loss_fn = tf.keras.losses.CategoricalCrossentropy()  # Cross-entropy loss for one-hot labels
optimizer = tf.keras.optimizers.Adam()               # Adam optimizer with default params
train_acc_metric = tf.keras.metrics.CategoricalAccuracy()  # Metric to track training accuracy
test_acc_metric = tf.keras.metrics.CategoricalAccuracy()   # Metric to track test accuracy

@tf.function  # Compiles the function into a high-performance TensorFlow graph
def train_step(x_batch, y_batch):
    # Perform one training step: forward pass, loss calculation, backpropagation, and weights update
    with tf.GradientTape() as tape:
        logits = model(x_batch, training=True)      # Forward pass (training mode)
        loss = loss_fn(y_batch, logits)              # Compute loss between true and predicted labels
    grads = tape.gradient(loss, model.trainable_variables)  # Compute gradients
    optimizer.apply_gradients(zip(grads, model.trainable_variables))  # Update model weights
    train_acc_metric.update_state(y_batch, logits)       # Update accuracy metric with current batch
    return loss

# Main training loop running for specified number of epochs
epochs = 5
start = time.time()
for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    for step, (x_batch, y_batch) in enumerate(train_dataset):
        loss = train_step(x_batch, y_batch)           # Execute one training step

        # Print loss and accuracy every 100 steps for monitoring
        if step % 100 == 0:
            print(f"Step {step}, Loss: {loss.numpy():.4f}, Accuracy: {train_acc_metric.result().numpy():.4f}")

    # Display accuracy after each epoch and reset metric for next epoch
    print(f"Training Accuracy for epoch {epoch + 1}: {train_acc_metric.result().numpy():.4f}")
    train_acc_metric.reset_state()

end = time.time()
print(f"\nTF Training time: {end - start:.2f} seconds")  # Total training duration

# Evaluate the model performance on the test dataset
for x_batch, y_batch in test_dataset:
    test_logits = model(x_batch, training=False)          # Forward pass in inference mode
    test_acc_metric.update_state(y_batch, test_logits)   # Update test accuracy metric

# Print final test accuracy
print(f"Test Accuracy: {test_acc_metric.result().numpy():.4f}")



Epoch 1/5
Step 0, Loss: 2.2662, Accuracy: 0.1875
Step 100, Loss: 0.4300, Accuracy: 0.7729
Step 200, Loss: 0.4813, Accuracy: 0.8287
Step 300, Loss: 0.2923, Accuracy: 0.8525
Step 400, Loss: 0.4872, Accuracy: 0.8663
Step 500, Loss: 0.1942, Accuracy: 0.8774
Step 600, Loss: 0.3352, Accuracy: 0.8847
Step 700, Loss: 0.4330, Accuracy: 0.8906
Step 800, Loss: 0.4779, Accuracy: 0.8956
Step 900, Loss: 0.0696, Accuracy: 0.9005
Step 1000, Loss: 0.1130, Accuracy: 0.9043
Step 1100, Loss: 0.0988, Accuracy: 0.9071
Step 1200, Loss: 0.2325, Accuracy: 0.9096
Step 1300, Loss: 0.2656, Accuracy: 0.9112
Step 1400, Loss: 0.1582, Accuracy: 0.9139
Step 1500, Loss: 0.0839, Accuracy: 0.9165
Step 1600, Loss: 0.3507, Accuracy: 0.9186
Step 1700, Loss: 0.0859, Accuracy: 0.9211
Step 1800, Loss: 0.2957, Accuracy: 0.9230
Training Accuracy for epoch 1: 0.9246

Epoch 2/5
Step 0, Loss: 0.1144, Accuracy: 0.9688
Step 100, Loss: 0.0208, Accuracy: 0.9588
Step 200, Loss: 0.1308, Accuracy: 0.9588
Step 300, Loss: 0.1193, Accuracy: