In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
import time

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and
# batch shuffling start from the same state on every Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Load MNIST and scale the pixel intensities from [0, 255] down to [0, 1].
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train / 255
x_test = x_test / 255
# One-hot encode the digit labels. num_classes is pinned so both splits always
# produce 10 columns, matching the 10-unit softmax output layer below.
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

# Fully connected classifier: 28x28 image -> flat vector -> 64 ReLU units -> 10-way softmax.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(28, 28)),           # one 28x28 image per sample
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),    # hidden layer
    tf.keras.layers.Dense(10, activation='softmax')  # one probability per digit class
])

# categorical_crossentropy pairs with the one-hot labels and the softmax output.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Time only the call to fit(); evaluation is deliberately outside the timed span.
start = time.time()
model.fit(x_train, y_train, epochs=5)
end = time.time()
print(f"TF Training time: {end-start:.2f} seconds")
# Last expression of the cell: displays [test loss, test accuracy].
model.evaluate(x_test, y_test)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 7ms/step - accuracy: 0.8611 - loss: 0.5021
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 4ms/step - accuracy: 0.9534 - loss: 0.1587
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.9689 - loss: 0.1080
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.9761 - loss: 0.0807
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - accuracy: 0.9798 - loss: 0.0669
TF Training time: 63.23 seconds
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9687 - loss: 0.1018


[0.08895564824342728, 0.9722999930381775]

In [2]:
# Convert the in-memory Keras model into serialized TensorFlow Lite bytes.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model next to the notebook.
with open("model.tflite", mode="wb") as tflite_file:
    tflite_file.write(tflite_model)

Saved artifact at '/tmp/tmp8qr843_z'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 28, 28), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  137779655964624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137779654151632: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137779663613648: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137779654152016: TensorSpec(shape=(), dtype=tf.resource, name=None)


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import time  # Required for timing

def _flatten(image):
    """Collapse an image tensor into a 1-D vector ([1, 28, 28] -> [784] for MNIST)."""
    return image.view(-1)


# 1. Per-image preprocessing: convert to a tensor, then flatten
transform = transforms.Compose([transforms.ToTensor(), transforms.Lambda(_flatten)])

# 2. MNIST train/test splits (downloaded into ./data when missing)
train_set = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_set = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

# Training batches are small and reshuffled; test batches are large and in order.
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
test_loader = DataLoader(test_set, batch_size=1000)

# 3. Define the neural network
class Net(nn.Module):
    """Two-layer fully connected classifier: 784 inputs -> 128 ReLU units -> 10 logits."""

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before (fc1, then fc2).
        self.fc1 = nn.Linear(in_features=784, out_features=128)  # flattened 28x28 image -> hidden
        self.fc2 = nn.Linear(in_features=128, out_features=10)   # hidden -> one score per digit

    def forward(self, x):
        """Return unnormalised class scores (logits) for a batch of flattened images."""
        hidden = F.relu(self.fc1(x))
        logits = self.fc2(hidden)
        return logits

# 4. Initialize model, optimizer and loss function
# Seed PyTorch's global RNG first so the initial weights are the same on every
# run (the shuffling DataLoader is expected to draw its order from this RNG too).
torch.manual_seed(42)
model = Net()
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer with learning rate
loss_fn = nn.CrossEntropyLoss()  # applied to the raw logits returned by Net.forward

# 5. Train the model
model.train()  # explicit, so re-running after an eval() call still trains in training mode
start = time.time()
for epoch in range(5):
    for x, y in train_loader:
        optimizer.zero_grad()       # Clear gradients left over from the previous step
        pred = model(x)             # Forward pass
        loss = loss_fn(pred, y)     # Compute loss
        loss.backward()             # Backpropagate
        optimizer.step()            # Update weights
end = time.time()

print(f"PyTorch Training time: {end - start:.2f} seconds")

# 6. Measure accuracy on the test split
model.eval()  # put the module in evaluation mode
n_correct = 0
with torch.no_grad():  # no gradients are needed for scoring
    for images, labels in test_loader:
        predicted = model(images).argmax(dim=1)  # index of the highest score per row
        n_correct += int((predicted == labels).sum())

accuracy = n_correct / len(test_loader.dataset)
print(f"Test accuracy: {accuracy:.4f}")


100%|██████████| 9.91M/9.91M [00:00<00:00, 51.2MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.61MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 14.2MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 6.52MB/s]


PyTorch Training time: 81.52 seconds
Test accuracy: 0.9765


In [4]:
!pip install onnx

Collecting onnx
  Downloading onnx-1.18.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Downloading onnx-1.18.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m84.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: onnx
Successfully installed onnx-1.18.0


In [5]:
# Export the trained PyTorch network to ONNX.
# `model` is rebound to a Keras model by later cells, so fail early with a clear
# message if this cell is run out of order.
assert isinstance(model, torch.nn.Module), "run the PyTorch training cell first: `model` is not a torch module"
model.eval()  # export in evaluation mode

# Example input used only to trace the graph. A batch of 2 (not 1) is used
# because the batch axis is declared dynamic below; some exporters may treat a
# size-1 dimension as a constant.
dummy_input = torch.randn(2, 784)
torch.onnx.export(
    model, dummy_input, "model.onnx",
    input_names=["input"], output_names=["output"],
    # Mark axis 0 as variable so the exported model accepts any batch size.
    dynamic_axes={"input": {0: "batch"}, "output": {0: "batch"}},
)

In [6]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
import time

# Seed the Python, NumPy and TensorFlow RNGs so the initial weights and the
# shuffle order start from the same state on every run of this cell.
tf.keras.utils.set_random_seed(42)

# Load and preprocess data
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train / 255.0   # Normalize pixel values to range [0, 1]
x_test = x_test / 255.0     # Same normalization for test data
# One-hot encode the labels; num_classes is pinned so both splits always get
# 10 columns, matching the 10-unit output layer.
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

# Prepare datasets
batch_size = 32         # Same batch size as the PyTorch DataLoader used for training
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)

# Define model
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(28, 28)),           # Each MNIST image is 28x28
    tf.keras.layers.Flatten(),                       # Flatten to 784-dim vector
    tf.keras.layers.Dense(128, activation='relu'),   # Hidden layer with 128 neurons
    tf.keras.layers.Dense(10, activation='softmax')  # Output layer with 10 classes (0–9)
])

# Define loss, optimizer, and metrics
# The loss is built with default arguments and receives the model's softmax outputs.
loss_fn = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()
# Separate metric objects keep the training and test accuracy from mixing.
train_acc_metric = tf.keras.metrics.CategoricalAccuracy()
test_acc_metric = tf.keras.metrics.CategoricalAccuracy()

# Hand-written training loop, executed eagerly (one op at a time)
epochs = 5
start = time.time()
for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    for step, (images, labels) in enumerate(train_dataset):
        # Record the forward pass so the loss can be differentiated afterwards.
        with tf.GradientTape() as tape:
            probs = model(images, training=True)  # softmax outputs, not raw logits
            loss = loss_fn(labels, probs)
        weights = model.trainable_variables
        gradients = tape.gradient(loss, weights)
        optimizer.apply_gradients(zip(gradients, weights))
        train_acc_metric.update_state(labels, probs)

        # Report progress every 100 batches; accuracy is the running value for this epoch.
        if not step % 100:
            print(f"Step {step}, Loss: {loss.numpy():.4f}, Accuracy: {train_acc_metric.result().numpy():.4f}")

    print(f"Training Accuracy for epoch {epoch+1}: {train_acc_metric.result().numpy():.4f}")
    train_acc_metric.reset_state()  # start the next epoch's accuracy from zero
end = time.time()
print(f"\nTF Training time: {end - start:.2f} seconds")

# Score the trained model batch by batch on the test dataset
for images, labels in test_dataset:
    test_acc_metric.update_state(labels, model(images, training=False))

print(f"Test Accuracy: {test_acc_metric.result().numpy():.4f}")



Epoch 1/5
Step 0, Loss: 2.4144, Accuracy: 0.0938
Step 100, Loss: 0.4283, Accuracy: 0.7574
Step 200, Loss: 0.2366, Accuracy: 0.8234
Step 300, Loss: 0.2029, Accuracy: 0.8491
Step 400, Loss: 0.4004, Accuracy: 0.8661
Step 500, Loss: 0.3141, Accuracy: 0.8777
Step 600, Loss: 0.2963, Accuracy: 0.8868
Step 700, Loss: 0.2365, Accuracy: 0.8930
Step 800, Loss: 0.1927, Accuracy: 0.8977
Step 900, Loss: 0.2339, Accuracy: 0.9022
Step 1000, Loss: 0.4955, Accuracy: 0.9051
Step 1100, Loss: 0.0888, Accuracy: 0.9079
Step 1200, Loss: 0.2103, Accuracy: 0.9111
Step 1300, Loss: 0.2375, Accuracy: 0.9136
Step 1400, Loss: 0.0422, Accuracy: 0.9158
Step 1500, Loss: 0.1081, Accuracy: 0.9178
Step 1600, Loss: 0.0642, Accuracy: 0.9198
Step 1700, Loss: 0.2608, Accuracy: 0.9216
Step 1800, Loss: 0.0651, Accuracy: 0.9237
Training Accuracy for epoch 1: 0.9251

Epoch 2/5
Step 0, Loss: 0.1001, Accuracy: 0.9375
Step 100, Loss: 0.0841, Accuracy: 0.9595
Step 200, Loss: 0.1352, Accuracy: 0.9597
Step 300, Loss: 0.0522, Accuracy:

In [7]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
import time

# Seed the Python, NumPy and TensorFlow RNGs so the initial weights and the
# shuffle order start from the same state on every run of this cell.
tf.keras.utils.set_random_seed(42)

# Load and preprocess data
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train / 255.0   # Normalize to range [0, 1]
x_test = x_test / 255.0     # Same normalization for test data
# One-hot encode the labels with a fixed width of 10 columns.
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

# Prepare datasets
batch_size = 32
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)

# Define model
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(28, 28)),            # Each MNIST image is 28x28
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),    # Hidden layer with 128 neurons and ReLU
    tf.keras.layers.Dense(10, activation='softmax')   # Output layer with 10 neurons and softmax
])

# Define loss, optimizer, and metrics
# The loss is built with default arguments and receives the model's softmax outputs.
loss_fn = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()
# Separate metric objects keep the training and test accuracy from mixing.
train_acc_metric = tf.keras.metrics.CategoricalAccuracy()
test_acc_metric = tf.keras.metrics.CategoricalAccuracy()

@tf.function  # run this step as a compiled TensorFlow graph
def train_step(x_batch, y_batch):
    """Run one optimisation step on a single batch and return its loss.

    Uses the module-level `model`, `loss_fn`, `optimizer` and
    `train_acc_metric`; the metric is updated as a side effect.
    """
    # Record the forward pass so the loss can be differentiated afterwards.
    with tf.GradientTape() as tape:
        probs = model(x_batch, training=True)  # softmax outputs, not raw logits
        batch_loss = loss_fn(y_batch, probs)
    weights = model.trainable_variables
    gradients = tape.gradient(batch_loss, weights)
    optimizer.apply_gradients(zip(gradients, weights))
    train_acc_metric.update_state(y_batch, probs)
    return batch_loss

# Training loop driven from Python; each batch is handed to train_step
epochs = 5
start = time.time()
for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    for step, batch in enumerate(train_dataset):
        images, labels = batch
        loss = train_step(images, labels)

        # Report progress every 100 batches; accuracy is the running value for this epoch.
        if not step % 100:
            print(f"Step {step}, Loss: {loss.numpy():.4f}, Accuracy: {train_acc_metric.result().numpy():.4f}")

    print(f"Training Accuracy for epoch {epoch+1}: {train_acc_metric.result().numpy():.4f}")
    train_acc_metric.reset_state()  # start the next epoch's accuracy from zero
end = time.time()
print(f"\nTF Training time: {end - start:.2f} seconds")

# Score the trained model batch by batch on the test dataset
for images, labels in test_dataset:
    test_acc_metric.update_state(labels, model(images, training=False))

print(f"Test Accuracy: {test_acc_metric.result().numpy():.4f}")



Epoch 1/5
Step 0, Loss: 2.4717, Accuracy: 0.0625
Step 100, Loss: 0.2718, Accuracy: 0.7828
Step 200, Loss: 0.7183, Accuracy: 0.8361
Step 300, Loss: 0.2686, Accuracy: 0.8585
Step 400, Loss: 0.3196, Accuracy: 0.8710
Step 500, Loss: 0.2611, Accuracy: 0.8809
Step 600, Loss: 0.3924, Accuracy: 0.8900
Step 700, Loss: 0.1321, Accuracy: 0.8968
Step 800, Loss: 0.1009, Accuracy: 0.9013
Step 900, Loss: 0.2038, Accuracy: 0.9052
Step 1000, Loss: 0.2296, Accuracy: 0.9087
Step 1100, Loss: 0.4629, Accuracy: 0.9116
Step 1200, Loss: 0.1211, Accuracy: 0.9147
Step 1300, Loss: 0.1641, Accuracy: 0.9167
Step 1400, Loss: 0.1204, Accuracy: 0.9187
Step 1500, Loss: 0.0823, Accuracy: 0.9208
Step 1600, Loss: 0.0393, Accuracy: 0.9228
Step 1700, Loss: 0.0241, Accuracy: 0.9249
Step 1800, Loss: 0.2588, Accuracy: 0.9266
Training Accuracy for epoch 1: 0.9281

Epoch 2/5
Step 0, Loss: 0.0195, Accuracy: 1.0000
Step 100, Loss: 0.1048, Accuracy: 0.9623
Step 200, Loss: 0.1081, Accuracy: 0.9582
Step 300, Loss: 0.1399, Accuracy: