<a href="https://colab.research.google.com/github/Luke-code2025/ResponsibleAI-assignment/blob/main/week3_examples.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Import TensorFlow. TensorFlow provides the deep learning framework and Keras API for building models.
import tensorflow as tf

# Load the MNIST dataset directly from TensorFlow's built-in datasets.
# The dataset is automatically downloaded from an online source and split into training and test sets.
# x_train and x_test contain image pixel data, while y_train and y_test contain the corresponding digit labels.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Normalize the pixel values of the images from the original range of 0 to 255 to a range of 0 to 1.
# This scaling helps the neural network train faster and more reliably.
x_train, x_test = x_train / 255.0, x_test / 255.0

# Build a Sequential model, which is a linear stack of layers.
model = tf.keras.models.Sequential([
    # The Flatten layer converts the 2D 28x28 images into a 1D array of 784 pixels.
    tf.keras.layers.Flatten(input_shape=(28, 28)),

    # A Dense (fully-connected) layer with 128 neurons and ReLU activation for introducing non-linearity.
    tf.keras.layers.Dense(128, activation='relu'),

    # Dropout layer randomly sets 20% of its inputs to zero during training.
    # This prevents overfitting by ensuring that the model does not rely too heavily on any particular set of features.
    tf.keras.layers.Dropout(0.2),

    # The final Dense layer with 10 neurons and softmax activation.
    # Each neuron corresponds to one of the 10 digits (0-9), and softmax outputs a probability distribution.
    tf.keras.layers.Dense(10, activation='softmax')
])

# Compile the model.
# - optimizer='adam': Adam optimizer adjusts the learning rate during training.
# - loss='sparse_categorical_crossentropy': This loss function is used for integer-labeled classification.
# - metrics=['accuracy']: The model will report accuracy during training and testing.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train (fit) the model on the training data over 5 epochs.
# An epoch means one full pass through the entire training dataset.
model.fit(x_train, y_train, epochs=5)

# Evaluate the model on the test set.
# This provides an unbiased evaluation of how well the model generalizes to new, unseen data.
model.evaluate(x_test, y_test)
model.save('mnist_model.h5')
tf.keras.models.load_model('mnist_model.h5')

Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.8565 - loss: 0.4850
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.9548 - loss: 0.1523
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9664 - loss: 0.1091
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9729 - loss: 0.0887
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9771 - loss: 0.0742
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9741 - loss: 0.0880




<Sequential name=sequential_1, built=True>

In [3]:
# Import necessary PyTorch libraries and modules.
import torch                              # Main PyTorch package.
import torch.nn as nn                     # Provides neural network building blocks.
import torch.optim as optim               # Provides optimization algorithms.
import torch.nn.functional as F           # Contains useful functions like activation functions.
from torchvision import datasets, transforms  # For loading and transforming datasets.
from torch.utils.data import DataLoader   # Helps in batching and shuffling the data.

# Define the data transformation.
# - transforms.ToTensor(): Converts a PIL Image or numpy array (pixel values 0-255) into a FloatTensor (values between 0.0 and 1.0).
# - transforms.Normalize((0.1307,), (0.3081,)): Normalizes the tensor with given mean and standard deviation, computed on MNIST.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Download and load the training and test datasets.
# - root='./data': Directory where the datasets will be stored.
# - train=True/False: Specifies if we're downloading the training or test set.
# - download=True: Downloads the dataset if it's not already present locally.
# - transform: Applies the transformation defined above to the data.
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Wrap the datasets in DataLoader objects to enable batch processing and shuffling.
# - batch_size: Number of samples per batch.
# - shuffle=True for training data to randomize data order each epoch.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Define a simple neural network model by subclassing nn.Module.
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        # Define the first fully connected (dense) layer:
        # Input features are 28*28 pixels (flattened image) and output is 128 features.
        self.fc1 = nn.Linear(28 * 28, 128)
        # Define the second fully connected layer:
        # Input features from the previous layer and output should be 10 classes (digits 0-9).
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        # Flatten the input x from shape [batch_size, 1, 28, 28] to [batch_size, 28*28]
        x = x.view(-1, 28 * 28)
        # Apply the first layer then a ReLU activation function to introduce non-linearity.
        x = F.relu(self.fc1(x))
        # Pass the output through the second layer which outputs the logits for 10 classes.
        x = self.fc2(x)
        return x

# Instantiate the model.
model = SimpleNN()

# Define the optimizer and the loss function.
# - Adam is a popular optimizer that adapts the learning rate.
# - CrossEntropyLoss combines softmax and negative log likelihood loss, appropriate for multi-class classification.
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Training loop: iterate over the dataset multiple times (epochs).
for epoch in range(5):  # Train for 5 epochs.
    model.train()  # Set the model to training mode.
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()          # Clear gradients from the previous iteration.
        output = model(data)           # Forward pass: compute the model output for the current batch.
        loss = criterion(output, target)  # Compute the loss between prediction and true labels.
        loss.backward()                # Backward pass: compute the gradients.
        optimizer.step()               # Update model parameters using the gradients.

        # Print current training progress every 100 batches.
        if batch_idx % 100 == 0:
            print(f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)}] Loss: {loss.item()}")

    # Evaluation loop to assess model performance on the test dataset.
    model.eval()   # Set the model to evaluation (inference) mode.
    test_loss = 0
    correct = 0
    with torch.no_grad():  # Disable gradient calculation for inference.
        for data, target in test_loader:
            output = model(data)                # Compute output for test data.
            test_loss += criterion(output, target).item()  # Sum up batch losses.
            pred = output.argmax(dim=1, keepdim=True)  # Get the index of the max logit (predicted class).
            correct += pred.eq(target.view_as(pred)).sum().item()  # Count correct predictions.

    test_loss /= len(test_loader.dataset)  # Compute average loss.
    accuracy = 100. * correct / len(test_loader.dataset)  # Calculate accuracy in percentage.
    print(f"\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.2f}%)\n")

100%|██████████| 9.91M/9.91M [00:00<00:00, 56.7MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.71MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 14.0MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 8.81MB/s]


Train Epoch: 0 [0/60000] Loss: 2.298306465148926
Train Epoch: 0 [6400/60000] Loss: 0.3972071707248688
Train Epoch: 0 [12800/60000] Loss: 0.283642053604126
Train Epoch: 0 [19200/60000] Loss: 0.25036129355430603
Train Epoch: 0 [25600/60000] Loss: 0.05858876556158066
Train Epoch: 0 [32000/60000] Loss: 0.20605993270874023
Train Epoch: 0 [38400/60000] Loss: 0.25541502237319946
Train Epoch: 0 [44800/60000] Loss: 0.12323227524757385
Train Epoch: 0 [51200/60000] Loss: 0.12212136387825012
Train Epoch: 0 [57600/60000] Loss: 0.07801207154989243

Test set: Average loss: 0.0001, Accuracy: 9581/10000 (95.81%)

Train Epoch: 1 [0/60000] Loss: 0.15849310159683228
Train Epoch: 1 [6400/60000] Loss: 0.13726764917373657
Train Epoch: 1 [12800/60000] Loss: 0.04269258305430412
Train Epoch: 1 [19200/60000] Loss: 0.11131531745195389
Train Epoch: 1 [25600/60000] Loss: 0.10878986865282059
Train Epoch: 1 [32000/60000] Loss: 0.17630933225154877
Train Epoch: 1 [38400/60000] Loss: 0.15944647789001465
Train Epoch: 1 [