In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import struct
import random
from array import array
from os.path  import join
from torch.utils.data import TensorDataset, DataLoader

# MNIST Data Loader Class

class MnistDataloader(object):
    def __init__(self, training_images_filepath,training_labels_filepath, test_images_filepath, test_labels_filepath):
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath
        self.test_labels_filepath = test_labels_filepath
    
    def read_images_labels(self, images_filepath, labels_filepath):        
        labels = []
        with open(labels_filepath, 'rb') as file:
            magic, size = struct.unpack(">II", file.read(8))
            if magic != 2049:
                raise ValueError('Magic number mismatch, expected 2049, got {}'.format(magic))
            labels = array("B", file.read())        
        
        with open(images_filepath, 'rb') as file:
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError('Magic number mismatch, expected 2051, got {}'.format(magic))
            image_data = array("B", file.read())        
        images = []
        for i in range(size):
            images.append([0] * rows * cols)
        for i in range(size):
            img = np.array(image_data[i * rows * cols:(i + 1) * rows * cols])
            img = img.ravel()
            images[i][:] = img            
        
        return images, labels
            
    def load_data(self):
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, self.test_labels_filepath)
        return (x_train, y_train),(x_test, y_test)  

# Define a simple feed-forward NN with five hidden layers    
    
class SimpleNN(nn.Module):
    
    # Each hidden layer is of the same size
    # ReLU activcation is used
    
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNN, self).__init__()
        self.relu = nn.ReLU()
        self.layer1 = nn.Linear(input_size, hidden_size, bias=True)
        self.layer2 = nn.Linear(hidden_size, hidden_size, bias=True)
        self.layer3 = nn.Linear(hidden_size, hidden_size, bias=True)
        self.layer4 = nn.Linear(hidden_size, hidden_size, bias=True)
        self.layer5 = nn.Linear(hidden_size, hidden_size, bias=True)
        self.output_layer = nn.Linear(hidden_size, output_size, bias=True)   

    def forward(self, x):   # The forward pass
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.relu(self.layer3(x))
        x = self.relu(self.layer4(x))
        x = self.relu(self.layer5(x))
        x = self.output_layer(x)
        return x

# Model evaluation function    
    
def evaluate_model(model, data_loader):
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0

    with torch.no_grad():  # No need to track the gradients
        for images, labels in data_loader:
            outputs = model(images)
            predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    return accuracy

# Model parameters and hyperparameters
input_size = 784
hidden_size = 16
output_size = 10
batch_size = 64
learn_rate = 0.001
number_of_epochs = 100

# Where is the MNIST data located?
training_images_filepath = 'train-images-idx3-ubyte/train-images-idx3-ubyte'
training_labels_filepath = 'train-labels-idx1-ubyte/train-labels-idx1-ubyte'
test_images_filepath = 't10k-images-idx3-ubyte/t10k-images-idx3-ubyte'
test_labels_filepath = 't10k-labels-idx1-ubyte/t10k-labels-idx1-ubyte'

# Create the model, specify our loss function, and our optimizer
model = SimpleNN(input_size, hidden_size, output_size)
loss_function = nn.CrossEntropyLoss()   # Let us use cross-entropy loss
optimizer = optim.SGD(model.parameters(), lr=learn_rate)  # Let us use stochastic gradient descent

# Load MINST dataset
mnist_dataloader = MnistDataloader(training_images_filepath, training_labels_filepath, test_images_filepath, test_labels_filepath)
(x_train, y_train), (x_test, y_test) = mnist_dataloader.load_data()
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)  # Convert to torch tensor
y_train_tensor = torch.tensor(y_train, dtype=torch.long)  # Convert to torch tensor
x_test_tensor = torch.tensor(x_test, dtype=torch.float32)  # Convert to torch tensor
y_test_tensor = torch.tensor(y_test, dtype=torch.long)  # Convert to torch tensor
x_train_tensor /= 255.0   # Normalize, scales the pixel values to the range [0, 1], which is standard practice for image data.
x_test_tensor /= 255.0   # Normalize, scales the pixel values to the range [0, 1], which is standard practice for image data.
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)

# Training loop

data_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

for epoch in range(number_of_epochs):
    for batch_input, batch_target in data_loader: 
        # Forward pass
        output = model(batch_input)
        loss = loss_function(output, batch_target)  # Compute loss 

        # Backward pass and optimization
        model.zero_grad()  # Clear existing gradients
        loss.backward()  # Compute gradient of the loss with respect to model parameters
        optimizer.step()  # Update parameters

# Evaluate the model
test_data_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
test_accuracy = evaluate_model(model, test_data_loader)
print(f'Test Accuracy: {test_accuracy}%')


AttributeError: 'bool' object has no attribute 'sum'