<a href="https://colab.research.google.com/github/KevinFallon/mnist_project/blob/main/MNIST_FeedForward_FINAL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torchvision as tv
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

In [2]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# HYPER PARAMs
num_epochs = 3
batch_size = 128
learning_rate = 0.001

# Seed PyTorch's random number generators so that weight initialisation and
# DataLoader shuffling repeat on a fresh "Restart & Run All".
# NOTE(review): some CUDA kernels are non-deterministic, so GPU runs may still
# differ slightly between executions.
seed = 42
torch.manual_seed(seed)


In [3]:
# Preprocessing pipeline applied to every image: only the tensor conversion.
img_transforms = tv.transforms.Compose([tv.transforms.ToTensor()])

# Both splits share the same storage root, download flag and preprocessing;
# only the `train` flag differs.
_mnist_kwargs = dict(root='./data', download=True, transform=img_transforms)
mnist_trainset = datasets.MNIST(train=True, **_mnist_kwargs)
mnist_testset = datasets.MNIST(train=False, **_mnist_kwargs)

# Shuffle the training split; iterate the test split in its stored order.
mnist_train_loader = DataLoader(dataset=mnist_trainset, batch_size=batch_size, shuffle=True)
mnist_test_loader = DataLoader(dataset=mnist_testset, batch_size=batch_size, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 274370995.01it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 33686232.99it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 203666148.26it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz



100%|██████████| 4542/4542 [00:00<00:00, 5958876.69it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [4]:
class FeedForward(nn.Module):
  """Fully connected classifier with a single hidden layer.

  Every sample is flattened to a vector, passed through one Linear + ReLU
  hidden layer and then projected to `num_classes` raw scores (logits).
  No softmax is applied inside the model.

  Args:
    num_classes: Number of output scores produced per sample.
    in_features: Number of values in one flattened sample. Defaults to
      28*28, the value that was previously hard-coded.
    hidden_features: Width of the hidden layer. Defaults to 28*28, the value
      that was previously hard-coded.
  """

  def __init__(self, num_classes=10, in_features=28*28, hidden_features=28*28):
    super(FeedForward, self).__init__()

    # Kept as a Sequential called `fc1` so the parameter names
    # (`fc1.0.weight`, `fc1.0.bias`) stay the same as before.
    self.fc1 = nn.Sequential(
        nn.Linear(in_features=in_features, out_features=hidden_features),
        nn.ReLU(inplace=True),
    )

    self.final_fc = nn.Linear(in_features=hidden_features, out_features=num_classes)

  def forward(self, x):
    """Return one row of `num_classes` raw scores per sample in `x`.

    Args:
      x: Batch tensor whose first dimension indexes samples; all remaining
        dimensions are flattened and must multiply to `in_features`.
    """
    # `flatten` instead of `view`: `view` raises a RuntimeError when the input
    # is not contiguous (e.g. after a transpose), `flatten` copies if needed.
    x = x.flatten(start_dim=1)
    x = self.fc1(x)
    return self.final_fc(x)

In [5]:
def train(model, optimizer, train_loader, loss_module, number_epochs, batch_size, log_interval=None):
  """Train `model` in place for `number_epochs` passes over `train_loader`.

  Args:
    model: Module to optimise. It is switched to training mode.
    optimizer: Optimizer used to zero gradients and apply the update step.
    train_loader: Iterable yielding `(images, labels)` batches. It must
      support `len()` whenever a progress line is printed.
    loss_module: Callable mapping `(outputs, labels)` to a loss that supports
      `.backward()` and `.item()`.
    number_epochs: Number of full passes over `train_loader`.
    batch_size: Used only as the default logging period (a progress line is
      printed every `batch_size` steps); it does not affect batching, which
      is decided by `train_loader`.
    log_interval: Optional logging period in steps. Defaults to `batch_size`
      so existing calls print exactly as before. A value of 0 disables
      progress output.

  Returns:
    None. Progress is reported through `print`.
  """
  model.train()

  if log_interval is None:
    log_interval = batch_size

  # Follow the device the model actually lives on instead of relying on a
  # notebook-level `device` variable defined in another cell.
  first_param = next(model.parameters(), None)
  target_device = first_param.device if first_param is not None else None

  for epoch in range(number_epochs):
    for i, (images, labels) in enumerate(train_loader):
      if target_device is not None:
        images = images.to(target_device)
        labels = labels.to(target_device)

      outputs = model(images)
      loss = loss_module(outputs, labels)

      # Clear gradients left over from the previous step before backprop.
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      if log_interval and (i+1) % log_interval == 0:
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, number_epochs, i+1, len(train_loader), loss.item()))


In [6]:
# Build the network and place it on the selected device.
model = FeedForward()
model = model.to(device)

# Cross-entropy loss on the model outputs, optimised with Adam.
loss_module = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

train(
    model=model,
    optimizer=optimizer,
    train_loader=mnist_train_loader,
    loss_module=loss_module,
    number_epochs=num_epochs,
    batch_size=batch_size,
)

Epoch [1/3], Step [128/469], Loss: 0.2766
Epoch [1/3], Step [256/469], Loss: 0.2476
Epoch [1/3], Step [384/469], Loss: 0.1522
Epoch [2/3], Step [128/469], Loss: 0.0865
Epoch [2/3], Step [256/469], Loss: 0.0575
Epoch [2/3], Step [384/469], Loss: 0.1116
Epoch [3/3], Step [128/469], Loss: 0.0279
Epoch [3/3], Step [256/469], Loss: 0.0122
Epoch [3/3], Step [384/469], Loss: 0.0720


In [8]:
def eval_model(model, data_loader):
  """Print the classification accuracy (in percent) of `model` on `data_loader`.

  Args:
    model: Module mapping a batch of inputs to a `[batch, num_classes]` tensor
      of scores. It is switched to eval mode and left in that mode.
    data_loader: Iterable yielding `(inputs, labels)` batches.

  Returns:
    None. The result is reported through `print`.

  Raises:
    ZeroDivisionError: If `data_loader` yields no samples.
  """
  # Set model to eval mode for inference
  model.eval()

  # Follow the device the model actually lives on instead of relying on a
  # notebook-level `device` variable defined in another cell.
  first_param = next(model.parameters(), None)
  target_device = first_param.device if first_param is not None else None

  true_preds, num_preds = 0, 0

  with torch.no_grad(): # Deactivate gradients for inference
    for data_inputs, data_labels in data_loader:
      if target_device is not None:
        data_inputs = data_inputs.to(target_device)
        data_labels = data_labels.to(target_device)
      # Output is [batch, num_classes]: one raw score (logit) per class. No
      # softmax is needed because the largest logit is also the most likely class.
      preds = model(data_inputs)
      # Index of the largest score along dim=1 is the predicted class id.
      pred = preds.argmax(dim=1)
      # Shape the labels like the predictions and count the matches.
      true_preds += int(pred.eq(data_labels.reshape(pred.shape)).sum().item())
      num_preds += data_labels.shape[0]

  # Divide by the number of samples actually evaluated, so the figure stays
  # correct when the loader drops or subsamples data, and so plain iterables
  # without a `.dataset` attribute are accepted.
  print('Accuracy: {:.4f}'.format(100.0 * true_preds / num_preds))

# Report accuracy of the trained model on the test loader.
eval_model(model=model, data_loader=mnist_test_loader)

Accuracy: 97.6100
