# MNIST Image recognition

## Imports

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
from PIL import Image

## Setup
Here we define the hyperparameters, which control how our model learns.
Other global variables can also be placed here.

In [None]:
# Hyperparameters
learning_rate = 1e-3  # passed to the SGD optimizer as `lr`
batch_size = 64  # number of samples per batch in both DataLoaders
epochs = 10  # how many times the train + test loops are run

Here we decide whether to run on the GPU or on the CPU. Using the GPU requires a torch build with CUDA support! <br>
Almost everything has to be moved onto the chosen device, by calling `.to(device)` on the model and on the tensors.

In [None]:
# Pick the compute backend: prefer CUDA, then MPS, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

## Data loading
Here we load the data used to train our model. Either use a predefined dataset like MNIST or load your own data.<br>
In detail:
- `Dataset` contains the data + labels
- `DataLoader` provides the methods to load the data from the `Dataset` in batches

In [None]:
# Both splits are stored under ./data (downloaded when requested via
# download=True) and every image is converted to a tensor by ToTensor().
training_data = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

test_data = datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

# Reshuffle the training samples every epoch so that SGD does not see the same
# batches in the same order each time. The test loader keeps its order because
# the evaluation only aggregates over all samples.
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

## Neural Network
The actual implementation of our network. The only important method is the `forward` method, which defines how our model transforms the input data. The constructor is used to define the layers we use, so that our `forward` method looks nice and tidy.

In [None]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier for 28x28 inputs with 10 output classes.

    The input is flattened and passed through two hidden linear layers of
    784 units, each followed by a ReLU. The final linear layer produces one
    raw score (logit) per class.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28 * 28, 784),
            nn.ReLU(),
            nn.Linear(784, 784),
            nn.ReLU(),
            # No activation after the output layer: the scores are used as
            # logits, and clamping them to >= 0 with a ReLU would stop the
            # network from pushing unlikely classes below zero.
            nn.Linear(784, 10),
        )

    def forward(self, x):
        """Flatten ``x`` and return the 10 unnormalised class scores per sample."""
        flat = self.flatten(x)
        return self.linear_relu_stack(flat)

# Create the network, move it onto the selected device and print its layers.
model = NeuralNetwork().to(device)
print(model)

## Train the model
Here we define the learning and evaluation process (during training). The learning steps are as follows:
1. Let our model produce a prediction
2. Calculate the loss using any loss function
3. Calculate the gradient using backpropagation
4. Let the optimizer do its magic (update the parameters in some way)
5. Reset the gradients to zero, so that the next gradient is not accumulated on top of the old one

The evaluation method only does the first two steps and reports how wrong our model is on the test dataset. Here we wrap our code in `torch.no_grad()` to avoid calculating unnecessary gradients.

In [None]:
def train_loop(dataloader: DataLoader, model, loss_fn, optimizer):
    """Train ``model`` for one pass over ``dataloader`` and print the progress.

    For every batch the prediction and loss are computed, the loss is
    backpropagated, the optimizer takes a step and the gradients are reset.
    A progress bar is refreshed every 100 batches and a final line with the
    loss of the last batch is printed at the end.

    Args:
        dataloader: Yields ``(input, target)`` batches; ``dataloader.dataset``
            must support ``len()``.
        model: Module to train; it is switched to training mode.
        loss_fn: Callable taking ``(prediction, target)`` and returning the loss.
        optimizer: Optimizer that updates the parameters of ``model``.

    Batches are moved to the module-level ``device`` before use.
    """
    size = len(dataloader.dataset)
    width = 50  # progress bar width in characters
    seen = 0  # samples processed so far, counted from the actual batches
    loss = None  # stays None when the dataloader yields no batch at all
    # set the model to training mode
    model.train()
    for ibatch, (inputs, target) in enumerate(dataloader):
        inputs, target = inputs.to(device), target.to(device)
        pred = model(inputs)
        loss = loss_fn(pred, target)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Count what was really delivered instead of relying on a global
        # batch size that may differ from the dataloader's own setting.
        seen += len(inputs)
        if ibatch % 100 == 0:
            filled = int(width * seen / size)
            # progress bar
            print(f"\r[{'=' * filled}{' ' * (width - filled)}] {seen:>5d}/{size:>5d} Loss: {loss.item():>7f}", end="")
    # An empty dataloader never computes a loss; report NaN instead of failing.
    final_loss = loss.item() if loss is not None else float("nan")
    print(f"\r[{'=' * width}] Done! Loss: {final_loss:>7f}")
            
def test_loop(dataloader: DataLoader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print accuracy and mean loss.

    The model is switched to evaluation mode and gradients are disabled.
    The loss is averaged over the number of batches. The accuracy is the
    share of samples whose highest-scoring class equals the target.
    Batches are moved to the module-level ``device`` before use.
    """
    sample_count = len(dataloader.dataset)
    # evaluation mode for the whole pass
    model.eval()
    batch_count = len(dataloader)
    loss_total = 0
    hits = 0

    # no gradients are needed while evaluating
    with torch.no_grad():
        for batch, labels in dataloader:
            batch, labels = batch.to(device), labels.to(device)
            scores = model(batch)
            loss_total += loss_fn(scores, labels).item()
            matches = scores.argmax(1) == labels
            hits += matches.type(torch.float).sum().item()

    avg_loss = loss_total / batch_count
    accuracy = hits / sample_count
    print(f"Accuracy: {(100*accuracy):>0.1f}%, Avg loss: {avg_loss:>8f} \n")
    

Call the train and eval loops for n epochs: each epoch trains on the training data and then evaluates on the test data. Also define the loss function and the optimizer.

In [None]:
# Cross-entropy loss on the model output, optimised with plain SGD.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Every epoch is one training pass followed by an evaluation on the test set.
for epoch_number in range(1, epochs + 1):
    print(f"Epoch {epoch_number}")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

## Predict
Actually do something with the model. For this, the input needs to look exactly like the training data, so in this case we need to convert our image to grayscale, resize it to 28x28, convert it into a tensor and invert it, so that white is close to 0 and black is close to 1.

In [None]:
def loadImage(path):
    """Load an image file and turn it into an input tensor for the model.

    Args:
        path: Location of the image file, passed on to ``Image.open``.

    Returns:
        A tensor of shape (1, 1, 28, 28): the grayscale image resized to
        28x28 with inverted intensities and a leading batch dimension.
    """
    # Open inside a context manager so the file handle is released right away;
    # convert() returns a new image that no longer needs the open file.
    with Image.open(path) as source:
        image = source.convert("L")  # convert to grayscale
    transform = transforms.Compose([
        transforms.Resize((28, 28)),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: 1 - x),  # Invert image, so that white is 0 and black is 1
    ])
    return transform(image).unsqueeze(0)  # add batch dimension

Create a small prediction method, which takes the image as a tensor (via `loadImage`), transfers it onto the `device` the model is loaded on and infers the result. Then we call softmax to normalize our logits into a vector whose values range from 0 to 1 and add up to 1 (this is not strictly necessary, because `argmax` picks the same entry either way). Finally we take the class with the highest probability via `argmax(1)`.

In [None]:
def pred(model, image):
    """Return the index of the class the model considers most likely.

    ``image`` is moved to the module-level ``device``, the model is put into
    evaluation mode and the scores are computed without gradients. The
    result is converted with ``.item()``, so exactly one image is expected.
    """
    image = image.to(device)
    model.eval()
    softmax = nn.Softmax(dim=1)
    with torch.no_grad():
        logits = model(image)
        probabilities = softmax(logits)
        best_class = probabilities.argmax(1)
    return best_class.item()

Use the methods...

In [None]:
# Load the sample image "5.png" and print the class index predicted for it.
X = loadImage("5.png")
print(pred(model, X))

## Debugging
Here I wrote some code that prints the tensors in a human-readable format. I used it to check whether my image was correctly represented as a matrix.

In [None]:
# Drop the size-1 dimensions and print the pixel values, one image row per line.
array = loadImage("5.png").squeeze().numpy()
for row in array:
    print(*(f"{pixel:.1f}" for pixel in row))

In [None]:

# Print the label and the pixel grid of the first 10 test samples.
for image, label in (test_data[index] for index in range(10)):
    image_array = image.squeeze().numpy()
    print(label)
    for row in image_array:
        print(*(f"{pixel:.1f}" for pixel in row))