In [None]:
# import necessary libraries

import torch
from torch import nn
from torchvision import datasets, transforms
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Show the installed PyTorch version; the bare expression is rendered as the cell output.
torch.__version__

In [None]:
# Ask the inline matplotlib backend to render figures in the high-resolution "retina" format.
%config InlineBackend.figure_format='retina'

In [None]:
# Seed PyTorch's global random number generator with a fixed value (316)
# so that operations drawing from it are repeatable from run to run.
torch.manual_seed(316)

# 1. Load Data

In [None]:
# Preprocessing applied to every MNIST sample (training, validation and test):
# the only step is the conversion of the image to a tensor.
transform = transforms.Compose([transforms.ToTensor()])

In [None]:
# Fetch the MNIST training split into the current directory (downloaded only
# when not already present) and attach the preprocessing defined above.
dataset = datasets.MNIST(
    root='.',
    train=True,
    transform=transform,
    download=True,
)

In [None]:
# Number of samples in the downloaded training split (displayed as the cell output).
len(dataset)

## 1-1. Split the data into training set and validation set

In [None]:
# 80 / 20 split of the downloaded data into training and validation subsets.
# The validation size is derived by subtraction so the two sizes always add
# up to the full dataset, whatever the rounding of the 80 % share.
n_samples = len(dataset)
len_trainset = int(n_samples * 0.8)
len_valset = n_samples - len_trainset

split_sizes = [len_trainset, len_valset]
trainset, valset = torch.utils.data.random_split(dataset, split_sizes)

In [None]:
# Report the subset sizes, one per line: training first, then validation.
print(len(trainset), len(valset), sep="\n")

## 1-2. Training set & Validation set

In [None]:
# Mini-batch size shared by every loader in this notebook.
batch_size = 32

# Loader for the training set: reshuffled every epoch so that the batches
# differ from one epoch to the next.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True
)

# Loader for the validation set: evaluation metrics do not depend on the
# sample order, so shuffling is turned off. This avoids useless work and
# keeps validation from drawing on the global random number generator.
valloader = torch.utils.data.DataLoader(
    valset,
    batch_size=batch_size,
    shuffle=False
)

# 2. Explore Data

In [None]:
# Pull a single batch from the training loader for visual inspection.
# A fresh iterator is created, so this does not interfere with later loops.
images, labels = next(iter(trainloader))

In [None]:
# Shape of the image batch fetched above (displayed as the cell output).
images.shape

In [None]:
# Show the first 15 images of the batch on a 3 x 5 grid, each titled with its label.
fig, ax = plt.subplots(nrows=3, ncols=5, figsize=(12, 6))
ax = ax.ravel()  # flatten the 2-D grid of axes so it can be walked in order

for index, axis in enumerate(ax):
    # channel 0 is the single grayscale plane of the image
    axis.imshow(images[index][0], cmap="gray")
    axis.set_title(f"Label: {labels[index]}")
    # pixel coordinates carry no information here, so hide the ticks
    axis.set_xticks([])
    axis.set_yticks([])

plt.show()

# 3. Build Neural Network

In [None]:
# Use one hidden layer and one output layer
# Use ReLU activation function to add non-linearity
class Network(nn.Module):
    """Fully connected classifier with a single hidden layer.

    The input is flattened to ``in_features`` values per sample, passed
    through ``Linear -> ReLU -> Linear`` and returned as raw class scores
    (logits); no softmax is applied.

    Args:
        in_features: Number of values per flattened sample.
            Defaults to 784 (1 * 28 * 28).
        hidden_features: Width of the hidden layer. Defaults to 512.
        num_classes: Number of output scores per sample. Defaults to 10.
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()

        # Attribute names are kept as fc1 / fc2 so saved state dicts stay loadable.
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the logits for a batch of inputs.

        Args:
            x: Tensor whose element count is a multiple of ``in_features``.

        Returns:
            Tensor of shape ``(N, num_classes)``, where ``N`` is the number
            of ``in_features``-sized rows obtained by flattening ``x``.
        """
        # Flatten the input images. reshape (unlike view) also accepts
        # non-contiguous tensors, copying only when it has to.
        x = x.reshape(-1, self.fc1.in_features)
        out = self.relu(self.fc1(x))
        out = self.fc2(out)

        return out

model = Network()
print(model)

In [None]:
from torchinfo import summary

# Print a summary of the network for an input of shape
# (batch_size, 1, 28, 28), i.e. one batch as produced by the loaders,
# with the CPU requested as the device for the summary pass.
summary(model, input_size=(batch_size, 1, 28, 28), device="cpu")

# 4. Training and Validation

### Check GPU state

In [None]:
# Whether PyTorch can use CUDA in this environment (displayed as the cell output).
torch.cuda.is_available()

In [None]:
# Number of CUDA devices visible to PyTorch (displayed as the cell output).
torch.cuda.device_count()

In [None]:
# Print the index of the currently selected CUDA device; skipped when CUDA is unavailable.
if torch.cuda.is_available():
    print(torch.cuda.current_device())

In [None]:
# Print the name of CUDA device 0; skipped when CUDA is unavailable.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))

In [None]:
# Choose the compute device: the first GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Bare expression so the notebook displays which device was chosen.
device

### model

In [None]:
# Move the model to the selected device. Batches are moved to the same
# device inside the training / evaluation loops.
model.to(device)

In [None]:
import time

In [None]:
from torch import optim

# Loss: cross-entropy between the network's raw scores and the integer labels.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over every model parameter, with a learning rate of 1e-4.
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [None]:
# Train the model for n_epochs. After every epoch the model is evaluated on
# the validation set, and the average training loss, average validation loss
# and validation accuracy are recorded and printed.
start_time = time.time()

# set the number of epochs
n_epochs = 50

# per-epoch metric history, stored as plain Python floats
train_loss_history = []
val_loss_history = []
accuracy_history = []

for epoch in range(n_epochs):

    # ---------------- training ----------------
    model.train()
    train_loss = 0.0

    for images, labels in trainloader:

        images, labels = images.to(device), labels.to(device)

        # zero the gradients on each training pass
        optimizer.zero_grad()

        # forward pass; calling the module (rather than .forward) also runs any hooks
        logits = model(images)

        # use the network output to calculate the loss (output vs. ground truth)
        loss = criterion(logits, labels)

        # backward pass to calculate the gradients (backpropagation)
        loss.backward()

        # update the weights
        optimizer.step()

        # keep track of the loss per epoch
        train_loss += loss.item()

    # average training loss per batch for this epoch
    train_loss = train_loss / len(trainloader)
    train_loss_history.append(train_loss)

    # ---------------- validation ----------------
    model.eval()
    val_loss = 0.0
    n_correct = 0   # correctly classified validation samples
    n_seen = 0      # validation samples evaluated so far

    # no gradients are needed for evaluation
    with torch.no_grad():

        for images, labels in valloader:

            images, labels = images.to(device), labels.to(device)

            logits = model(images)

            loss = criterion(logits, labels)
            val_loss += loss.item()

            # The arg-max of the logits is the predicted class; a softmax
            # would not change which class scores highest.
            predictions = logits.argmax(dim=1)

            # Count samples instead of averaging per-batch accuracies, so a
            # smaller final batch is not over-weighted.
            n_correct += (predictions == labels).sum().item()
            n_seen += labels.size(0)

    # calculate the validation loss and record
    val_loss = val_loss / len(valloader)
    val_loss_history.append(val_loss)

    # calculate the validation accuracy (fraction in [0, 1]) and record
    accuracy = n_correct / n_seen
    accuracy_history.append(accuracy)

    # print the current state
    metrics = f"Epoch: {epoch + 1:02}/{n_epochs:02}"
    metrics += " | "
    metrics += f"Train loss: {train_loss:.3f}"
    metrics += " | "
    metrics += f"Validation loss: {val_loss:.3f}"
    metrics += " | "
    metrics += f"Accuracy: {accuracy:.3f}"
    print(metrics)

print("Elapsed: {0:.2f} seconds".format(time.time() - start_time))

# 5. Metrics

In [None]:
# Plot the per-epoch training and validation loss curves on a single figure.
fig, ax = plt.subplots(figsize=(6, 6))

loss_curves = (
    (train_loss_history, "Train Loss"),
    (val_loss_history, "Validation Loss"),
)
for history, curve_label in loss_curves:
    ax.plot(history, label=curve_label)

ax.set(xlabel="epoch", ylabel="loss", title="Train Loss & Validation Loss")
ax.legend()

plt.show()

In [None]:
# Plot the validation accuracy recorded after each epoch.
fig, ax = plt.subplots()

ax.plot(accuracy_history, label="Accuracy")
ax.set(xlabel="epoch", ylabel="accuracy", title="Accuracy")

plt.show()

# 6. Save the model

In [None]:
# Save the model's state dict (not the whole model object) to the file
# "model_state_dict.pth" in the current working directory.
torch.save(model.state_dict(), "model_state_dict.pth")

# 7. Test the model with Test set

## 7-1. Load the Test dataset

In [None]:
# Fetch the MNIST test split (train=False) into the current directory,
# using the same preprocessing as the training data.
testset = datasets.MNIST('.', download=True, train=False, transform=transform)

# Loader for the test set: the test metrics do not depend on the sample
# order, so the data is read sequentially rather than shuffled.
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False
)

## 7-2. Test

In [None]:
# Evaluate the trained model on the test set by counting how many samples
# are classified correctly.

# set model to evaluation mode
model.eval()

total = 0  # number of test samples evaluated
total_corrects = 0  # number of test samples predicted correctly

# no gradients are needed for evaluation
with torch.no_grad():

    for images, labels in testloader:

        images, labels = images.to(device), labels.to(device)

        # calling the module (rather than .forward) also runs any hooks
        logits = model(images)

        # The arg-max of the logits is the predicted class; a softmax would
        # not change which class scores highest.
        predictions = logits.argmax(dim=1)

        # .item() turns the 0-dim count tensor into a plain Python int
        total_corrects += (predictions == labels).sum().item()

        total += labels.size(0)

In [None]:
# Test accuracy as a percentage: correctly predicted samples over all
# evaluated samples, scaled to 0-100.
test_accuracy = total_corrects / total * 100.0
print(test_accuracy)

# 8. 손글씨 inference

## 8-1. inference를 위한 transform 정의

In [None]:
# Preprocessing for user-supplied images, applied in this order:
#   1. reduce the image to a single grayscale channel
#   2. resize it to 28 x 28 pixels, the input size the network expects
#   3. convert it to a tensor
inference_steps = [
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
]
inf_transform = transforms.Compose(inference_steps)

## 8-2. 이미지 가져오기 & Inference 실행

In [None]:
import os
from PIL import Image

# Folder that holds the handwritten digit images.
folder_path = "./images"

# Collect the image file names.
# - os.listdir returns entries in arbitrary order, so the names are sorted
#   to make the inference results (and the plots built from them)
#   reproducible across runs and machines.
# - The extension check is case-insensitive so files such as "3_a.PNG"
#   are not silently skipped.
image_names = sorted(
    file for file in os.listdir(folder_path)
    if file.lower().endswith(('jpg', 'png'))
)

# The expected label is encoded in the file name as the integer before the
# first underscore, e.g. "7_sample.png" -> 7.
inference_labels = [int(file.split('_')[0]) for file in image_names]

In [None]:
# Run the trained model on every image in the folder and sort the results
# into correctly and incorrectly classified samples.
correct_images = []            # input tensors the model classified correctly
correct_probs = []             # top-1 probability for each correct prediction
incorrect_images = []          # input tensors the model misclassified
incorrect_pred_labels = []     # predicted label (int) for each misclassified image
incorrect_actual_labels = []   # expected label (int) for each misclassified image

# inference only: evaluation mode and no gradient tracking
model.eval()

with torch.no_grad():

    for image_name, label in zip(image_names, inference_labels):
        # Image.open keeps the file handle open; the context manager closes
        # it as soon as the pixel data has been turned into a tensor.
        with Image.open(os.path.join(folder_path, image_name)) as image:
            # add a leading batch dimension of size 1
            input_image = inf_transform(image).unsqueeze(0)

        input_image = input_image.to(device)

        output = model(input_image)

        probability = output.softmax(dim=1)

        top_prob, top_class = probability.topk(k=1, dim=1)

        # plain Python int, so the stored labels do not hold on to tensors
        predicted_label = top_class.item()

        if predicted_label == label:
            correct_images.append(input_image)
            correct_probs.append(top_prob.item())

        else:
            incorrect_images.append(input_image)
            incorrect_pred_labels.append(predicted_label)
            incorrect_actual_labels.append(label)

# Accuracy in percent: the share of CORRECTLY classified images.
# Falls back to 0.0 when the folder contains no images, instead of raising
# ZeroDivisionError.
accuracy = len(correct_images) / len(image_names) * 100.0 if image_names else 0.0

In [None]:
# Display the `accuracy` value computed by the inference loop above.
accuracy

In [None]:
# Show up to nine misclassified images on a 3 x 3 grid, each titled with
# its expected label.
fig, ax = plt.subplots(ncols=3, nrows=3, figsize=(12,6))
ax = np.ravel(ax)

for index, axis in enumerate(ax):
    # Fewer than nine images may have been misclassified: blank out the
    # unused panels instead of indexing past the end of the list.
    if index >= len(incorrect_images):
        axis.axis("off")
        continue

    # drop the batch and channel dimensions to get a 2-D array for imshow
    axis.imshow(incorrect_images[index].squeeze(0).squeeze(0).cpu().numpy(), cmap="gray")
    label = incorrect_actual_labels[index]
    axis.set_title(f"Label: {label}")
    axis.set_xticks([])
    axis.set_yticks([])

plt.show()