## MNIST Dataset Classification

In [None]:
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR

In [None]:
from torchsummary import summary

In [None]:
!git clone https://github.com/KuoYuChang/MLDL_video_course.git
%cd MLDL_video_course

#### Load the dataset
#### via torchvision

In [None]:
# Preprocessing applied to every MNIST image: convert it to a tensor, then
# standardize the single channel with the given mean / std (the same
# constants used by the official PyTorch MNIST example).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

In [None]:
# MNIST train / test splits stored under ../data. Only the training split
# requests a download; both splits share the same preprocessing pipeline.
train_set = datasets.MNIST(
    root='../data',
    train=True,
    download=True,
    transform=transform,
)
test_set = datasets.MNIST(
    root='../data',
    train=False,
    transform=transform,
)

In [None]:
# Mini-batch sizes: 32 samples per optimisation step during training,
# one sample at a time during evaluation.
batch_size = 32
test_batch_size = 1

# Shuffle the training data so each epoch visits the samples in a different
# order; without it every epoch replays exactly the same batches.
# The test loader keeps the dataset order (shuffling does not affect metrics).
train_kwargs = {'batch_size': batch_size, 'shuffle': True}
test_kwargs = {'batch_size': test_batch_size}

train_loader = torch.utils.data.DataLoader(train_set, **train_kwargs)
test_loader = torch.utils.data.DataLoader(test_set, **test_kwargs)

In [None]:
# Pull a single batch from the training loader so it can be inspected
# visually before any training happens.
dataiter = iter(train_loader)
first_batch = next(dataiter)
data_i, label_i = first_batch

In [None]:
# Human-readable class names: the ten digits '0' .. '9', indexed by label.
classes = [str(digit) for digit in range(10)]

In [None]:
from utils.plot_tools import plot_images

In [None]:
# Show the sampled batch together with its labels.
# NOTE(review): `normalize=True` presumably tells the helper that the images
# were normalized by the transform — confirm in utils/plot_tools.
plot_images(
    data_i,
    label_i,
    classes,
    normalize=True,
)

### define model

In [None]:
class Net(nn.Module):
    """Two-layer fully connected classifier that owns its loss, optimizer and LR scheduler.

    The network flattens each input sample, applies ``Linear -> ReLU -> Linear``
    and returns log-probabilities over 10 classes.

    Args:
        input_shape: Sequence whose first two entries are the height and width
            of one input sample; their product is the flattened input size.
        lr: Learning rate passed to the Adam optimizer.
        gamma: Multiplicative decay factor for ``StepLR``; the learning rate is
            multiplied by it on every ``scheduler.step()`` call (``step_size=1``).

    Attributes set on the instance: ``fc1``, ``fc2``, ``nll_loss``,
    ``optimizer`` and ``scheduler``.
    """

    def __init__(self, input_shape, lr=1.0, gamma=0.7):
        super(Net, self).__init__()

        # Number of features after flattening one sample.
        flat_len = input_shape[0] * input_shape[1]

        self.fc1 = nn.Linear(flat_len, 128)
        self.fc2 = nn.Linear(128, 10)

        # forward() emits log-probabilities, so negative log-likelihood is the
        # matching loss.
        self.nll_loss = nn.NLLLoss()

        # The optimizer must be created after the layers so that
        # self.parameters() already contains their weights.
        self.optimizer = optim.Adam(self.parameters(), lr=lr)
        self.scheduler = StepLR(self.optimizer, step_size=1, gamma=gamma)

    def forward(self, x):
        """Return log-probabilities of shape ``(batch, 10)`` for a batch ``x``.

        Every dimension after the batch dimension is flattened, so the product
        of those dimensions must equal the flattened size given at construction.
        """
        x = torch.flatten(x, start_dim=1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output

    def train_step(self, inputs, labels):
        """Run one forward/backward pass and one optimizer update.

        Gradients are NOT cleared here; the caller is expected to call
        ``self.optimizer.zero_grad()`` before each step. ``inputs`` and
        ``labels`` should already be on the same device as the model.

        Args:
            inputs: Batch of input samples.
            labels: Batch of integer class indices.

        Returns:
            The loss of this batch as a Python float.
        """
        # Call this instance, not a module-level variable named `model`, so the
        # method works no matter what name the network is bound to.
        output = self(inputs)

        loss = self.nll_loss(output, labels)
        loss.backward()

        self.optimizer.step()

        return loss.item()

In [None]:
# Optimizer settings: Adam learning rate and the per-epoch StepLR decay factor.
lr = 0.001
gamma = 0.9

# Height and width of one input image.
input_shape = [28, 28]

# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Print the training loss every `log_interval` batches.
log_interval = 500

model = Net(input_shape, lr, gamma)
model.to(device)

In [None]:
# torchsummary prints the layer table itself, so wrapping the call in print()
# would only echo its return value. Pass the device type explicitly so the
# dummy input is created on the same device as the model.
summary(model, input_size=(28, 28), device=device.type)

### training

In [None]:
# Train for a fixed number of epochs. The learning-rate scheduler is stepped
# once at the end of every epoch.
num_epoch = 6
model.train()

for epoch in range(1, num_epoch + 1):
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.to(device)
        target = target.to(device)

        # Gradients are cleared here, before the model's train_step is called.
        model.optimizer.zero_grad()
        loss = model.train_step(data, target)

        # Only report progress every `log_interval` batches.
        if batch_idx % log_interval != 0:
            continue
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch,
            batch_idx * len(data),
            len(train_loader.dataset),
            100. * batch_idx / len(train_loader),
            loss,
        ))

    model.scheduler.step()

### testing

In [None]:
# Evaluate on the test set: accumulate the summed NLL loss and the number of
# correct predictions, then report the per-sample average loss and accuracy.
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for data, target in test_loader:
        data = data.to(device)
        target = target.to(device)
        output = model(data)

        # Sum (not mean) so the total can be divided by the dataset size below.
        batch_loss = F.nll_loss(output, target, reduction='sum')
        test_loss += batch_loss.item()

        # Predicted class = index of the largest log-probability.
        pred = output.argmax(dim=1, keepdim=True)
        hits = pred == target.view_as(pred)
        correct += hits.sum().item()

test_loss = test_loss / len(test_loader.dataset)

print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss,
    correct,
    len(test_loader.dataset),
    100. * correct / len(test_loader.dataset),
))

In [None]:
## Given an input image, show the predicted result

## references

### * Official PyTorch MNIST example code
### https://github.com/pytorch/examples/blob/main/mnist/main.py