<a href="https://colab.research.google.com/github/KwonDoRyoung/ABRLaboratory/blob/main/0722/3_train_mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# import transforms and datasets in TorchVision
import torchvision.transforms as T
import torchvision.datasets as datasets

# Load MNIST (downloading into ./data if it is not there yet).
# Both objects wrap the same official *training* split on purpose: the
# train/validation partition is made later by index samplers, not here.
mnist_kwargs = dict(root='./data', train=True, download=True)
train_data = datasets.MNIST(transform=T.ToTensor(), **mnist_kwargs)
valid_data = datasets.MNIST(transform=T.ToTensor(), **mnist_kwargs)

In [None]:
# setup configuration
import torch
import numpy as np

# --- experiment configuration ---
random_seed = 0xAB  # 171; the same seed is given to NumPy and PyTorch below
shuffle = True      # shuffle the sample indices before splitting train/valid
valid_ratio = 0.1   # fraction of the training split held out for validation

# Seed both random number generators so the split and the weight
# initialisation are repeatable from a fresh kernel.
np.random.seed(random_seed)
torch.manual_seed(random_seed)
# Optional cuDNN determinism switches, left disabled:
#torch.backends.cudnn.deterministic = True
#torch.backends.cudnn.benchmark = False

<torch._C.Generator at 0x7fa200182a70>

In [None]:
# split given train data into train and valid dataset
from torch.utils.data.sampler import SubsetRandomSampler

# sizes of the full pool and of the held-out validation part
num_train = len(train_data)
num_valid = int(valid_ratio * num_train)

# candidate indices into the dataset, optionally shuffled in place
indices = [*range(num_train)]
if shuffle:
    np.random.shuffle(indices)

# the first `num_valid` indices go to validation, the remainder to training
valid_idx = indices[:num_valid]
train_idx = indices[num_valid:]

# each sampler draws (in random order) only from its own index subset
valid_sampler = SubsetRandomSampler(valid_idx)
train_sampler = SubsetRandomSampler(train_idx)

In [None]:
# import DataLoader in PyTorch
from torch.utils.data import DataLoader

# Build one loader per subset. The samplers decide which indices each loader
# may draw, so the two loaders never serve the same sample.
batch_size = 64
train_dataloader = DataLoader(
    dataset=train_data,
    sampler=train_sampler,
    batch_size=batch_size,
)
valid_dataloader = DataLoader(
    dataset=valid_data,
    sampler=valid_sampler,
    batch_size=batch_size,
)

In [None]:
# import nn in PyTorch
import torch
import torch.nn as nn

# define MLP network with one hidden layer (original version)
class MLP(nn.Module):
    """Fully connected classifier: 784 -> 512 -> 256 -> 10, ReLU after the first two layers."""

    def __init__(self):
        super().__init__()
        # collapse every non-batch dimension into one vector of 28*28 = 784 values
        self.flatten = nn.Flatten()
        self.input_layer = nn.Linear(in_features=28 * 28, out_features=512)
        self.hidden_layer = nn.Linear(in_features=512, out_features=256)
        self.output_layer = nn.Linear(in_features=256, out_features=10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the raw (un-normalised) class scores for a batch of images."""
        features = self.flatten(x)
        for layer in (self.input_layer, self.hidden_layer):
            features = self.relu(layer(features))
        # no activation on the last layer: the loss function receives logits
        return self.output_layer(features)

In [None]:
# import cuda in PyTorch
import torch.cuda as cuda

# use the GPU when PyTorch can see one, otherwise fall back to the CPU
device = 'cpu'
if cuda.is_available():
    device = 'cuda'

# build the network, then move its parameters onto the chosen device
myMLP = MLP()
myMLP = myMLP.to(device)

# cross entropy (averaged over the batch) is the classification loss;
# plain stochastic gradient descent with a learning rate of 0.005 updates the weights
loss_fn = nn.CrossEntropyLoss(reduction='mean')
optimizer = torch.optim.SGD(params=myMLP.parameters(), lr=0.005)

In [None]:
# train the MLP model
epochs = 5
report_interval = 100  # run a validation pass every this many training batches
max_valid_acc = 0      # best validation accuracy seen so far (fraction in [0, 1])
save_path = 'model.pt' # where the best-scoring weights are written

# The loader draws only from its sampler's index subset, so one epoch covers
# len(sampler) samples, not the whole underlying dataset. Use that count so
# the progress denominator matches what is actually iterated.
ndata = len(train_dataloader.sampler)
print('training starts!')

for e in range(epochs):
    print(f'\nepoch {e+1}\n------------------------------')
    myMLP.train() # train mode

    for b, (X, y) in enumerate(train_dataloader):
        X, y = X.to(device), y.to(device) # input and target to device(gpu)

        prediction = myMLP(X) # forward pass
        train_loss = loss_fn(prediction, y) # calculate the loss

        optimizer.zero_grad() # clear gradients
        train_loss.backward() # backpropagation
        optimizer.step() # update the parameters

        if b % report_interval != 0:
            continue

        # ---- periodic report: current training loss + full validation pass ----
        train_loss_value, current = train_loss.item(), b * len(X)
        print(f'[{current:>5d}/{ndata:>5d}]  train loss: {train_loss_value:>7f}  ', end="")

        myMLP.eval() # evaluation mode for the validation pass
        with torch.no_grad():
            valid_ndata = 0
            valid_loss, valid_correct = 0.0, 0
            # separate names so the training batch (X, y) is not overwritten
            for X_val, y_val in valid_dataloader:
                X_val, y_val = X_val.to(device), y_val.to(device)
                valid_prediction = myMLP(X_val)
                n_val = len(X_val)
                # loss_fn returns the batch mean; weight it by the batch size
                # so a smaller final batch does not skew the overall average
                valid_loss += loss_fn(valid_prediction, y_val).item() * n_val
                valid_correct += (valid_prediction.argmax(1) == y_val).sum().item() # add up the correct predictions
                valid_ndata += n_val
        # switch back: without this the rest of the epoch would train in eval mode
        myMLP.train()

        # guard against an empty validation subset (e.g. valid_ratio = 0)
        valid_denominator = max(valid_ndata, 1)
        valid_loss /= valid_denominator
        valid_correct /= valid_denominator
        print(f"valid accuracy: {(100*valid_correct):>0.1f}%, valid loss: {valid_loss:>8f}")

        # keep only the weights that achieved the best validation accuracy so far
        if max_valid_acc < valid_correct:
            torch.save(myMLP.state_dict(), save_path)
            max_valid_acc = valid_correct

print('\ntraining is finished!')

training starts!

epoch 1
------------------------------
[    0/60000]  train loss: 2.307188  valid accuracy: 6.5%, valid loss: 2.307819
[ 6400/60000]  train loss: 2.282454  valid accuracy: 19.5%, valid loss: 2.284902
[12800/60000]  train loss: 2.273978  valid accuracy: 38.3%, valid loss: 2.261636
[19200/60000]  train loss: 2.222906  valid accuracy: 45.3%, valid loss: 2.233253
[25600/60000]  train loss: 2.202410  valid accuracy: 56.7%, valid loss: 2.196545
[32000/60000]  train loss: 2.135819  valid accuracy: 67.2%, valid loss: 2.148727
[38400/60000]  train loss: 2.090995  valid accuracy: 70.0%, valid loss: 2.085641
[44800/60000]  train loss: 1.986894  valid accuracy: 70.8%, valid loss: 2.000939
[51200/60000]  train loss: 1.915801  valid accuracy: 73.2%, valid loss: 1.893889

epoch 2
------------------------------
[    0/60000]  train loss: 1.844471  valid accuracy: 73.6%, valid loss: 1.836896
[ 6400/60000]  train loss: 1.642508  valid accuracy: 74.2%, valid loss: 1.689183
[12800/60000]