In [None]:
import torch
import torch.nn as nn
import torchvision.datasets as ds
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.optim import SGD, Adam
import numpy as np

In [None]:
# MNIST train/test splits. `root=""` stores the files relative to the current
# working directory; `ToTensor()` is applied to every image on access.
mnist_train = ds.MNIST(
    root="",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
mnist_test = ds.MNIST(
    root="",
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

In [None]:
class MLP(nn.Module):
    """Multi-layer perceptron with one hidden layer.

    Pipeline: flatten -> Linear(784, 512) + Tanh -> Linear(512, 10) + Softmax.
    Because the last stage is a softmax over dim 1, `forward` returns one
    probability vector of length 10 per input sample.
    """

    def __init__(self):
        super().__init__()
        # Layers are created hidden-first, then output, so that parameter
        # initialisation consumes the RNG in a fixed order.
        hidden_linear = nn.Linear(784, 512)
        output_linear = nn.Linear(512, 10)

        self.flatten = nn.Flatten()
        self.hid1 = nn.Sequential(hidden_linear, nn.Tanh())
        self.out = nn.Sequential(output_linear, nn.Softmax(dim=1))

    def forward(self, x):
        """Return the softmax class probabilities for the batch `x`."""
        flat = self.flatten(x)
        hidden = self.hid1(flat)
        return self.out(hidden)

# Two independently initialised networks, one per optimizer under comparison
# (the SGD model is constructed first, then the Adam model).
mlp_sgd, mlp_adam = MLP(), MLP()

In [None]:
# Train `mlp_sgd` with plain SGD and record per-epoch train / validation accuracy
# in `train_acc_sgd` / `test_acc_sgd`.
train_dataloader = DataLoader(dataset=mnist_train, batch_size=128, shuffle=True)
# Evaluation metrics do not depend on sample order, so the test loader is not shuffled.
test_dataloader = DataLoader(dataset=mnist_test, batch_size=128, shuffle=False)

loss = nn.MSELoss()
optimizer = SGD(mlp_sgd.parameters(), lr=0.01)
epochs = 50
train_acc_sgd, test_acc_sgd = [], []

for epoch in range(epochs):

    # Running sums weighted by batch size. Averaging per-batch means would
    # over-weight the final batch whenever it is smaller than `batch_size`.
    train_loss_sum, train_correct, train_seen = 0.0, 0, 0
    test_loss_sum, test_correct, test_seen = 0.0, 0, 0

    mlp_sgd.train()
    for X, y in train_dataloader:

        optimizer.zero_grad()
        y_pred = mlp_sgd(X)
        # MSELoss needs a float target of the same shape as the prediction;
        # `.float()` keeps the tensor on whatever device `y` already lives on.
        y_onehot = nn.functional.one_hot(y, 10).float()
        train_loss = loss(y_pred, y_onehot)
        train_loss.backward()
        optimizer.step()

        n_batch = y.size(0)
        # `loss` is a mean over the batch, so multiply back by the batch size.
        train_loss_sum += train_loss.item() * n_batch
        train_correct += (torch.argmax(y_pred, dim=1) == y).sum().item()
        train_seen += n_batch

    mlp_sgd.eval()
    with torch.no_grad():
        for X, y in test_dataloader:

            y_pred = mlp_sgd(X)
            y_onehot = nn.functional.one_hot(y, 10).float()
            test_loss = loss(y_pred, y_onehot)

            n_batch = y.size(0)
            test_loss_sum += test_loss.item() * n_batch
            test_correct += (torch.argmax(y_pred, dim=1) == y).sum().item()
            test_seen += n_batch

    train_loss_epoch = round(train_loss_sum / train_seen, 4)
    train_acc_epoch = round(train_correct / train_seen, 4)
    test_loss_epoch = round(test_loss_sum / test_seen, 4)
    test_acc_epoch = round(test_correct / test_seen, 4)

    train_acc_sgd.append(train_acc_epoch)
    test_acc_sgd.append(test_acc_epoch)

    print(f"Epoch {epoch+1}/{epochs}\tloss: {train_loss_epoch}\taccuracy: {train_acc_epoch} \
    val loss: {test_loss_epoch}\tval accuracy: {test_acc_epoch}\n")

In [None]:
# Final test-set accuracy of the SGD-trained model.
# Correct predictions are counted over the whole set and divided by the number
# of samples, so a smaller last batch does not skew the result the way a mean
# of per-batch accuracies would.
n_correct, n_total = 0, 0

mlp_sgd.eval()
with torch.no_grad():
    for X, y in test_dataloader:
        y_pred = mlp_sgd(X)
        n_correct += (torch.argmax(y_pred, dim=1) == y).sum().item()
        n_total += y.size(0)

res = n_correct / n_total
# The printed label is Korean for "accuracy"; the value is a percentage.
print(f"정확률 = {res*100}")

In [None]:
# Train `mlp_adam` with Adam and record per-epoch train / validation accuracy
# in `train_acc_adam` / `test_acc_adam`.
train_dataloader = DataLoader(dataset=mnist_train, batch_size=128, shuffle=True)
# Evaluation metrics do not depend on sample order, so the test loader is not shuffled.
test_dataloader = DataLoader(dataset=mnist_test, batch_size=128, shuffle=False)

loss = nn.MSELoss()
optimizer = Adam(mlp_adam.parameters(), lr=0.001)
epochs = 50
train_acc_adam, test_acc_adam = [], []

for epoch in range(epochs):

    # Running sums weighted by batch size. Averaging per-batch means would
    # over-weight the final batch whenever it is smaller than `batch_size`.
    train_loss_sum, train_correct, train_seen = 0.0, 0, 0
    test_loss_sum, test_correct, test_seen = 0.0, 0, 0

    mlp_adam.train()
    for X, y in train_dataloader:

        optimizer.zero_grad()
        y_pred = mlp_adam(X)
        # MSELoss needs a float target of the same shape as the prediction;
        # `.float()` keeps the tensor on whatever device `y` already lives on.
        y_onehot = nn.functional.one_hot(y, 10).float()
        train_loss = loss(y_pred, y_onehot)
        train_loss.backward()
        optimizer.step()

        n_batch = y.size(0)
        # `loss` is a mean over the batch, so multiply back by the batch size.
        train_loss_sum += train_loss.item() * n_batch
        train_correct += (torch.argmax(y_pred, dim=1) == y).sum().item()
        train_seen += n_batch

    mlp_adam.eval()
    with torch.no_grad():
        for X, y in test_dataloader:

            y_pred = mlp_adam(X)
            y_onehot = nn.functional.one_hot(y, 10).float()
            test_loss = loss(y_pred, y_onehot)

            n_batch = y.size(0)
            test_loss_sum += test_loss.item() * n_batch
            test_correct += (torch.argmax(y_pred, dim=1) == y).sum().item()
            test_seen += n_batch

    train_loss_epoch = round(train_loss_sum / train_seen, 4)
    train_acc_epoch = round(train_correct / train_seen, 4)
    test_loss_epoch = round(test_loss_sum / test_seen, 4)
    test_acc_epoch = round(test_correct / test_seen, 4)

    train_acc_adam.append(train_acc_epoch)
    test_acc_adam.append(test_acc_epoch)

    print(f"Epoch {epoch+1}/{epochs}\tloss: {train_loss_epoch}\taccuracy: {train_acc_epoch} \
    val loss: {test_loss_epoch}\tval accuracy: {test_acc_epoch}\n")

In [None]:
# Final test-set accuracy of the Adam-trained model.
# Correct predictions are counted over the whole set and divided by the number
# of samples, so a smaller last batch does not skew the result the way a mean
# of per-batch accuracies would.
n_correct, n_total = 0, 0

mlp_adam.eval()
with torch.no_grad():
    for X, y in test_dataloader:
        y_pred = mlp_adam(X)
        n_correct += (torch.argmax(y_pred, dim=1) == y).sum().item()
        n_total += y.size(0)

res = n_correct / n_total
# The printed label is Korean for "accuracy"; the value is a percentage.
print(f"정확률 = {res*100}")

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Accuracy curves for both optimizers on one set of axes:
# red = SGD, blue = Adam; dashed = training, solid = validation.
fig, ax = plt.subplots()
ax.plot(train_acc_sgd, "r--", label="train_sgd")
ax.plot(test_acc_sgd, "r", label="val_sgd")
ax.plot(train_acc_adam, "b--", label="train_adam")
ax.plot(test_acc_adam, "b", label="val_adam")
ax.set_title("Comparison of SGD and Adam optimizers")
ax.set_xlabel("epochs")
ax.set_ylabel("accuracy")
# Accuracies below 0.7 fall outside the visible range.
ax.set_ylim((0.7, 1.0))
ax.legend()
ax.grid()
plt.show()