In [None]:
import torch
import torch.nn as nn
import torchvision.datasets as ds
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
import numpy as np

In [None]:
# MNIST train and test splits, stored under the current directory and
# downloaded when requested via download=True. Every image goes through
# transforms.ToTensor() before it reaches the model.
mnist_train, mnist_test = (
    ds.MNIST(root=".", train=is_train, transform=transforms.ToTensor(), download=True)
    for is_train in (True, False)
)

In [None]:
class MLP(nn.Module):
    """Fully connected classifier with layer widths 784 -> 1024 -> 512 -> 512 -> 10.

    The input is flattened by ``nn.Flatten``, passed through three
    ``Linear`` + ``ReLU`` hidden stages and finally projected to 10 output
    scores by a plain ``Linear`` layer (no activation on the output).
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # Stages are created in forward order so that parameter registration
        # (and therefore the state_dict key order) is hid1, hid2, hid3, out.
        self.hid1 = self._hidden_stage(784, 1024)
        self.hid2 = self._hidden_stage(1024, 512)
        self.hid3 = self._hidden_stage(512, 512)
        self.out = nn.Linear(512, 10)

    @staticmethod
    def _hidden_stage(n_in, n_out):
        """Build one hidden stage: a Linear layer followed by ReLU."""
        return nn.Sequential(nn.Linear(n_in, n_out), nn.ReLU())

    def forward(self, x):
        """Return the 10 output scores for each sample in ``x``."""
        for stage in (self.flatten, self.hid1, self.hid2, self.hid3, self.out):
            x = stage(x)
        return x


# Model instance used by the training, evaluation and saving cells.
dmlp = MLP()

In [None]:
# Only the training loader is shuffled. The evaluation metrics below are
# sample-weighted and therefore independent of batch order, so the test loader
# is kept deterministic.
train_dataloader = DataLoader(dataset=mnist_train, batch_size=128, shuffle=True)
test_dataloader = DataLoader(dataset=mnist_test, batch_size=128, shuffle=False)

loss = nn.CrossEntropyLoss()
optimizer = Adam(dmlp.parameters(), lr=0.0001)
epochs = 50
# Per-epoch history (one entry per epoch), read by the plotting cells.
train_acc_dmlp, test_acc_dmlp = [], []
train_loss_dmlp, test_loss_dmlp = [], []

for epoch in range(epochs):

    # Running totals for this epoch. Metrics are accumulated per sample rather
    # than averaged per batch: the last batch of a loader can be smaller than
    # batch_size, and a plain mean of batch means would over-weight it.
    train_loss_sum, train_correct, train_seen = 0.0, 0, 0
    test_loss_sum, test_correct, test_seen = 0.0, 0, 0

    # ---- training pass ----
    dmlp.train()  # mode switch is loop-invariant, so set it once per pass
    for X, y in train_dataloader:
        y_pred = dmlp(X)
        train_loss = loss(y_pred, y)

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        n_batch = len(y)
        # The criterion returns the batch mean (default reduction), so scale
        # by the batch size to recover the per-sample sum.
        train_loss_sum += train_loss.item() * n_batch
        train_correct += (torch.argmax(y_pred, dim=1) == y).sum().item()
        train_seen += n_batch

    # ---- evaluation pass on the test set (reported as "val") ----
    dmlp.eval()
    with torch.no_grad():
        for X, y in test_dataloader:
            y_pred = dmlp(X)
            test_loss = loss(y_pred, y)

            n_batch = len(y)
            test_loss_sum += test_loss.item() * n_batch
            test_correct += (torch.argmax(y_pred, dim=1) == y).sum().item()
            test_seen += n_batch

    # Exact dataset-level means, rounded for logging and plotting.
    train_loss_epoch = round(train_loss_sum / train_seen, 4)
    train_acc_epoch = round(train_correct / train_seen, 4)
    test_loss_epoch = round(test_loss_sum / test_seen, 4)
    test_acc_epoch = round(test_correct / test_seen, 4)

    train_loss_dmlp.append(train_loss_epoch)
    train_acc_dmlp.append(train_acc_epoch)
    test_loss_dmlp.append(test_loss_epoch)
    test_acc_dmlp.append(test_acc_epoch)

    # Implicit string concatenation keeps the printed text identical to the
    # previous backslash-continued literal (including the run of spaces).
    print(
        f"Epoch {epoch+1}/{epochs}\tloss: {train_loss_epoch}\taccuracy: {train_acc_epoch} "
        f"    val loss: {test_loss_epoch}\tval accuracy: {test_acc_epoch}\n"
    )

In [None]:
# Final accuracy of the trained model on the test set.
test_acc_list = []  # per-batch accuracies, kept for inspection only
n_correct, n_total = 0, 0

dmlp.eval()  # mode switch is loop-invariant, so set it once
with torch.no_grad():
    for X, y in test_dataloader:
        y_pred = dmlp(X)
        batch_correct = (torch.argmax(y_pred, dim=1) == y).sum().item()
        n_correct += batch_correct
        n_total += len(y)
        test_acc_list.append(batch_correct / len(y))

# Overall accuracy is correct / total. Averaging test_acc_list instead would
# over-weight the last batch whenever it is smaller than the others.
res = n_correct / n_total
print(f"정확률 = {res*100}")

In [None]:
# Persist the model's state dict (not the module object itself) to disk.
trained_weights = dmlp.state_dict()
torch.save(trained_weights, "dmlp_trained.pth")

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Per-epoch accuracy curves for the training and test sets.
fig, ax = plt.subplots()
for history, label in ((train_acc_dmlp, "train"), (test_acc_dmlp, "test")):
    ax.plot(history, label=label)
ax.set(title="Accuracy graph", xlabel="epochs", ylabel="accuracy")
ax.legend()
ax.grid()
plt.show()

In [None]:
# Per-epoch loss curves for the training and test sets.
fig, ax = plt.subplots()
for history, label in ((train_loss_dmlp, "train"), (test_loss_dmlp, "test")):
    ax.plot(history, label=label)
ax.set(title="Loss graph", xlabel="epochs", ylabel="loss")
ax.legend()
ax.grid()
plt.show()