# Multi Layer Perceptron

More information can be found on [Edward Choi's YouTube channel](https://www.youtube.com/@mp2893/featured).\
KAIST AI504

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import DataLoader

import torchvision
import torchvision.transforms as transforms

In [2]:
# Choose the GPU by index: only the listed device number is exposed to CUDA.
# Run this cell before the first CUDA call so the setting takes effect.
import os
os.environ.update({'CUDA_VISIBLE_DEVICES': '0'})  # expose GPU number 0 only

In [3]:
# Show which PyTorch build is installed (useful when reproducing the results)
installed_version = torch.__version__
print(installed_version)

2.0.1+cu117


## Load MNIST

In [4]:
# MNIST dataset
# download=True fetches the files into `root` only when they are missing;
# an existing copy is reused, so the cell also works on a fresh checkout.
# ToTensor() converts each image to a float tensor scaled to [0, 1].
train_dataset = torchvision.datasets.MNIST(root='../', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = torchvision.datasets.MNIST(root='../', train=False, transform=transforms.ToTensor(), download=True)

# Data loader
# mini batch size (shared by the training and the test loader)
batch_size = 128
# Training batches are reshuffled every epoch; test batches keep a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

## Model

In [5]:
class NeuralNet(nn.Module):
    """Multi-layer perceptron with two sigmoid-activated hidden layers.

    Args:
        input_size: number of features in each input vector.
        hidden_size: width shared by both hidden layers.
        output_size: number of values produced per input.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Three fully connected layers: input -> hidden -> hidden -> output
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        # One parameter-free sigmoid module, reused after fc1 and fc2
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the raw output of the last linear layer (no final activation)."""
        hidden = self.sigmoid(self.fc1(x))
        hidden = self.sigmoid(self.fc2(hidden))
        return self.fc3(hidden)

## Training

In [6]:
# Pick the compute device: use the GPU when CUDA is available, otherwise fall
# back to the CPU so this cell also runs on machines without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Number of passes over the training set (used by the loop and the log message)
num_epochs = 10

# Generate model
# input dim: 784 (28*28 flattened image)  / hidden dim: 20  / output dim: 10
model = NeuralNet(784, 20, 10)

# Upload model to the selected device
model = model.to(device)

# Loss function define (we use cross-entropy)
loss_fn = nn.CrossEntropyLoss()

# Define optimizer (SGD with momentum); alternatives to try are left commented out
# optimizer = torch.optim.SGD(model.parameters(), lr=0.05) 
optimizer = torch.optim.SGD(model.parameters(), lr=0.05, momentum=0.9)
# optimizer = torch.optim.Adam(model.parameters(), lr=0.05)

# Train the model
total_step = len(train_loader)  # number of mini batches per epoch

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):  # mini batch for loop
        # flatten each 28x28 image into a 784-vector and move the batch to the device
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        
        # Forward
        outputs = model(images)  # forward(images): get prediction
        loss = loss_fn(outputs, labels)  # calculate the loss (cross entropy loss) with ground truth & prediction value
        
        # Backward and optimize
        optimizer.zero_grad()  # clear gradients accumulated by the previous step
        loss.backward()  # automatic gradient calculation (autograd)
        optimizer.step()  # update model parameter with requires_grad=True 
        
        # Log the loss of the current mini batch every 100 steps
        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

Epoch [1/10], Step [100/469], Loss: 2.2533
Epoch [1/10], Step [200/469], Loss: 1.6477
Epoch [1/10], Step [300/469], Loss: 1.1149
Epoch [1/10], Step [400/469], Loss: 0.8784
Epoch [2/10], Step [100/469], Loss: 0.5147
Epoch [2/10], Step [200/469], Loss: 0.4597
Epoch [2/10], Step [300/469], Loss: 0.3768
Epoch [2/10], Step [400/469], Loss: 0.4204
Epoch [3/10], Step [100/469], Loss: 0.3063
Epoch [3/10], Step [200/469], Loss: 0.2942
Epoch [3/10], Step [300/469], Loss: 0.3494
Epoch [3/10], Step [400/469], Loss: 0.3297
Epoch [4/10], Step [100/469], Loss: 0.4042
Epoch [4/10], Step [200/469], Loss: 0.2513
Epoch [4/10], Step [300/469], Loss: 0.2769
Epoch [4/10], Step [400/469], Loss: 0.3195
Epoch [5/10], Step [100/469], Loss: 0.2981
Epoch [5/10], Step [200/469], Loss: 0.1646
Epoch [5/10], Step [300/469], Loss: 0.3827
Epoch [5/10], Step [400/469], Loss: 0.2156
Epoch [6/10], Step [100/469], Loss: 0.1741
Epoch [6/10], Step [200/469], Loss: 0.1926
Epoch [6/10], Step [300/469], Loss: 0.2600
Epoch [6/10

## Testing

In [7]:
# Test the model
# Evaluate on whichever device the model's parameters live on, so this cell
# works whether the model was placed on a GPU or on the CPU.
device = next(model.parameters()).device

# Switch to evaluation mode. This network has no dropout/batch-norm layers, so
# it changes nothing today, but it keeps the cell correct if such layers are added.
model.eval()

correct = 0  # number of correctly classified test images
total = 0    # number of test images seen

# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)  # classification -> get the label prediction of top 1
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))

Accuracy of the network on the 10000 test images: 95.25 %
