## Import Required Libraries

In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn as nn

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [3]:
# parameters
learning_rate = 0.5
batch_size = 10

### torchvision.datasets 의 Parameter
- root : 데이터셋을 어느 경로에 받을 것인가?
- train : Train dataset을 다운 받을 것인지? Test dataset을 다운 받을 것인지?
- transform : 일반 이미지 (H,W,C) (픽셀값 : 0 ~ 255) --> Torch에서 쓸 수 있는 이미지 (C,H,W) (픽셀값 : 0 ~ 1)
- download : 만약 없을시 다운로드를 할 것인가?

In [4]:
# MNIST dataset
mnist_train = dsets.MNIST(root='MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)

### torch.utils.data.DataLoader 의 Parameter
- dataset : 어떤 데이터셋을 읽어올 것인지?
- batch_size : batch_size를 몇으로 할 것인지?
- shuffle : dataset을 섞을 것인지?
- drop_last : 만약 batch_size만큼 데이터를 가져왔을 때, 남은 데이터는 어떻게 처리할 것인지?

In [5]:
# dataset loader
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

In [6]:
# Weight, bias 생성
# Input : 28*28(784), Output : 10

w1 = nn.Parameter(torch.Tensor(784, 30)).to(device)
b1 = nn.Parameter(torch.Tensor(30)).to(device)
w2 = nn.Parameter(torch.Tensor(30, 10)).to(device)
b2 = nn.Parameter(torch.Tensor(10)).to(device)

In [7]:
# Initialize
nn.init.normal_(w1)
nn.init.normal_(b1)
nn.init.normal_(w2)
nn.init.normal_(b2)

tensor([ 1.8153,  0.1568,  0.0348,  0.0334,  0.9967,  0.3957,  1.0805,  0.0302,
        -0.4433, -0.0206], device='cuda:0', grad_fn=<CopyBackwards>)

In [8]:
# sigmoid function
    
def sigmoid(x):
    return 1.0 / (1.0 + torch.exp(-x))

In [9]:
# derivative of the sigmoid function (미분 값)

def sigmoid_prime(x):
    return sigmoid(x) * (1 - sigmoid(x))

In [10]:
X_test = mnist_test.data.view(-1, 28 * 28).float().to(device)[:1000]
Y_test = mnist_test.targets.to(device)[:1000]
# X_Test, Y_Test 모두 1000개만 뽑아서 사용

i = 0
while not i == 10000:
    for X, Y in data_loader:
        i += 1
        
        # Training
        
        # forward
        X = X.view(-1, 28 * 28).to(device)    # X의 shape (10,1,28,28) -> (10,784)
       
        Y = torch.zeros((batch_size, 10)).scatter_(1, Y.unsqueeze(1), 1).to(device)    # one-hot, Y.shape(10,10)
        l1 = torch.add(torch.matmul(X, w1), b1)   #l1.shape (10,30)
        a1 = sigmoid(l1)
        l2 = torch.add(torch.matmul(a1, w2), b2)  #l2.shape (10,10)
        y_pred = sigmoid(l2)

        diff = y_pred - Y

        # Back prop (chain rule)
        d_l2 = diff * sigmoid_prime(l2)
        d_b2 = d_l2
        d_w2 = torch.matmul(torch.transpose(a1, 0, 1), d_l2)

        d_a1 = torch.matmul(d_l2, torch.transpose(w2, 0, 1))
        d_l1 = d_a1 * sigmoid_prime(l1)
        d_b1 = d_l1
        d_w1 = torch.matmul(torch.transpose(X, 0, 1), d_l1)

        w1 = w1 - learning_rate * d_w1
        b1 = b1 - learning_rate * torch.mean(d_b1, 0)
        w2 = w2 - learning_rate * d_w2
        b2 = b2 - learning_rate * torch.mean(d_b2, 0)

        if i % 1000 == 0:
            # Test
            l1 = torch.add(torch.matmul(X_test, w1), b1)
            a1 = sigmoid(l1)
            l2 = torch.add(torch.matmul(a1, w2), b2)
            y_pred = sigmoid(l2)
            
            # Accuarcy
            acct_mat = torch.argmax(y_pred, 1) == Y_test
            acct_res = acct_mat.sum()
            print(acct_res.item())

        if i == 10000:
            print('Finish')
            break

814
843
868
882
892
892
897
898
914
909
Finish
