In [1]:
# Lab 10 MNIST and softmax
import torch
import torchvision.datasets as dsets # 데이터셋 제공
import torchvision.transforms as transforms # 데이터 전처리 등에 쓰이는 함수들 제공

In [2]:
# cuda 지원하면 cuda 쓰고 안되면 cpu 사용

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [3]:
# parameters 지정
learning_rate = 0.5
batch_size = 10

In [4]:
# MNIST dataset 다운로드
mnist_train = dsets.MNIST(root='MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz


9913344it [00:04, 2196569.76it/s]                             


Extracting MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz


29696it [00:00, 1766473.57it/s]          


Extracting MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz


1649664it [00:01, 1610726.34it/s]                             


Extracting MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


5120it [00:00, 11086647.64it/s]         

Extracting MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw






In [5]:
# dataset loader를 이용해 batch size 등을 적용하여 모델 학습에 사용할 수 있다. 
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

In [6]:
# 레이어를 생성한다. 
w1 = torch.nn.Parameter(torch.Tensor(784, 30)).to(device) # weight 
b1 = torch.nn.Parameter(torch.Tensor(30)).to(device) # bias
w2 = torch.nn.Parameter(torch.Tensor(30, 10)).to(device) # weight
b2 = torch.nn.Parameter(torch.Tensor(10)).to(device) # bias

In [7]:
# 정규분포로 가중치, 편향을 초기화 
torch.nn.init.normal_(w1)
torch.nn.init.normal_(b1)
torch.nn.init.normal_(w2)
torch.nn.init.normal_(b2)

Parameter containing:
tensor([ 0.3078, -1.9857,  1.0512,  1.5122, -1.0199, -0.7402, -1.3111,  0.6142,
        -0.6474,  0.1758], requires_grad=True)

In [8]:
# 시그모이드 함수를 직접 작성
def sigmoid(x):
    #  sigmoid function
    return 1.0 / (1.0 + torch.exp(-x))
    # return torch.div(torch.tensor(1), torch.add(torch.tensor(1.0), torch.exp(-x)))

In [9]:
# 미분을 구현
def sigmoid_prime(x):
    # derivative of the sigmoid function
    return sigmoid(x) * (1 - sigmoid(x))

In [10]:
# test dataset을 일부 가져온다. 
X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)[:1000]
Y_test = mnist_test.test_labels.to(device)[:1000]
i = 0
while not i == 10000:
    for X, Y in data_loader: # 이런 방식으로 X, Y를 가져올 수 있다. 
        i += 1

        # forward 정전파
        X = X.view(-1, 28 * 28).to(device)
        Y = torch.zeros((batch_size, 10)).scatter_(1, Y.unsqueeze(1), 1).to(device)    # one-hot
        l1 = torch.add(torch.matmul(X, w1), b1) # l = w*x + b
        a1 = sigmoid(l1)
        l2 = torch.add(torch.matmul(a1, w2), b2)
        y_pred = sigmoid(l2) # 최종 입력값(예측값)

        diff = y_pred - Y # 라벨 데이터와 예측 데이터의 차이를 구한다

        # Back prop (chain rule)
        # 모델에 있는 각 가중치, 편향에 대한 그레디언트를 구한다
        d_l2 = diff * sigmoid_prime(l2) 
        d_b2 = d_l2
        d_w2 = torch.matmul(torch.transpose(a1, 0, 1), d_l2)

        d_a1 = torch.matmul(d_l2, torch.transpose(w2, 0, 1))
        d_l1 = d_a1 * sigmoid_prime(l1)
        d_b1 = d_l1
        d_w1 = torch.matmul(torch.transpose(X, 0, 1), d_l1)

        # 가중치를 업데이트한다 
        w1 = w1 - learning_rate * d_w1
        b1 = b1 - learning_rate * torch.mean(d_b1, 0)
        w2 = w2 - learning_rate * d_w2
        b2 = b2 - learning_rate * torch.mean(d_b2, 0)

        # 1000번마다 정확도를 구한다. 1000에 가까워지면 좋은거다
        if i % 1000 == 0:
            l1 = torch.add(torch.matmul(X_test, w1), b1)
            a1 = sigmoid(l1)
            l2 = torch.add(torch.matmul(a1, w2), b2)
            y_pred = sigmoid(l2)
            acct_mat = torch.argmax(y_pred, 1) == Y_test
            acct_res = acct_mat.sum()
            print(acct_res.item())

        if i == 10000:
            break



804
856
859
883
895
901
902
902
896
910
