# MNIST (with cuda)

## 1) 필요한 모듈을 호출해옵니다

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import random
import matplotlib.pyplot as plt

## 2) device 지정 & manual seed 설정

In [13]:
# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Fix the seeds of Python's and PyTorch's random number generators so that
# repeated runs of the notebook draw the same random numbers.
random.seed(1)
torch.manual_seed(1)
if device == 'cuda':
    # Also seed the generators of every visible CUDA device.
    torch.cuda.manual_seed_all(1)

## 3) 학습모델에 필요한 parameter를 설정합니다

In [3]:
# Hyperparameters read by the cells below.
epochs = 15              # number of full passes over the training loader
batch_size = 100         # samples per mini-batch handed to the DataLoader
drop_prob = 0.3          # probability passed to nn.Dropout
learning_rate = 0.00001  # step size passed to the Adam optimizer

## 4) 학습, 테스트셋을 불러옵니다 

In [4]:
# Both splits are stored under the same folder and converted with ToTensor();
# only the `train` flag differs. The files are downloaded if they are missing.
mnist_train = dsets.MNIST(
    'MNIST_data/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

mnist_test = dsets.MNIST(
    'MNIST_data/',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

## 5) minibatch 학습을 위해 학습셋을 data loader에 담아둡니다

In [5]:
# Serve the training split in shuffled mini-batches of `batch_size` samples.
# drop_last=True discards a final batch that would be smaller than the others.
data_loader = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)

## 6) 모델을 순차적으로 구성합니다

## 6-1) layer 생성

In [6]:
# Fully connected layers: a flattened 28x28 image (784 values) goes through
# four 512-unit layers and ends in 10 output scores. Bias terms are enabled,
# which is nn.Linear's default.
linear1 = nn.Linear(in_features=28 * 28, out_features=512)
linear2 = nn.Linear(in_features=512, out_features=512)
linear3 = nn.Linear(in_features=512, out_features=512)
linear4 = nn.Linear(in_features=512, out_features=512)
linear5 = nn.Linear(in_features=512, out_features=10)

## 6-2) batch normalization 설정

In [7]:
# One independent BatchNorm1d per hidden layer, each over 512 features.
bn1, bn2, bn3, bn4 = (nn.BatchNorm1d(512) for _ in range(4))

## 6-3) activation function 지정

In [8]:
# ReLU activation module; inplace=False is the default, spelled out here.
relu = nn.ReLU(inplace=False)

## 6-4) drop out 설정

In [9]:
# Dropout module that zeroes elements with probability `drop_prob` while the
# model is in training mode.
dropout = nn.Dropout(drop_prob, inplace=False)

## 6-5) xavier initialization 실행

In [10]:
# Re-initialise every linear layer's weight matrix in place with Xavier
# (Glorot) uniform initialisation, in layer order. Biases are left untouched.
for hidden_layer in (linear1, linear2, linear3, linear4):
    nn.init.xavier_uniform_(hidden_layer.weight)
# Kept as the cell's final expression so the notebook still echoes the output
# layer's weight parameter.
nn.init.xavier_uniform_(linear5.weight)

Parameter containing:
tensor([[ 0.0147, -0.0003, -0.0210,  ...,  0.0707, -0.0314, -0.0136],
        [ 0.0718, -0.0103,  0.0366,  ..., -0.0319,  0.0462,  0.0303],
        [ 0.0575, -0.0890, -0.0492,  ...,  0.0100,  0.0807, -0.0359],
        ...,
        [-0.0403,  0.0531, -0.0981,  ...,  0.0617, -0.0011,  0.0624],
        [-0.0896,  0.0671,  0.0815,  ...,  0.0659,  0.1023, -0.0633],
        [ 0.1020,  0.0481, -0.0295,  ..., -0.1030, -0.0380, -0.0858]],
       requires_grad=True)

## 6-6) model 구축

In [11]:
# Stack the layers into one feed-forward network: four hidden stages of
# Linear -> BatchNorm -> ReLU -> Dropout, followed by the 10-way output layer.
# The single `relu` and `dropout` instances hold no parameters, so reusing
# them at every stage is safe.
# The network is moved to `device` because the training and evaluation cells
# send their inputs there; leaving the parameters on the CPU makes the forward
# pass fail with a device mismatch when CUDA is selected.
model = nn.Sequential(linear1, bn1, relu, dropout,
                     linear2, bn2, relu, dropout,
                     linear3, bn3, relu, dropout,
                     linear4, bn4, relu, dropout,
                     linear5).to(device)

## 7) 학습 전 loss function과 optimizer를 지정해줍니다

In [14]:
# Adam updates every parameter registered on `model` with the configured
# learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# Cross-entropy between the model's output scores and the integer labels,
# placed on the same device as the data.
loss_function = nn.CrossEntropyLoss()
loss_function = loss_function.to(device)

## 8) for문을 돌면서 training을 시작합니다

In [None]:
# Mini-batch training: each epoch walks the whole loader once and prints the
# mean loss over its batches.
total_batch = len(data_loader)
model.train()  # training mode: dropout active, BatchNorm uses batch statistics
for epoch in range(1, epochs+1):
    # Accumulate as a plain Python float. Summing the loss tensors themselves
    # would keep autograd history attached to the running total for the whole
    # epoch, and the final .item() call would fail on an empty loader.
    avg_loss = 0.0

    for x_train, y_train in data_loader:
        # Flatten each 28x28 image into a 784-long row for the first linear layer.
        x_train = x_train.view(-1, 28*28).to(device)
        y_train = y_train.to(device)

        prediction = model(x_train)
        loss = loss_function(prediction, y_train)

        # Clear gradients from the previous step, backpropagate, then update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        avg_loss += loss.item() / total_batch

    print('epoch: {:5d}/{} >>> loss: {:.6f}'.format(epoch, epochs, avg_loss))

print('learning finished')

## 9) 학습된 모델을 평가합니다

In [None]:
# Evaluate the trained network on the whole test split, then show one randomly
# chosen test digit together with the model's prediction for it.
model.eval()  # inference mode: dropout off, BatchNorm uses running statistics
with torch.no_grad():
    # `.data` holds the raw uint8 pixels (0-255). The training inputs went
    # through ToTensor(), which rescales to [0, 1], so the same scaling is
    # applied here to evaluate on the input range the model was trained on.
    # `.data` / `.targets` replace the deprecated `test_data` / `test_labels`.
    x_test = mnist_test.data.view(-1, 28*28).float().div(255).to(device)
    y_test = mnist_test.targets.to(device)

    prediction = model(x_test)
    correct = torch.argmax(prediction, 1) == y_test
    accuracy = correct.float().mean()
    print(accuracy.item())

    # Pick one test sample at random; the slice keeps the batch dimension.
    r = random.randint(0, len(mnist_test)-1)
    x_random_data = x_test[r:r+1]
    y_random_data = y_test[r:r+1]

    print('Label: ', y_random_data.item())
    single_prediction = model(x_random_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

    # Plot the original (unscaled) image from the CPU-side dataset tensor.
    plt.imshow(mnist_test.data[r:r+1].view(28,28), cmap='Greys', interpolation='nearest')
    plt.show()