In [186]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data.dataloader as dataloader
import torch.optim as optim

from torch.autograd import Variable
from torchvision import transforms
from torchvision.datasets import MNIST, mnist

## Data

* [Pytorch Transform Documentation](http://pytorch.org/docs/torchvision/transforms.html)


1. **torchvision.transforms.Compose:** 여러개의 tranforms을 실행합니다. 
2. **torchvision.transforms.ToTensor:** PIL.Image 또는 [0, 255] range의 Numpy array(H x W x C)를 (C x H x W)의 **[0.0, 1.0] range**를 갖은 torch.FloatTensor로 변형시킵니다. <br>여기서 포인트가 0에서 1사이의 값을 갖은 값으로 normalization이 포함되있습니다. 
3. **dataloader.DataLoader:** 사용하여 training시킬때 1개의 batch를 가져올때 shape이 **torch.Size([64, 1, 28, 28])** 이렇게 나옵니다. 

In [188]:
mnist.


<module 'torch.utils.data.dataset' from '/usr/local/lib/python3.6/site-packages/torch/utils/data/dataset.py'>

In [176]:
train = MNIST('./data', train=True, download=True, transform=transforms.Compose([
    transforms.ToTensor(), # ToTensor does min-max normalization. 
]), )

test = MNIST('./data', train=False, download=True, transform=transforms.Compose([
    transforms.ToTensor(), # ToTensor does min-max normalization. 
]), )


train_loader = dataloader.DataLoader(train, shuffle=True, batch_size=64, num_workers=1, pin_memory=True)
test_loader = dataloader.DataLoader(test, shuffle=True, batch_size=64, num_workers=1, pin_memory=True)

In [43]:
train.train_data = train.train_data.cuda()
test.test_data = test.test_data.cuda()

In [145]:
train_data = train.train_data
train_data = train.transform(train_data.numpy())

print('[Train]')
print(' - Numpy Shape:', train.train_data.cpu().numpy().shape)
print(' - Tensor Shape:', train.train_data.size())
print(' - Transformed Shape:', train_data.size())
print(' - min:', torch.min(train_data))
print(' - max:', torch.max(train_data))
print(' - mean:', torch.mean(train_data))
print(' - std:', torch.std(train_data))
print(' - var:', torch.var(train_data))

[Train]
 - Numpy Shape: (60000, 28, 28)
 - Tensor Shape: torch.Size([60000, 28, 28])
 - Transformed Shape: torch.Size([28, 60000, 28])
 - min: 0.0
 - max: 1.0
 - mean: 0.13066047740240005
 - std: 0.3081078089011192
 - var: 0.0949304219058486


## Model

In [146]:
class Model(nn.Module):
    def __init__(self, parameters=[784, 625, 361, 144, 49, 10]):
        super(Model, self).__init__()
        
        self.fc1 = nn.Linear(784, 625)
        self.bc1 = nn.BatchNorm1d(625)
        
        self.fc2 = nn.Linear(625, 361)
        self.bc2 = nn.BatchNorm1d(361)
        
        self.fc3 = nn.Linear(361, 144)
        self.bc3 = nn.BatchNorm1d(144)
        
        self.fc4 = nn.Linear(144, 49)
#         self.bc4 = nn.BatchNorm1d(49)
        
        self.fc5 = nn.Linear(49, 10)
        
        
        self.fc6 = nn.Linear(784, 10)
        
    def forward(self, x):
        x = x.view((-1, 784))
        h = self.fc1(x)
        h = self.bc1(h)
        h = nf.relu(h)
#         h = F.dropout(h, p=0.2, training=self.training)
        
        h = self.fc2(h)
        h = self.bc2(h)
        h = nf.relu(h)
        
        h = self.fc3(h)
        h = self.bc3(h)
        h = nf.relu(h)
        
        h = self.fc4(h)
        h = nf.relu(h)
        
        h = self.fc5(h)
        out = nf.sigmoid(h)
        return out

model = Model()
model.cuda() # CUDA!

optimizer = optim.Adam(model.parameters(), lr=0.001)

## Train

In [183]:
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
    data, target = Variable(data.cuda()), Variable(target.cuda())
    
    optimizer.zero_grad()
    y_pred = model(data) # Predict
    
    loss = F.nll_loss(y_pred, target) # Negative Log Likelihood Loss
    loss.backward()
    optimizer.step()
    
    if batch_idx % 100 == 0:
        print('Train Epoch: [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            batch_idx * len(data), len(train_loader.dataset),
            100. * batch_idx / len(train_loader), loss.data[0]))
    

