In [4]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as transforms
import torchvision.datasets as vdatasets
import torchvision.utils as vutils
from tensorboardX import SummaryWriter
torch.manual_seed(1)

<torch._C.Generator at 0x7fcb2c0f9730>

In [5]:
USE_CUDA = torch.cuda.is_available() # use the GPU when CUDA is available

### MNIST 데이터 

In [6]:
# MNIST training split, stored under ../data/ (download=True is requested).
# Every image goes through transforms.ToTensor() before batching.
train_dataset = vdatasets.MNIST('../data/', train=True,
                                transform=transforms.ToTensor(), download=True)

# Mini-batches of 64 samples, reshuffled on every pass, loaded by 2 workers.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64,
                                           shuffle=True, num_workers=2)

# MNIST test split, built with the same root directory and transform.
test_dataset = vdatasets.MNIST('../data/', train=False,
                               transform=transforms.ToTensor(), download=True)

# NOTE(review): shuffle=True is kept from the original cell; sample order
# should not matter for a metric summed over the whole split -- confirm
# before switching it off.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64,
                                          shuffle=True, num_workers=2)

### Model 

In [33]:
class NN(nn.Module):
    """Fully connected feed-forward network built as one ``nn.Sequential``.

    Layout of ``self.layers`` (module names are part of the state-dict keys):

    * ``input_layer``  : ``Linear(input_size, hidden_size)`` -> ReLU -> Dropout
    * ``hidden_layer_k`` for ``k = 1..num_layers``:
      ``Linear(hidden_size, hidden_size)`` -> ReLU -> Dropout
    * ``output_layer`` : ``Linear(hidden_size, output_size)`` with no
      activation, so ``forward`` returns raw scores.

    Args:
        input_size: number of features per input sample.
        hidden_size: width of every hidden layer.
        output_size: number of scores produced per sample.
        num_layers: how many hidden blocks follow the input block
            (0 yields input block + output layer only).
        dropout_p: drop probability passed to every ``nn.Dropout``.
    """

    def __init__(self,input_size,hidden_size,output_size,num_layers=1,dropout_p=0.3):
        super(NN,self).__init__()
        self.layers = nn.Sequential()

        # input block
        self.layers.add_module('input_layer',nn.Linear(input_size,hidden_size))
        self.layers.add_module('activation_0',nn.ReLU())
        self.layers.add_module('dropout_0',nn.Dropout(dropout_p))

        # hidden blocks, numbered from 1 so that suffix 0 stays with the input block
        for idx in range(1, num_layers+1):
            self.layers.add_module('hidden_layer_%d' % idx,nn.Linear(hidden_size,hidden_size))
            self.layers.add_module('activation_%d' % idx,nn.ReLU())
            self.layers.add_module('dropout_%d' % idx,nn.Dropout(dropout_p))

        # output layer (raw scores, no activation)
        self.layers.add_module('output_layer',nn.Linear(hidden_size,output_size))

    def init_weight(self):
        """Re-initialise every parameter of ``self.layers`` in place.

        Parameters whose name contains ``'weight'`` get Xavier-normal values;
        parameters whose name contains ``'bias'`` are drawn from
        ``nn.init.normal_`` with its default arguments. The in-place
        ``*_`` initialisers replace the deprecated ``nn.init.xavier_normal`` /
        ``nn.init.normal`` aliases and write into the existing parameter
        tensors instead of rebinding ``param.data``.
        """
        for name, param in self.layers.named_parameters():
            if 'weight' in name:
                nn.init.xavier_normal_(param)
            elif 'bias' in name:
                nn.init.normal_(param)

    def forward(self,inputs):
        """Run ``inputs`` through the sequential stack and return its output."""
        return self.layers(inputs)

In [34]:
def evaluation(data_loader,model):
    """Return ``(accuracy, mean_loss)`` of ``model`` over ``data_loader``.

    The model is switched to eval mode (and left in it). Each batch is
    flattened to ``(batch, features)``, moved to the device of the model's
    parameters, and scored with a summed cross-entropy loss. Both totals are
    divided by ``len(data_loader.dataset)``, so the loader is assumed to
    yield every sample of its dataset exactly once.

    Args:
        data_loader: iterable of ``(inputs, targets)`` batches that exposes a
            ``dataset`` attribute supporting ``len()``.
        model: module mapping flattened inputs to per-class scores.

    Returns:
        Tuple ``(accuracy, mean_loss)`` of Python floats.

    Raises:
        ZeroDivisionError: if the underlying dataset is empty.
    """
    model.eval()
    # reduction='sum' is the current spelling of the deprecated size_average=False
    loss_function = nn.CrossEntropyLoss(reduction='sum')

    # Follow the model's device so evaluation also works after model.cuda().
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    num_equal = 0
    losses = 0.0
    with torch.no_grad():  # no autograd graph is needed for evaluation
        for inputs, targets in data_loader:
            inputs = inputs.reshape(inputs.size(0), -1)
            if device is not None:
                inputs = inputs.to(device)
                targets = targets.to(device)
            outputs = model(inputs)
            # .item() replaces .data[0], which fails on 0-dim tensors
            losses += loss_function(outputs, targets).item()
            predictions = outputs.max(1)[1]  # argmax over the class axis
            num_equal += torch.eq(predictions, targets).sum().item()

    num_samples = len(data_loader.dataset)
    return num_equal/num_samples, losses/num_samples

In [58]:
# --- hyperparameters -------------------------------------------------------
EPOCH = 20            # number of passes over the training loader
LR = 0.01             # learning rate handed to SGD
HIDDEN_SIZE = 1024    # width of every hidden layer
# NOTE(review): BATCH_SIZE is not read by the DataLoader construction in this
# notebook, which passes a literal batch_size=64.
BATCH_SIZE = 64
NUM_LAYERS = 3        # hidden blocks after the input block
DROPOUT = 0.2         # dropout probability for every Dropout module

# --- model, loss, optimizer ------------------------------------------------
# 784 input features and 10 output scores.
model = NN(input_size=784,
           hidden_size=HIDDEN_SIZE,
           output_size=10,
           num_layers=NUM_LAYERS,
           dropout_p=DROPOUT)
model.init_weight()
# Move to the GPU before the optimizer collects the parameters.
model = model.cuda() if USE_CUDA else model
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=LR)

In [59]:
# Bare expression: the notebook displays the module's repr as the cell output.
model

NN(
  (layers): Sequential(
    (input_layer): Linear(in_features=784, out_features=1024)
    (activation_0): ReLU()
    (dropout_0): Dropout(p=0.2)
    (hidden_layer_1): Linear(in_features=1024, out_features=1024)
    (activation_1): ReLU()
    (dropout_1): Dropout(p=0.2)
    (hidden_layer_2): Linear(in_features=1024, out_features=1024)
    (activation_2): ReLU()
    (dropout_2): Dropout(p=0.2)
    (hidden_layer_3): Linear(in_features=1024, out_features=1024)
    (activation_3): ReLU()
    (dropout_3): Dropout(p=0.2)
    (output_layer): Linear(in_features=1024, out_features=10)
  )
)

In [60]:
%%time
# Training loop: one optimizer step per mini-batch, for EPOCH passes over
# train_loader. Tensors are used directly (the Variable wrapper is no longer
# needed) and scalar losses are read with .item(), since indexing a 0-dim
# tensor with .data[0] raises on current PyTorch releases.
model.train()
for epoch in range(EPOCH):
    losses = []
    for i, (inputs, targets) in enumerate(train_loader):
        # Flatten each image batch to (batch, 784) for the fully connected net.
        inputs = inputs.view(-1, 784)
        if USE_CUDA:
            inputs = inputs.cuda()
            targets = targets.cuda()

        model.zero_grad()
        outputs = model(inputs)
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()

        losses.append(loss.item())
        # Report the mean of the losses collected since the last report,
        # then start a fresh window.
        if i % 500 == 0:
            print("[%d/%d] [%03d/%d] mean_loss : %.3f" % (epoch,EPOCH,i,len(train_loader),np.mean(losses)))
            losses = []

# evaluation (runs once, after the last epoch)
train_accuracy, train_loss = evaluation(train_loader,model)
test_accuracy, test_loss = evaluation(test_loader,model)

print("\n\ntrain accuracy : ",train_accuracy)
print("test accuracy : ",test_accuracy)

[0/20] [000/938] mean_loss : 3.702
[0/20] [500/938] mean_loss : 1.581
[1/20] [000/938] mean_loss : 0.670
[1/20] [500/938] mean_loss : 0.554
[2/20] [000/938] mean_loss : 0.651
[2/20] [500/938] mean_loss : 0.424
[3/20] [000/938] mean_loss : 0.329
[3/20] [500/938] mean_loss : 0.362
[4/20] [000/938] mean_loss : 0.346
[4/20] [500/938] mean_loss : 0.326
[5/20] [000/938] mean_loss : 0.288
[5/20] [500/938] mean_loss : 0.297
[6/20] [000/938] mean_loss : 0.127
[6/20] [500/938] mean_loss : 0.268
[7/20] [000/938] mean_loss : 0.320
[7/20] [500/938] mean_loss : 0.248
[8/20] [000/938] mean_loss : 0.205
[8/20] [500/938] mean_loss : 0.234
[9/20] [000/938] mean_loss : 0.370
[9/20] [500/938] mean_loss : 0.219
[10/20] [000/938] mean_loss : 0.212
[10/20] [500/938] mean_loss : 0.204
[11/20] [000/938] mean_loss : 0.187
[11/20] [500/938] mean_loss : 0.193
[12/20] [000/938] mean_loss : 0.205
[12/20] [500/938] mean_loss : 0.188
[13/20] [000/938] mean_loss : 0.359
[13/20] [500/938] mean_loss : 0.174
[14/20] [000

In [30]:
train accuracy :  0.9772
test accuracy :  0.9691

TensorBoard 0.4.0rc3 at http://dsksd-tf:6006 (Press CTRL+C to quit)
^C
