In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as transforms
import torchvision.datasets as vdatasets
import torchvision.utils as vutils
from tensorboardX import SummaryWriter
# Seed PyTorch's global random number generator with a fixed value.
torch.manual_seed(1)

<torch._C.Generator at 0x7f87c4048730>

In [2]:
USE_CUDA = torch.cuda.is_available() # True when a CUDA GPU can be used

### MNIST 데이터 

In [3]:
# Loader settings are identical for both splits, so they are spelled out once.
# NOTE(review): the batch size is a literal here; a BATCH_SIZE constant is only
# defined in a later cell and is not read by this one.
_LOADER_KWARGS = {'batch_size': 64, 'shuffle': True, 'num_workers': 2}

# Training split (downloaded into ../data/ if it is not already there).
train_dataset = vdatasets.MNIST(
    root='../data/', train=True, transform=transforms.ToTensor(), download=True
)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, **_LOADER_KWARGS)

# Held-out test split, prepared the same way.
test_dataset = vdatasets.MNIST(
    root='../data/', train=False, transform=transforms.ToTensor(), download=True
)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, **_LOADER_KWARGS)

### Model 

In [11]:
class NN(nn.Module):
    """Fully connected feed-forward network.

    The network is an ``nn.Sequential`` made of an input block and
    ``num_layers`` hidden blocks (each ``Linear -> ReLU -> Dropout``),
    followed by a final ``Linear`` output layer. The output layer applies no
    activation, so ``forward`` returns raw scores.

    Args:
        input_size: number of features of each input vector.
        hidden_size: width of the input block and of every hidden block.
        output_size: number of values produced per input vector.
        num_layers: number of hidden blocks added after the input block.
        dropout_p: dropout probability used after every ReLU.
    """

    def __init__(self,input_size,hidden_size,output_size,num_layers=1,dropout_p=0.3):
        super(NN,self).__init__()
        self.layers = nn.Sequential()

        # input layer
        self.layers.add_module('input_layer',nn.Linear(input_size,hidden_size))
        self.layers.add_module('activation_0',nn.ReLU())
        self.layers.add_module('dropout_0',nn.Dropout(dropout_p))
        # hidden layers (module names are numbered from 1; index 0 is the input block)
        for l in range(num_layers):
            self.layers.add_module('hidden_layer_'+str(l+1),nn.Linear(hidden_size,hidden_size))
            self.layers.add_module('activation_'+str(l+1),nn.ReLU())
            self.layers.add_module('dropout_'+str(l+1),nn.Dropout(dropout_p))

        # output layer
        self.layers.add_module('output_layer',nn.Linear(hidden_size,output_size))

    def init_weight(self):
        """Re-initialise every parameter of ``self.layers`` in place.

        Parameters whose name contains ``'weight'`` get Xavier (Glorot) normal
        initialisation; parameters whose name contains ``'bias'`` are drawn
        from a normal distribution with the ``nn.init.normal_`` defaults.
        """
        for name, param in self.layers.named_parameters():
            # The underscore-suffixed initialisers modify the tensor in place;
            # the non-underscore names used previously are deprecated aliases.
            if 'weight' in name:
                nn.init.xavier_normal_(param)
            elif 'bias' in name:
                nn.init.normal_(param)

    def forward(self,inputs):
        """Run ``inputs`` through the sequential stack and return its output."""
        return self.layers(inputs)

In [12]:
def evaluation(data_loader,model):
    """Compute accuracy and mean cross-entropy loss of ``model`` over a loader.

    Each batch is flattened to one row per sample, moved to the device of the
    model's parameters, and scored without building autograd graphs.

    Args:
        data_loader: iterable of ``(inputs, targets)`` batches that also
            exposes ``.dataset`` (its length is used as the sample count).
        model: module mapping a ``(batch, features)`` tensor to per-class scores.

    Returns:
        A tuple ``(accuracy, mean_loss)`` of Python floats: the fraction of
        samples whose highest-scoring class equals the target, and the summed
        cross-entropy divided by ``len(data_loader.dataset)``.

    Note:
        The model is switched to eval mode and is left in that mode.
    """
    model.eval()
    # Sum (not average) per batch so that dividing by the dataset size below
    # gives the true per-sample mean even when the last batch is smaller.
    loss_function = nn.CrossEntropyLoss(reduction='sum')

    # Evaluate on whatever device the model lives on (e.g. after model.cuda()).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    num_equal = 0
    losses = 0.0
    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs = inputs.view(inputs.size(0), -1)  # one flat vector per sample
            if device is not None:
                inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            # .item() converts the 0-dim result to a Python number.
            losses += loss_function(outputs, targets).item()
            predictions = outputs.max(1)[1] # argmax
            num_equal += torch.eq(predictions, targets).sum().item()

    num_samples = len(data_loader.dataset)
    return num_equal/num_samples, losses/num_samples

In [36]:
# ---- hyperparameters ----
EPOCH = 50            # passes over the training loader
LR = 0.01             # SGD learning rate
HIDDEN_SIZE = 1024    # width of every hidden Linear layer
BATCH_SIZE = 64       # NOTE(review): not read in this cell; the loader cell uses a literal 64
NUM_LAYERS = 2        # hidden blocks after the input block
DROPOUT = 0.0         # dropout probability passed to the model

# ---- model, loss and optimizer ----
loss_function = nn.CrossEntropyLoss()

model = NN(
    input_size=784,
    hidden_size=HIDDEN_SIZE,
    output_size=10,
    num_layers=NUM_LAYERS,
    dropout_p=DROPOUT,
)
model.init_weight()
if USE_CUDA:
    model = model.cuda()

# Built after the optional .cuda() call, as in the original ordering.
optimizer = optim.SGD(model.parameters(), lr=LR)

In [37]:
model

NN(
  (layers): Sequential(
    (input_layer): Linear(in_features=784, out_features=1024)
    (activation_0): ReLU()
    (dropout_0): Dropout(p=0.0)
    (hidden_layer_1): Linear(in_features=1024, out_features=1024)
    (activation_1): ReLU()
    (dropout_1): Dropout(p=0.0)
    (hidden_layer_2): Linear(in_features=1024, out_features=1024)
    (activation_2): ReLU()
    (dropout_2): Dropout(p=0.0)
    (output_layer): Linear(in_features=1024, out_features=10)
  )
)

In [38]:
%%time
# Mini-batch training: EPOCH passes over train_loader, one optimizer step per batch.
model.train()
for epoch in range(EPOCH):
    losses=[]  # batch losses collected since the last progress line
    for i, (inputs, targets) in enumerate(train_loader):
        # Flatten every image to a 784-element row, matching the model's input size.
        inputs = inputs.view(-1,784)
        if USE_CUDA:
            inputs = inputs.cuda()
            targets = targets.cuda()
        model.zero_grad()
        outputs = model(inputs)
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()

        # The loss is a 0-dim tensor; .item() converts it to a Python float
        # (indexing it with [0] raises on current PyTorch).
        losses.append(loss.item())
        if i % 500 == 0:
            # Report the mean of the losses gathered since the previous report,
            # then start a fresh window.
            print("[%d/%d] [%03d/%d] mean_loss : %.3f" % (epoch,EPOCH,i,len(train_loader),np.mean(losses)))
            losses=[]

# Score the trained model on both splits; evaluation() returns (accuracy, loss).
train_accuracy, train_loss = evaluation(train_loader, model)
test_accuracy, test_loss = evaluation(test_loader, model)

# Only the accuracies are reported; the losses stay available as variables.
for _label, _accuracy in (
    ("\n\ntrain accuracy : ", train_accuracy),
    ("test accuracy : ", test_accuracy),
):
    print(_label, _accuracy)

[0/50] [000/938] mean_loss : 4.619
[0/50] [500/938] mean_loss : 1.055
[1/50] [000/938] mean_loss : 0.502
[1/50] [500/938] mean_loss : 0.367
[2/50] [000/938] mean_loss : 0.359
[2/50] [500/938] mean_loss : 0.307
[3/50] [000/938] mean_loss : 0.218
[3/50] [500/938] mean_loss : 0.279
[4/50] [000/938] mean_loss : 0.144
[4/50] [500/938] mean_loss : 0.250
[5/50] [000/938] mean_loss : 0.273
[5/50] [500/938] mean_loss : 0.225
[6/50] [000/938] mean_loss : 0.173
[6/50] [500/938] mean_loss : 0.205
[7/50] [000/938] mean_loss : 0.232
[7/50] [500/938] mean_loss : 0.189
[8/50] [000/938] mean_loss : 0.146
[8/50] [500/938] mean_loss : 0.178
[9/50] [000/938] mean_loss : 0.185
[9/50] [500/938] mean_loss : 0.164
[10/50] [000/938] mean_loss : 0.203
[10/50] [500/938] mean_loss : 0.149
[11/50] [000/938] mean_loss : 0.258
[11/50] [500/938] mean_loss : 0.138
[12/50] [000/938] mean_loss : 0.228
[12/50] [500/938] mean_loss : 0.129
[13/50] [000/938] mean_loss : 0.150
[13/50] [500/938] mean_loss : 0.123
[14/50] [000

In [30]:
train accuracy :  0.9815666666666667
test accuracy :  0.9704
    
train accuracy :  0.9786
test accuracy :  0.972

TensorBoard 0.4.0rc3 at http://dsksd-tf:6006 (Press CTRL+C to quit)
^C
