In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as transforms
import torchvision.datasets as vdatasets
import torchvision.utils as vutils
from tensorboardX import SummaryWriter
import pickle,os,shutil
torch.manual_seed(1)  # seed PyTorch's default random number generator

<torch._C.Generator at 0x7fc3980a8730>

In [12]:
# TensorBoard port setting: port.info holds a pickled mapping that is indexed
# by the name of the parent of the current working directory.
# NOTE(review): pickle.load can execute arbitrary code, so port.info must come
# from a trusted source.
with open("port.info", "rb") as port_file:  # close the handle deterministically
    port = pickle.load(port_file)[os.getcwd().split("/")[-2]]

# Reset the TensorBoard data files left by earlier runs. This stays
# best-effort (a missing 'runs/' directory is not an error) without a bare
# `except`, which would also swallow KeyboardInterrupt and SystemExit.
shutil.rmtree('runs/', ignore_errors=True)

In [3]:
USE_CUDA = torch.cuda.is_available() # use the GPU when CUDA is available

### MNIST 데이터

In [4]:
BATCH_SIZE = 64  # number of samples per mini-batch for the DataLoaders

In [7]:
# Directory where the MNIST files live (they are downloaded there if missing).
MNIST_ROOT = '../../data/MNIST/'

# Training split; every image is converted to a tensor by ToTensor().
train_dataset = vdatasets.MNIST(root=MNIST_ROOT,
                                train=True,
                                transform=transforms.ToTensor(),
                                download=True)

# Training batches are drawn in a shuffled order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=BATCH_SIZE,
                                           shuffle=True,
                                           num_workers=2)

# Held-out test split, preprocessed the same way as the training split.
test_dataset = vdatasets.MNIST(root=MNIST_ROOT,
                               train=False,
                               transform=transforms.ToTensor(),
                               download=True)

# Evaluation gains nothing from shuffling; a fixed order keeps test-time
# results and logged samples reproducible between runs.
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=BATCH_SIZE,
                                          shuffle=False,
                                          num_workers=2)

### Model 

In [8]:
class NN(nn.Module):
    """Fully connected classifier with two batch-normalized hidden layers.

    The network computes Linear -> BatchNorm1d -> ReLU twice and finishes
    with a Linear layer that returns one unnormalized score per class.

    Args:
        hidden_size: width of both hidden layers.
        input_size: number of features in each flattened input sample.
            Defaults to 784 (a flattened 28x28 image).
        num_classes: number of scores produced per sample. Defaults to 10.
    """

    def __init__(self, hidden_size, input_size=784, num_classes=10):
        super(NN, self).__init__()
        # Attribute names and creation order are kept stable so state_dict
        # keys and the order of model.parameters() do not change.
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, num_classes)

        # In: (batch size, features) => Out: (batch size, features)
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.bn2 = nn.BatchNorm1d(hidden_size)

    def forward(self, inputs):
        """Return class scores for a batch of flattened samples.

        Args:
            inputs: tensor of shape (batch size, input_size).

        Returns:
            Tensor of shape (batch size, num_classes) holding raw scores;
            no softmax is applied here.
        """
        hidden = F.relu(self.bn1(self.l1(inputs)))
        hidden = F.relu(self.bn2(self.l2(hidden)))
        return self.l3(hidden)
    
model = NN(512)
model.train()
# Training process: put the module in training mode.

model.eval()
# Test (actual model use) process: put the module in evaluation mode.

NN(
  (l1): Linear(in_features=784, out_features=512)
  (l2): Linear(in_features=512, out_features=512)
  (l3): Linear(in_features=512, out_features=10)
  (bn1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True)
  (bn2): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True)
)

### Optimizer 

In [9]:
# Learning rate shared by every optimizer constructed in this cell.
LR = 0.1

# SGD family: classical momentum, then the Nesterov variant.
sgd_momentum = optim.SGD(
    model.parameters(),
    lr=LR,
    momentum=0.9,
)
sgd_nesterov_momentum = optim.SGD(
    model.parameters(),
    lr=LR,
    momentum=0.9,
    nesterov=True,
)

# Adaptive-learning-rate family.
adagrad = optim.Adagrad(
    model.parameters(),
    lr=LR,
)
rmsprop = optim.RMSprop(
    model.parameters(),
    lr=LR,
    alpha=0.99,
)
adam = optim.Adam(
    model.parameters(),
    lr=LR,
    betas=(0.9, 0.999),
)

### Training

In [13]:
EPOCH = 5
LR = 0.1
HIDDEN_SIZE = 512
OPTIM_OPS = ["sgd", "sgd_nesterov_momentum"]


def _build_optimizer(op_name, params, lr):
    """Return the optimizer selected by ``op_name`` for ``params``.

    Args:
        op_name: "sgd" or "sgd_nesterov_momentum".
        params: iterable of parameters to optimize.
        lr: learning rate.

    Raises:
        ValueError: if ``op_name`` is not one of the supported names, so a
            typo cannot silently reuse a previously created optimizer.
    """
    if op_name == "sgd":
        return optim.SGD(params, lr=lr)
    if op_name == "sgd_nesterov_momentum":
        return optim.SGD(params, lr=lr, momentum=0.9, nesterov=True)
    raise ValueError("unknown optimizer: " + op_name)


def _loss_value(loss):
    """Return the loss as a plain Python number.

    Uses ``loss.item()`` when the object provides it and falls back to
    ``loss.data[0]`` otherwise, so the cell works whether or not the
    installed PyTorch exposes ``item()``.
    """
    if hasattr(loss, "item"):
        return loss.item()
    return loss.data[0]


# Train one freshly created network per optimizer and log every training
# loss under the same TensorBoard tag so the curves can be compared.
for op_name in OPTIM_OPS:
    writer = SummaryWriter(comment="-" + op_name)
    try:
        model = NN(HIDDEN_SIZE)
        if USE_CUDA:
            model = model.cuda()
        loss_function = nn.CrossEntropyLoss()

        # Declare the optimizer (after the optional move to the GPU).
        optimizer = _build_optimizer(op_name, model.parameters(), LR)

        # Training
        print(op_name + " training start!")
        steps_per_epoch = len(train_loader)  # loop-invariant, computed once
        for epoch in range(EPOCH):
            for i, (inputs, targets) in enumerate(train_loader):
                # Flatten each image to a 784-element row.
                inputs, targets = Variable(inputs).view(-1, 784), Variable(targets)
                if USE_CUDA:
                    inputs = inputs.cuda()
                    targets = targets.cuda()
                model.zero_grad()
                outputs = model(inputs)
                loss = loss_function(outputs, targets)
                loss.backward()
                optimizer.step()

                # Global step counts batches across epochs, starting at 1.
                global_step = (i + 1) + (epoch * steps_per_epoch)
                writer.add_scalars('data/optimizer/',
                                   {op_name: _loss_value(loss)},
                                   global_step)
        print("done")
    finally:
        # Close every writer, not only the last one, even if training fails.
        writer.close()

sgd training start!
done
sgd_nesterov_momentum training start!
done


In [14]:
# Serve the event files under runs/ with TensorBoard; the command keeps
# running until it is interrupted.
# NOTE(review): the `port` value loaded from port.info is not used here and
# 6006 is hard-coded — confirm which one is intended.
!tensorboard --logdir runs --port 6006

TensorBoard 0.4.0rc3 at http://dsksd-tf:6006 (Press CTRL+C to quit)
^C
