In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as transforms
import torchvision.datasets as vdatasets
import torchvision.utils as vutils
from tensorboardX import SummaryWriter
import pickle,os,shutil
torch.manual_seed(1)

<torch._C.Generator at 0x7fc7240d4730>

In [2]:
# 텐서보드 포트 설정
port = pickle.load(open("port.info","rb"))[os.getcwd().split("/")[-2]]

# 텐서보드 데이터 파일 초기화
try:
    shutil.rmtree('runs/')
except:
    pass

In [3]:
USE_CUDA = torch.cuda.is_available() # gpu 사용

### MNIST 데이터

In [11]:
BATCH_SIZE = 64

In [12]:
train_dataset = vdatasets.MNIST(root='../../data/MNIST/',
                               train=True, 
                               transform=transforms.ToTensor(),
                               download=True)


train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=BATCH_SIZE, 
                                           shuffle=True,
                                           num_workers=2,
                                           drop_last=True) # 이동평균이 튀는걸 방지

test_dataset = vdatasets.MNIST(root='../../data/MNIST/',
                               train=False, 
                               transform=transforms.ToTensor(),
                               download=True)


test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                           batch_size=BATCH_SIZE, 
                                           shuffle=True,
                                           num_workers=2)

### Model 

In [5]:
class NN(nn.Module):
    def __init__(self,hidden_size):
        super(NN,self).__init__()
        self.l1 = nn.Linear(784,hidden_size)
        self.l2 = nn.Linear(hidden_size,hidden_size)
        self.l3 = nn.Linear(hidden_size,10)
        
        # In : (배치사이즈, 차원수) => Out : (배치사이즈, 차원수)
        self.bn1 = nn.BatchNorm1d(hidden_size) 
        self.bn2 = nn.BatchNorm1d(hidden_size)
    def forward(self,inputs):
        outputs = self.bn1(self.l1(inputs))
        outputs = F.relu(outputs)
        outputs = self.bn2(self.l2(outputs))
        outputs = F.relu(outputs)
        return self.l3(outputs)
    
model = NN(512)
model.train()
# 트레이닝 Process

model.eval()
# 테스트(실제 모델 러닝) Process

NN(
  (l1): Linear(in_features=784, out_features=512)
  (l2): Linear(in_features=512, out_features=512)
  (l3): Linear(in_features=512, out_features=10)
  (bn1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True)
  (bn2): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True)
)

In [6]:
def evaluation(data_loader,model):
    model.eval() # for batch norm at test time!
    loss_function = nn.CrossEntropyLoss(size_average=False)
    num_equal=0
    losses=0
    for i, (inputs, targets) in enumerate(data_loader):
        inputs, targets = Variable(inputs).view(-1,784), Variable(targets)
        outputs = model(inputs)
        losses+=loss_function(outputs,targets).data[0]
        outputs = outputs.max(1)[1] # argmax
        num_equal += torch.eq(outputs,targets).sum().data[0]
    return num_equal/len(data_loader.dataset), losses/len(data_loader.dataset)

In [7]:
EPOCH=5
LR=0.1
HIDDEN_SIZE = 512

model = NN(HIDDEN_SIZE)
if USE_CUDA:
    model = model.cuda()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=LR)

In [8]:
model

NN(
  (l1): Linear(in_features=784, out_features=512)
  (l2): Linear(in_features=512, out_features=512)
  (l3): Linear(in_features=512, out_features=10)
  (bn1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True)
  (bn2): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True)
)

### TODO : inner loop(i) 500번마다의 평균 loss를 tensorboardX에 플롯의 그려보고 model의 graph도 그려보기

In [4]:
writer = SummaryWriter(comment="-batch-norm")

In [13]:
%%time
model.train()
for epoch in range(EPOCH):
    losses=[]
    for i, (inputs, targets) in enumerate(train_loader):
        inputs, targets = Variable(inputs).view(-1,784), Variable(targets)
        if USE_CUDA:
            inputs = inputs.cuda()
            targets = targets.cuda()
        model.zero_grad()
        outputs = model(inputs)
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()

        losses.append(loss.data[0])
        if i % 500 == 0:
            print("[%d/%d] [%03d/%d] mean_loss : %.3f" % (epoch,EPOCH,i,len(train_loader),np.mean(losses)))
            losses=[]

# evaluation
train_accuracy, train_loss = evaluation(train_loader,model)
test_accuracy, test_loss =evaluation(test_loader,model)

print("\n\ntrain accuracy : ",train_accuracy)
print("test accuracy : ",test_accuracy)

[0/5] [000/938] mean_loss : 2.277
[0/5] [500/938] mean_loss : 0.233
[1/5] [000/938] mean_loss : 0.048
[1/5] [500/938] mean_loss : 0.074
[2/5] [000/938] mean_loss : 0.077
[2/5] [500/938] mean_loss : 0.044
[3/5] [000/938] mean_loss : 0.015
[3/5] [500/938] mean_loss : 0.030
[4/5] [000/938] mean_loss : 0.016
[4/5] [500/938] mean_loss : 0.024


train accuracy :  0.9980166666666667
test accuracy :  0.982
CPU times: user 5min 33s, sys: 2 s, total: 5min 35s
Wall time: 1min 57s


In [5]:
port

'6006'

In [None]:
!tensorboard --logdir runs --port 6006