In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as transforms
import torchvision.datasets as vdatasets
import torchvision.utils as vutils
from tensorboardX import SummaryWriter
import pickle,os,shutil
# Seed PyTorch's global random number generator so the random draws below can be repeated.
torch.manual_seed(1)

<torch._C.Generator at 0x7f29128ad730>

In [2]:
# Load the pickled object stored in port.info; the context manager makes sure
# the file handle is closed instead of being left to the garbage collector.
# NOTE(review): pickle.load can execute arbitrary code - only read a trusted port.info.
with open("port.info", "rb") as port_file:
    port = pickle.load(port_file)

In [2]:
# TensorBoard port setup: the object pickled in port.info is indexed by the
# second-to-last component of the current working directory path.
# NOTE(review): pickle.load can execute arbitrary code - only read a trusted port.info.
with open("port.info", "rb") as port_file:
    port = pickle.load(port_file)[os.getcwd().split("/")[-2]]

# Reset the TensorBoard data files. Still best-effort (a missing runs/ directory
# is not an error), but unlike a bare `except:` this no longer swallows
# KeyboardInterrupt or SystemExit.
shutil.rmtree('runs/', ignore_errors=True)

In [3]:
# Display the TensorBoard port value loaded from port.info.
port

'6006'

In [2]:
USE_CUDA = torch.cuda.is_available() # use the GPU when CUDA is available

In [None]:
# List the public names exposed by nn.init. The original cell was the unfinished
# expression `nn.init.`, which is a SyntaxError when the notebook is run top to bottom.
# NOTE(review): presumably a leftover tab-completion probe - confirm intent or drop the cell.
available_inits = [name for name in dir(nn.init) if not name.startswith("_")]
available_inits

In [9]:
class NN(nn.Module):
    """Two-layer fully connected network: 784 inputs -> hidden_size -> 10 outputs.

    Args:
        hidden_size: number of units in the hidden layer.
    """

    def __init__(self,hidden_size):
        super(NN,self).__init__()
        self.l1 = nn.Linear(784,hidden_size)
        self.l2 = nn.Linear(hidden_size,10)

    def init_weight(self, weight_init=nn.init.xavier_normal, bias_value=0.01):
        """Re-initialize every parameter whose name contains 'weight' or 'bias'.

        Args:
            weight_init: callable that receives each weight tensor. If it
                returns a tensor, that tensor becomes the new weight data.
                Defaults to nn.init.xavier_normal. Pass another initializer
                (e.g. nn.init.kaiming_normal, or a lambda wrapping
                nn.init.uniform / nn.init.normal with their arguments) to try
                a different scheme without editing this method.
            bias_value: constant written into every bias. Defaults to 0.01.
        """
        for name, param in self.named_parameters():
            if 'weight' in name:
                initialized = weight_init(param.data)
                # Keep the original "assign the result back" behaviour, while
                # also accepting initializers that return nothing.
                if initialized is not None:
                    param.data = initialized
            elif 'bias' in name:
                param.data = nn.init.constant(param.data, bias_value)

    def forward(self,inputs):
        """Apply l1, a ReLU, then l2 and return the result of l2."""
        outputs = F.relu(self.l1(inputs))
        return self.l2(outputs)
    
# Build the network with a 1024-unit hidden layer, then run its custom initialization.
model = NN(hidden_size=1024)
model.init_weight()

### init operations

In [3]:
# Scratch 10x2 tensor of zeros that the following cells pass to each nn.init function.
param = torch.zeros(10,2)

param


    0     0
    0     0
    0     0
    0     0
    0     0
    0     0
    0     0
    0     0
    0     0
    0     0
[torch.FloatTensor of size 10x2]

In [4]:
# Uniform initialization with lower bound a=0 and upper bound b=1.
nn.init.uniform(param, a=0, b=1)


 0.7576  0.2793
 0.4031  0.7347
 0.0293  0.7999
 0.3971  0.7544
 0.5695  0.4388
 0.6387  0.5247
 0.6826  0.3051
 0.4635  0.4550
 0.5725  0.4980
 0.9371  0.6556
[torch.FloatTensor of size 10x2]

In [5]:
# Normal (Gaussian) initialization with mean 0 and standard deviation 1.
nn.init.normal(param, mean=0, std=1)


-0.8923 -0.0583
-0.1955 -0.9656
 0.4224  0.2673
-0.4212 -0.5107
-1.5727 -0.1232
 3.5870 -1.8313
 1.5987 -1.2770
 0.3255 -0.4791
 1.3790  2.5286
 0.4107 -0.9880
[torch.FloatTensor of size 10x2]

In [6]:
# Constant initialization: every entry of param is set to 2.
nn.init.constant(param, val=2)


    2     2
    2     2
    2     2
    2     2
    2     2
    2     2
    2     2
    2     2
    2     2
    2     2
[torch.FloatTensor of size 10x2]

In [7]:
# Xavier uniform initialization; gain=1 is the library default, spelled out.
nn.init.xavier_uniform(param, gain=1)


-0.4571  0.4593
 0.4293  0.6271
-0.3964 -0.1164
-0.0137  0.1033
-0.5366 -0.5018
 0.3847 -0.1658
 0.3454  0.0403
 0.2322  0.1555
 0.2571  0.3505
-0.6549  0.3559
[torch.FloatTensor of size 10x2]

In [8]:
# Xavier normal initialization; gain=1 is the library default, spelled out.
nn.init.xavier_normal(param, gain=1)


 0.1166  0.2816
-0.2584  0.3590
-0.2793  0.1851
 0.1189 -0.3395
-0.2256  0.2594
-0.1620 -0.2682
-0.6707  0.4002
-0.0172 -0.3350
 0.1279 -0.4634
 0.1540 -0.1153
[torch.FloatTensor of size 10x2]

In [9]:
# Kaiming normal initialization; a=0 and mode='fan_in' are the library defaults, spelled out.
nn.init.kaiming_normal(param, a=0, mode='fan_in')


-2.5667 -1.4303
 0.5009  0.5438
-0.4057  1.1341
-1.1115  0.3501
-0.7703 -0.1473
 0.6272  1.0935
 0.0939  1.2381
-1.3459  0.5119
-0.6933 -0.1668
-0.9999 -1.6476
[torch.FloatTensor of size 10x2]

In [10]:
# Kaiming uniform initialization; a=0 and mode='fan_in' are the library defaults, spelled out.
nn.init.kaiming_uniform(param, a=0, mode='fan_in')


-1.2943  0.6664
 0.5546  1.1218
-0.8963  0.3755
-0.6306 -0.3891
-1.3803 -0.7894
-0.5305  0.7407
 0.3164  0.4278
 1.7288  1.6881
 1.1813  0.0552
-1.1982  1.3536
[torch.FloatTensor of size 10x2]

### weight init 효과 비교 분석 

In [11]:
# Settings for the comparison run below.
INIT_OPS = [None, nn.init.xavier_normal] # no explicit weight init vs. xavier_normal
# Number of passes over train_loader per initializer.
EPOCH = 3
# Learning rate handed to optim.SGD.
LR = 0.01
# Mini-batch size handed to the DataLoader.
BATCH_SIZE=64

In [12]:
# MNIST training split stored under ../../data/MNIST/; download=True asks
# torchvision to fetch it, and ToTensor is applied to every image.
train_dataset = vdatasets.MNIST(
    root='../../data/MNIST/',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# Shuffled mini-batches of BATCH_SIZE samples, loaded with two workers.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)

In [21]:
# Train one fresh 784-1024-1024-10 network per entry of INIT_OPS and log the
# per-step training loss to TensorBoard so the initializers can be compared.
for INIT_OP in INIT_OPS:
    op_name = INIT_OP.__name__ if INIT_OP else "None"
    # One writer per initializer. It is closed in the `finally` below; the
    # original closed only the last writer, after the outer loop had ended.
    writer = SummaryWriter(comment="-"+op_name)
    try:
        # Model declaration
        model = nn.Sequential(
            nn.Linear(784, 1024),
            nn.ReLU(),
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Linear(1024, 10),
        )

        # Initialization: only weights are touched; biases keep their defaults.
        if INIT_OP is not None:
            for name, param in model.named_parameters():
                if 'weight' in name:
                    param.data = INIT_OP(param.data)

        if USE_CUDA:
            model = model.cuda()

        # Loss function and optimizer
        loss_function = nn.CrossEntropyLoss()
        optimizer = optim.SGD(model.parameters(), lr=LR)

        # Training
        print(op_name + " training start!")
        steps_per_epoch = len(train_loader)  # loop-invariant, computed once
        for epoch in range(EPOCH):
            for i, (inputs, targets) in enumerate(train_loader):
                # Flatten each image to a 784-element vector for the first Linear layer.
                inputs, targets = Variable(inputs).view(-1, 784), Variable(targets)
                if USE_CUDA:
                    inputs = inputs.cuda()
                    targets = targets.cuda()
                model.zero_grad()
                outputs = model(inputs)
                loss = loss_function(outputs, targets)
                loss.backward()
                optimizer.step()

                # `loss.data[0]` raises on 0-dim loss tensors in newer PyTorch;
                # use .item() when it exists and fall back to the old indexing otherwise.
                loss_value = loss.item() if hasattr(loss, "item") else loss.data[0]
                global_step = (i + 1) + (epoch * steps_per_epoch)
                writer.add_scalars('data/weight_init/', {op_name: loss_value}, global_step)
        print("done")
    finally:
        # Close this run's event files even if training is interrupted.
        writer.close()

None training start!
done
xavier_normal training start!
done


In [4]:
# Display the TensorBoard port value again before launching TensorBoard.
port

'6006'

In [66]:
!tensorboard --logdir runs --port 6006

TensorBoard 0.4.0rc3 at http://dsksd-tf:6006 (Press CTRL+C to quit)
^C
