In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as transforms
import torchvision.datasets as vdatasets
import torchvision.utils as vutils
from tensorboardX import SummaryWriter
# Seed PyTorch's global random number generator with a fixed value so the
# randomly initialised tensors produced later start from a known RNG state.
torch.manual_seed(1)

<torch._C.Generator at 0x7f3884066730>

In [2]:
USE_CUDA = torch.cuda.is_available() # use the GPU when one is available

### init operations

In [3]:
# Scratch 10x2 tensor filled with zeros; displayed as the cell's output.
param = torch.zeros(10, 2)

param


    0     0
    0     0
    0     0
    0     0
    0     0
    0     0
    0     0
    0     0
    0     0
    0     0
[torch.FloatTensor of size 10x2]

In [4]:
# Overwrite `param` with samples from the uniform distribution U(0, 1).
# NOTE: PyTorch >= 0.4 names this `nn.init.uniform_`; the un-suffixed
# spelling used here is the older (now deprecated) alias.
nn.init.uniform(param, a=0, b=1)


 0.7576  0.2793
 0.4031  0.7347
 0.0293  0.7999
 0.3971  0.7544
 0.5695  0.4388
 0.6387  0.5247
 0.6826  0.3051
 0.4635  0.4550
 0.5725  0.4980
 0.9371  0.6556
[torch.FloatTensor of size 10x2]

In [5]:
# Overwrite `param` with samples from a normal distribution (mean 0, std 1).
nn.init.normal(param, mean=0, std=1)


-0.8923 -0.0583
-0.1955 -0.9656
 0.4224  0.2673
-0.4212 -0.5107
-1.5727 -0.1232
 3.5870 -1.8313
 1.5987 -1.2770
 0.3255 -0.4791
 1.3790  2.5286
 0.4107 -0.9880
[torch.FloatTensor of size 10x2]

In [6]:
# Overwrite every element of `param` with the constant value 2.
nn.init.constant(param, val=2)


    2     2
    2     2
    2     2
    2     2
    2     2
    2     2
    2     2
    2     2
    2     2
    2     2
[torch.FloatTensor of size 10x2]

In [33]:
# Xavier/Glorot uniform initialisation of `param`, with the default gain of 1
# written out explicitly.
nn.init.xavier_uniform(param, gain=1)


-0.3609 -0.4984
-0.5859  0.6930
-0.1205 -0.1635
-0.2600 -0.3537
-0.6467 -0.4151
 0.4322  0.1550
-0.2494 -0.2672
 0.4517  0.5097
 0.6813  0.2063
 0.3416 -0.0568
[torch.FloatTensor of size 10x2]

In [39]:
# Xavier/Glorot normal initialisation of `param`, with the default gain of 1
# written out explicitly.
nn.init.xavier_normal(param, gain=1)


-0.2309 -0.3713
-0.1997 -0.2521
 0.1566  0.1572
 0.3221  0.4639
-0.0468 -0.0074
 0.0200  0.1744
-0.3673  0.2169
 0.1647  0.5928
-0.9872 -0.4861
 0.2843  0.4612
[torch.FloatTensor of size 10x2]

In [7]:
# Kaiming/He normal initialisation of `param`; the library defaults
# (negative slope a=0, fan_in scaling) are written out explicitly.
nn.init.kaiming_normal(param, a=0, mode='fan_in')


-0.9081  0.5423
 0.1103 -2.2590
 0.6067 -0.1383
 0.8310 -0.2477
-0.8029  0.2366
 0.2857  0.6898
-0.6331  0.8795
-0.6842  0.4533
 0.2912 -0.8317
-0.5525  0.6355
[torch.FloatTensor of size 10x2]

In [8]:
# Kaiming/He uniform initialisation of `param`; the library defaults
# (negative slope a=0, fan_in scaling) are written out explicitly.
nn.init.kaiming_uniform(param, a=0, mode='fan_in')


 1.5864 -0.3205
 0.9765  0.7500
-1.1196 -1.4730
 1.6626  0.0904
 1.1872  0.3589
 0.5571  1.2938
 1.6424 -1.1494
 0.2166  1.2926
 1.2548  1.0760
-1.2535 -1.2473
[torch.FloatTensor of size 10x2]

### weight init 효과 비교 분석 

In [42]:
# Experiment configuration.
# Initialisers to compare: None (no explicit weight init) versus xavier_uniform.
INIT_OPS = [None, nn.init.xavier_uniform]
EPOCH = 3        # number of passes over the training set
LR = 0.01        # learning rate handed to the SGD optimizer
BATCH_SIZE = 64  # mini-batch size used by the DataLoader

In [43]:
# MNIST training split, stored under ../data/ (downloaded if missing) and
# converted to tensors by ToTensor().
train_dataset = vdatasets.MNIST(
    root='../data/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# Shuffled mini-batch iterator over the training split, fed by 2 worker processes.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)

In [65]:
# Train one identical MLP per entry of INIT_OPS and log every step's training
# loss to TensorBoard so the loss curves of the initialisation schemes can be
# compared side by side.
for INIT_OP in INIT_OPS:
    op_name = INIT_OP.__name__ if INIT_OP else "None"
    # One writer per scheme; the comment suffix keeps the runs distinguishable.
    writer = SummaryWriter(comment="-"+op_name)

    # try/finally guarantees each run's writer is closed (and its events
    # flushed) even if training fails, instead of only closing the last one.
    try:
        # Model definition: 784 -> 1024 -> 1024 -> 10 MLP with ReLU activations.
        model = nn.Sequential(nn.Linear(784,1024),
                              nn.ReLU(),
                              nn.Linear(1024,1024),
                              nn.ReLU(),
                              nn.Linear(1024,10))

        # Initialisation: apply INIT_OP to every weight matrix (biases are left
        # untouched). With INIT_OP None the layers keep nn.Linear's own defaults.
        # The loop variable is deliberately not called `param`, so the
        # notebook-level `param` tensor is not overwritten by this cell.
        if INIT_OP is not None:
            for name, weight in model.named_parameters():
                if 'weight' in name:
                    weight.data = INIT_OP(weight.data)

        if USE_CUDA:
            model = model.cuda()

        # Loss function and optimizer.
        loss_function = nn.CrossEntropyLoss()
        optimizer = optim.SGD(model.parameters(),lr=LR)

        # Training.
        print(op_name + " training start!")
        steps_per_epoch = len(train_loader)
        for epoch in range(EPOCH):
            for i, (inputs, targets) in enumerate(train_loader):
                # Flatten each image to a 784-element vector.
                inputs, targets = Variable(inputs).view(-1,784), Variable(targets)
                if USE_CUDA:
                    inputs = inputs.cuda()
                    targets = targets.cuda()
                model.zero_grad()
                outputs = model(inputs)
                loss = loss_function(outputs, targets)
                loss.backward()
                optimizer.step()

                # Extract the loss as a Python float: `.item()` on PyTorch >= 0.4,
                # falling back to `.data[0]` on older releases.
                loss_value = loss.item() if hasattr(loss, 'item') else loss.data[0]
                # 1-based global step counted across epochs.
                global_step = (i+1)+(epoch*steps_per_epoch)
                writer.add_scalars('data/weight_init/',{op_name : loss_value},global_step)
        print("done")
    finally:
        writer.close()

None training start!
done
xavier_uniform training start!
done


In [66]:
# Launch TensorBoard on the `runs` log directory. The command serves until
# it is interrupted (CTRL+C), so this cell blocks the kernel while it runs.
!tensorboard --logdir runs

TensorBoard 0.4.0rc3 at http://dsksd-tf:6006 (Press CTRL+C to quit)
^C
