In [12]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as transforms
import torchvision.datasets as vdatasets
import torchvision.utils as vutils
from tensorboardX import SummaryWriter
# Seed PyTorch's global random number generator with a fixed value.
torch.manual_seed(1)

<torch._C.Generator at 0x7fe9f8049250>

In [62]:
USE_CUDA = torch.cuda.is_available() # True when a CUDA GPU is available; the training cell then moves model and batches onto it


### Init operations (`torch.nn.init`)

In [20]:
# Scratch 10x2 tensor of zeros that the nn.init calls in the following cells are applied to.
param = torch.zeros(10,2)

# Bare expression so the notebook displays the tensor as the cell output.
param


    0     0
    0     0
    0     0
    0     0
    0     0
    0     0
    0     0
    0     0
    0     0
    0     0
[torch.FloatTensor of size 10x2]

In [30]:
# Fill `param` with samples from the uniform distribution U(0, 1).
nn.init.uniform(param, a=0, b=1)


 0.4010  0.4621
 0.4050  0.3157
 0.7087  0.3297
 0.2191  0.6134
 0.1386  0.3933
 0.7555  0.6869
 0.9257  0.1590
 0.5043  0.3523
 0.2203  0.2187
 0.6260  0.5523
[torch.FloatTensor of size 10x2]

In [29]:
# Fill `param` with samples from the normal distribution N(mean=0, std=1).
nn.init.normal(param, mean=0, std=1)


 0.4728  1.0049
-0.2871 -1.1619
 0.0276  0.5652
-0.0115  0.6706
-0.4929  1.5050
-2.3264  1.6169
-0.9026  0.1737
 0.0772 -0.9339
 0.0914  1.3940
-0.6877 -0.5058
[torch.FloatTensor of size 10x2]

In [32]:
# Set every element of `param` to the constant 2.
nn.init.constant(param, val=2)


    2     2
    2     2
    2     2
    2     2
    2     2
    2     2
    2     2
    2     2
    2     2
    2     2
[torch.FloatTensor of size 10x2]

In [33]:
# Xavier (Glorot) uniform initialisation of `param`, with the default gain spelled out.
nn.init.xavier_uniform(param, gain=1)


-0.3609 -0.4984
-0.5859  0.6930
-0.1205 -0.1635
-0.2600 -0.3537
-0.6467 -0.4151
 0.4322  0.1550
-0.2494 -0.2672
 0.4517  0.5097
 0.6813  0.2063
 0.3416 -0.0568
[torch.FloatTensor of size 10x2]

In [39]:
# Xavier (Glorot) normal initialisation of `param`, with the default gain spelled out.
nn.init.xavier_normal(param, gain=1)


-0.2309 -0.3713
-0.1997 -0.2521
 0.1566  0.1572
 0.3221  0.4639
-0.0468 -0.0074
 0.0200  0.1744
-0.3673  0.2169
 0.1647  0.5928
-0.9872 -0.4861
 0.2843  0.4612
[torch.FloatTensor of size 10x2]

### weight init 효과 비교 분석 (comparing the effect of weight initialization)

In [42]:
# Initialisers to compare: None means "no explicit weight init",
# the other entry applies xavier_uniform to the weights.
INIT_OPS = [
    None,
    nn.init.xavier_uniform,
]
STEP = 3         # number of full passes over train_loader
LR = 0.01        # learning rate handed to optim.SGD
BATCH_SIZE = 64  # samples per mini-batch

In [43]:
# MNIST training split stored under ../data/ (downloaded if requested files are
# missing); ToTensor is applied to every image.
train_dataset = vdatasets.MNIST(
    root='../data/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# Shuffled mini-batches of BATCH_SIZE samples, loaded with two worker processes.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)

In [65]:
# Train one identical MLP per entry of INIT_OPS and log every mini-batch loss
# to TensorBoard so the loss curves of the initialisation schemes can be compared.

# Number of batches per pass over the data. len(train_loader) also counts the
# final partial batch, so consecutive passes get disjoint global-step ranges
# (len(train_dataset)//BATCH_SIZE undercounts by one when the sizes do not divide).
steps_per_epoch = len(train_loader)

for INIT_OP in INIT_OPS:
    op_name = INIT_OP.__name__ if INIT_OP else "None"
    writer = SummaryWriter(comment="-"+op_name)

    # Close this run's writer even if training raises, and before the next
    # run opens its own writer, so no writer is left open.
    try:
        # Model: 784 -> 1024 -> 1024 -> 10 fully connected network with ReLU.
        model = nn.Sequential(nn.Linear(784,1024),
                              nn.ReLU(),
                              nn.Linear(1024,1024),
                              nn.ReLU(),
                              nn.Linear(1024,10))

        # Initialisation: only parameters whose name contains 'weight' are
        # re-initialised; all others keep the values nn.Linear gave them.
        # The loop variable is deliberately not called `param`, so the
        # notebook-level demo tensor of that name is not overwritten.
        if INIT_OP is not None:
            for name, layer_param in model.named_parameters():
                if 'weight' in name:
                    layer_param.data = INIT_OP(layer_param.data)

        if USE_CUDA:
            model = model.cuda()

        # Loss function and optimizer.
        loss_function = nn.CrossEntropyLoss()
        optimizer = optim.SGD(model.parameters(),lr=LR)

        # Training.
        print(op_name + " training start!")
        for step in range(STEP):
            for i, (inputs, targets) in enumerate(train_loader):
                # Flatten every image to a 784-element vector for the first Linear layer.
                inputs, targets = Variable(inputs).view(-1,784), Variable(targets)
                if USE_CUDA:
                    inputs = inputs.cuda()
                    targets = targets.cuda()
                model.zero_grad()
                outputs = model(inputs)
                loss = loss_function(outputs, targets)
                loss.backward()
                optimizer.step()

                # PyTorch >= 0.4 returns a 0-dim loss that must be read with
                # .item(); older releases only support loss.data[0].
                loss_value = loss.item() if hasattr(loss, "item") else loss.data[0]

                # 1-based global step that keeps increasing across passes.
                global_step = step * steps_per_epoch + (i + 1)
                writer.add_scalars('data/weight_init/', {op_name: loss_value}, global_step)
        print("done")
    finally:
        writer.close()

None training start!
done
xavier_uniform training start!
done


In [66]:
# Launch TensorBoard on the `runs` directory to inspect the logged loss curves.
# NOTE(review): this shell command keeps the cell busy until it is interrupted;
# presumably `runs` is where SummaryWriter wrote its event files — confirm.
!tensorboard --logdir runs

TensorBoard 0.4.0rc3 at http://dsksd-tf:6006 (Press CTRL+C to quit)
^C
