In [4]:
import torch
from torch import nn
from torch import optim

In [3]:
class MLP(nn.Module):
    """Tiny fully connected network mapping 2 input features to 1 output.

    Layout: Linear(2, 3) + ReLU -> Linear(3, 4) + ReLU -> Linear(4, 1) + Sigmoid.
    Each stage is wrapped in its own ``nn.Sequential`` and stored as
    ``fc1``, ``fc2`` and ``fc_out`` respectively.
    """

    def __init__(self):
        super().__init__()
        # First hidden stage: 2 -> 3 features.
        self.fc1 = nn.Sequential(
            nn.Linear(2, 3),
            nn.ReLU(),
        )
        # Second hidden stage: 3 -> 4 features.
        self.fc2 = nn.Sequential(
            nn.Linear(3, 4),
            nn.ReLU(),
        )
        # Output stage: 4 -> 1 feature, passed through a sigmoid.
        self.fc_out = nn.Sequential(
            nn.Linear(4, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Apply fc1, fc2 and fc_out to ``x`` in that order and return the result."""
        for stage in (self.fc1, self.fc2, self.fc_out):
            x = stage(x)
        return x

# Build the network, then sanity-check it: print the output shape for a random
# batch of two 2-feature samples, followed by the layer structure.
model = MLP()
print(model(torch.randn(2, 2)).shape, model, sep="\n")

torch.Size([2, 1])
MLP(
  (fc1): Sequential(
    (0): Linear(in_features=2, out_features=3, bias=True)
    (1): ReLU()
  )
  (fc2): Sequential(
    (0): Linear(in_features=3, out_features=4, bias=True)
    (1): ReLU()
  )
  (fc_out): Sequential(
    (0): Linear(in_features=4, out_features=1, bias=True)
    (1): Sigmoid()
  )
)


# .parameters() vs .modules() vs .children()

### parameters

In [None]:
# parameters() yields the tensors in layer order:
# layer0 weight, layer0 bias, layer1 weight, layer1 bias, ...
next(model.parameters())  # the first entry in that sequence

In [9]:
# named_parameters() yields (name, parameter) pairs, so each tensor can be
# printed right under the name it is registered with.
for param_name, param in model.named_parameters():
    print(param_name, param, sep="\n")

fc1.0.weight
Parameter containing:
tensor([[-0.0592, -0.5752],
        [ 0.5569,  0.2425],
        [-0.2149,  0.6800]])
fc1.0.bias
Parameter containing:
tensor([ 0.5418, -0.2572,  0.5098])
fc2.0.weight
Parameter containing:
tensor([[ 0.2760,  0.0679, -0.0336],
        [-0.3020,  0.5427,  0.2330],
        [-0.5526,  0.3655,  0.1132],
        [-0.1456, -0.3256,  0.0959]])
fc2.0.bias
Parameter containing:
tensor([ 0.1120, -0.1363, -0.3668,  0.0619])
fc_out.weight
Parameter containing:
tensor([[-0.1743, -0.2587,  0.2386,  0.0881],
        [ 0.1163,  0.4118, -0.2429, -0.3380],
        [-0.0698, -0.3627, -0.1248,  0.4156],
        [-0.4624, -0.4776, -0.2725,  0.1400],
        [ 0.3655, -0.1090,  0.4498, -0.2792],
        [ 0.0872,  0.2323, -0.3417,  0.2330],
        [ 0.0080, -0.2560,  0.2411,  0.3640],
        [-0.3587,  0.2671, -0.2828,  0.0193],
        [ 0.4867,  0.0601,  0.4136,  0.1392],
        [ 0.0487, -0.1529,  0.4190,  0.2929]], requires_grad=True)
fc_out.bias
Parameter containing

transfer-learning 할 때 좋다 !
- 만약 10개 클래스를 분류하도록 학습된 모델을 가져와서 5개 클래스를 분류하는 모델로 재학습할 때, 기존에 학습했던 parameters를 그대로 활용할 수 있다
- 이때 가져온 모델의 weight는 초기값(weight initialization)으로 사용해도 되고, freeze해도 된다

In [8]:
# Transfer-learning recipe: freeze the existing weights, then attach a fresh head.
model = MLP()

# Freeze the whole model by switching off gradient tracking on every parameter.
for p in model.parameters():
    p.requires_grad_(False)

# Nothing is trainable at this point, so this prints an empty list.
print([p for p in model.parameters() if p.requires_grad])
print("-" * 30)

# Swap in a new output layer; its freshly created parameters are the only ones
# that still have requires_grad=True.
model.fc_out = nn.Linear(4, 10)

params = [p for p in model.parameters() if p.requires_grad]
print(params)

# Hand the optimizer only the parameters that are not frozen.
optimizer = optim.Adam(params, lr=0.1)

[]
------------------------------
[Parameter containing:
tensor([[-0.1743, -0.2587,  0.2386,  0.0881],
        [ 0.1163,  0.4118, -0.2429, -0.3380],
        [-0.0698, -0.3627, -0.1248,  0.4156],
        [-0.4624, -0.4776, -0.2725,  0.1400],
        [ 0.3655, -0.1090,  0.4498, -0.2792],
        [ 0.0872,  0.2323, -0.3417,  0.2330],
        [ 0.0080, -0.2560,  0.2411,  0.3640],
        [-0.3587,  0.2671, -0.2828,  0.0193],
        [ 0.4867,  0.0601,  0.4136,  0.1392],
        [ 0.0487, -0.1529,  0.4190,  0.2929]], requires_grad=True), Parameter containing:
tensor([ 0.3205,  0.2924, -0.2135,  0.1196,  0.3307,  0.0110, -0.0201,  0.4570,
         0.2061, -0.3330], requires_grad=True)]


### modules

In [10]:
# modules() returns a generator object, so the cell shows only its repr;
# the generator has to be consumed (e.g. with list()) to see the modules.
model.modules()

<generator object Module.modules at 0x7f89608357b0>

In [12]:
# modules() walks the whole tree: the model itself comes first, then each
# top-level child (fc1, fc2, fc_out) in turn.
#   A child that is a Sequential is immediately followed by the layers it contains.
[*model.modules()]

[MLP(
   (fc1): Sequential(
     (0): Linear(in_features=2, out_features=3, bias=True)
     (1): ReLU()
   )
   (fc2): Sequential(
     (0): Linear(in_features=3, out_features=4, bias=True)
     (1): ReLU()
   )
   (fc_out): Linear(in_features=4, out_features=10, bias=True)
 ),
 Sequential(
   (0): Linear(in_features=2, out_features=3, bias=True)
   (1): ReLU()
 ),
 Linear(in_features=2, out_features=3, bias=True),
 ReLU(),
 Sequential(
   (0): Linear(in_features=3, out_features=4, bias=True)
   (1): ReLU()
 ),
 Linear(in_features=3, out_features=4, bias=True),
 ReLU(),
 Linear(in_features=4, out_features=10, bias=True)]

In [13]:
# Keep only the nn.Linear layers out of every module in the tree.
print([layer for layer in model.modules() if isinstance(layer, nn.Linear)])

[Linear(in_features=2, out_features=3, bias=True), Linear(in_features=3, out_features=4, bias=True), Linear(in_features=4, out_features=10, bias=True)]


In [15]:
# modules() is handy for weight initialization: visit every nn.Linear layer
# and overwrite its weight in place.
for layer in filter(lambda mod: isinstance(mod, nn.Linear), model.modules()):
    # Alternative initializer for nn.Linear weights: nn.init.kaiming_normal_(layer.weight)
    nn.init.constant_(layer.weight, 1)  # set every weight entry to 1

print([layer.weight for layer in model.modules() if isinstance(layer, nn.Linear)])

[Parameter containing:
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]]), Parameter containing:
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]]), Parameter containing:
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], requires_grad=True)]


In [18]:
# parameters() yields bare Parameter tensors, not the nn.Linear modules that own
# them, so filtering by module type matches nothing and this prints [].
# Iterate the generator directly (wrapping it in a list would test only the
# generator object itself). Use modules() when the layer type matters.
print([p for p in model.parameters() if isinstance(p, nn.Linear)])

[]


### children

In [19]:
# children() returns a generator object, so the cell shows only its repr;
# the generator has to be consumed (e.g. with list()) to see the child modules.
model.children()

<generator object Module.children at 0x7f8960835e40>

In [23]:
# children() lists only the direct sub-modules (fc1, fc2, fc_out), without
# separately listing the layers nested inside them.
[*model.children()]

[Sequential(
   (0): Linear(in_features=2, out_features=3, bias=True)
   (1): ReLU()
 ),
 Sequential(
   (0): Linear(in_features=3, out_features=4, bias=True)
   (1): ReLU()
 ),
 Linear(in_features=4, out_features=10, bias=True)]

In [21]:
# Each child is itself a callable module, so a single layer can be picked by
# index and applied to a batch on its own.
x = torch.randn(2, 2)
[*model.children()][0](x)

tensor([[1.7479, 0.9489, 1.7159],
        [0.0000, 0.0000, 0.0000]])

In [24]:
# Chain the first two children into a new Sequential to obtain a sub-network,
# then show its structure and its output for the batch x.
sub_network = nn.Sequential(*[*model.children()][:2])
print(sub_network, sub_network(x), sep="\n")

Sequential(
  (0): Sequential(
    (0): Linear(in_features=2, out_features=3, bias=True)
    (1): ReLU()
  )
  (1): Sequential(
    (0): Linear(in_features=3, out_features=4, bias=True)
    (1): ReLU()
  )
)
tensor([[4.5248, 4.2764, 4.0459, 4.4747],
        [0.1120, 0.0000, 0.0000, 0.0619]])
