# Linear Layer

In [1]:
import torch

## Raw Linear Layer

$$\begin{gathered}
y=x\cdot{W}+b, \\
\text{where }x\in\mathbb{R}^{N\times{n}}\text{, }y\in\mathbb{R}^{N\times{m}}. \\
\\
\text{Thus, }W\in\mathbb{R}^{n\times{m}}\text{ and }b\in\mathbb{R}^m.
\end{gathered}$$

In [2]:
W = torch.FloatTensor([[1, 2],
                       [3, 4],
                       [5, 6]])
b = torch.FloatTensor([2, 2])

In [3]:
print(W.size())
print(b.size())

torch.Size([3, 2])
torch.Size([2])


In [4]:
def linear(x, W, b):
    y = torch.matmul(x, W) + b
    
    return y

In [5]:
x = torch.FloatTensor([[1, 1, 1],
                       [2, 2, 2],
                       [3, 3, 3],
                       [4, 4, 4]])

print(x.size())

torch.Size([4, 3])


In [6]:
y = linear(x, W, b)
y

tensor([[11., 14.],
        [20., 26.],
        [29., 38.],
        [38., 50.]])

In [7]:
print(y.size())

torch.Size([4, 2])


## nn.Module

In [8]:
import torch.nn as nn

- import torch.nn as nn 후에
- nn.Module 이라는 추상 클래스를 상속받아서 씀

In [61]:
class MyLinear(nn.Module):

    def __init__(self, input_dim=3, output_dim=2):
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        super().__init__() #이건 내가 위에서 오버라이딩 하기 전의 부모 클래스의 __init__메소드를 불러오는 거인 듯
        
        self.W = torch.FloatTensor(input_dim, output_dim)
        self.b = torch.FloatTensor(output_dim)

    # You should override 'forward' method to implement detail.
    # The input arguments and outputs can be designed as you wish.
    def forward(self, x):
        # |x| = (batch_size, input_dim)
        y = torch.matmul(x, self.W) + self.b
        # |y| = (batch_size, input_dim) * (input_dim, output_dim)
        #     = (batch_size, output_dim)
        
        return y

In [62]:
x = torch.FloatTensor([[1, 1, 1],
                       [2, 2, 2],
                       [3, 3, 3],
                       [4, 4, 4]])
# 존나 헷갈린다... 얘는 minibatch 4, input dim은 3

In [63]:
linear = MyLinear(3, 2) # input dim이 3이고 output dim이 2인 w값 생성

y = linear(x)
y

tensor([[-6.6216e-07,  4.5916e-41],
        [-6.6216e-07,  4.5916e-41],
        [-6.6216e-07,  4.5916e-41],
        [-6.6216e-07,  4.5916e-41]])

In [19]:
print(y.size())

torch.Size([4, 2])


In [21]:
# nn.Module을 상속받은 instance들은 (여기서는 linear) parameters라는 메소드를 가짐
# 이건 trainable한 parameter들을 반환하는 메소드
for p in linear.parameters():
    print(p)
#여기선 학습 가능한 파라미터가 없음

You can see that there is no weight parameters to learn.
Above way can forward(or calculate) values, but it cannot be trained.

### Correct way: nn.Parameter

In [22]:
class MyLinear(nn.Module):

    def __init__(self, input_dim=3, output_dim=2):
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        super().__init__()
        
        self.W = nn.Parameter(torch.FloatTensor(input_dim, output_dim)) # nn.Parameter로 랩핑을 해야한데...그래야 학습이 가능하대...
        self.b = nn.Parameter(torch.FloatTensor(output_dim))
        
    def forward(self, x):
        # |x| = (batch_size, input_dim)
        y = torch.matmul(x, self.W) + self.b
        # |y| = (batch_size, input_dim) * (input_dim, output_dim)
        #     = (batch_size, output_dim)
        
        return y

Reference: https://pytorch.org/docs/stable/nn.html#torch.nn.Parameter

A kind of Tensor that is to be considered a module parameter.

Parameters are Tensor subclasses, that have a very special property when used with Module s - when they’re assigned as Module attributes they are automatically added to the list of its parameters, and will appear e.g. in parameters() iterator. Assigning a Tensor doesn’t have such effect. This is because one might want to cache some temporary state, like last hidden state of the RNN, in the model. If there was no such class as Parameter, these temporaries would get registered too.

In [23]:
linear = MyLinear(3, 2)

y = linear(x) # forward가 자동으로 매칭이 된다고 함

In [24]:
print(y.size())

torch.Size([4, 2])


In [25]:
for p in linear.parameters():
    print(p)

Parameter containing:
tensor([[7.0065e-44, 7.9874e-44],
        [1.1771e-43, 6.8664e-44],
        [7.0065e-44, 8.1275e-44]], requires_grad=True)
Parameter containing:
tensor([ 3.8016e-39, -2.6624e-08], requires_grad=True)


## nn.Linear

- 앞으로는 이거 쓰면 됨
- 그냥 nn.Linear클래스 갖다가 인스턴스 만들면 됨

In [32]:
linear = nn.Linear(3, 2)

x = torch.FloatTensor([[1, 1, 1],
                       [2, 2, 2],
                       [3, 3, 3],
                       [4, 4, 4]])

y = linear(x) # 여기도 forward가 없음
y

tensor([[-1.4715,  0.9805],
        [-2.4646,  1.3880],
        [-3.4576,  1.7955],
        [-4.4506,  2.2030]], grad_fn=<AddmmBackward>)

In [33]:
print(y.size())

torch.Size([4, 2])


In [35]:
for p in linear.parameters():
    print(p)
    print(p.size())

Parameter containing:
tensor([[-0.4047, -0.2731, -0.3153],
        [ 0.0556,  0.2733,  0.0785]], requires_grad=True)
torch.Size([2, 3])
Parameter containing:
tensor([-0.4785,  0.5730], requires_grad=True)
torch.Size([2])


### nn.Module can contain other nn.Module's child classes.

In [36]:
class MyLinear(nn.Module):

    def __init__(self, input_dim=3, output_dim=2):
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        super().__init__()
        
        self.linear = nn.Linear(input_dim, output_dim)
        
    def forward(self, x):
        # |x| = (batch_size, input_dim)
        y = self.linear(x)
        # |y| = (batch_size, output_dim)
        
        return y

In [54]:
class MyLinear(nn.Module):

    def __init__(self, input_dim=3, output_dim=2):
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        super().__init__()
        
        self.linear = nn.Linear(input_dim, output_dim)
        
    def forward(self, x):
        # |x| = (batch_size, input_dim)
        y = self.linear(x)
        # |y| = (batch_size, output_dim)
        
        return y

In [55]:
linear = MyLinear(3, 2)

y = linear(x)

In [56]:
print(y.size())

torch.Size([4, 2])


In [57]:
for p in linear.parameters():
    print(p)

Parameter containing:
tensor([[-0.1196,  0.1126,  0.2035],
        [-0.5727, -0.4583, -0.1385]], requires_grad=True)
Parameter containing:
tensor([ 0.4227, -0.4437], requires_grad=True)
