## Linear Layer

In [1]:
import torch

## Raw Linear Layer
$$ y = x \cdot W + b,$$
$$ where x \in \mathbb{R}^{N \times n}, y \in \mathbb{R}^{N \times m}, $$
$$ Thus, W \in \mathbb{R}^{N \times m} and b \in \mathbb{R}^{m}. $$

In [2]:
W = torch.FloatTensor([[1, 2],
                      [3, 4],
                      [5, 6]])
b = torch.FloatTensor([2, 2])

In [3]:
print(W.size())
print(b.size())

torch.Size([3, 2])
torch.Size([2])


In [4]:
def linear(x, W, b):
    y = torch.matmul(x, W) + b
    return y

In [5]:
x = torch.FloatTensor([[1, 1, 1],
                      [2, 2, 2],
                      [3, 3, 3],
                      [4, 4, 4]])
print(x.size())

torch.Size([4, 3])


In [6]:
y = linear(x, W, b)

In [8]:
print(y.size())

torch.Size([4, 2])


## nn.Module

In [9]:
import torch.nn as nn # 굉장히 유용하게 사용하는것

In [10]:
class MyLinear(nn.Module): ## nn.Module을 상속함.
    
    def __init__(self, input_dim = 3, output_dim = 2):
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        super().__init__()
        
        self.W = torch.FloatTensor(input_dim, output_dim)
        self.b = torch.FloatTensor(output_dim)
        
    # You should override 'forward' method to implement detail.
    # The input argument and outputs can be designed as you wish.
    
    def forward(self, x):
        # |x| = (batch_size, input_dim)
        y = torch.matmul(x, self.W) + self.b
        # |y| = (batch_size, input_dim) * (input_dim, output_dim)
        #     = (batch_size, output_dim)
        return y

In [11]:
linear = MyLinear(3, 2)
y = linear(x)

In [12]:
print(y.size())

torch.Size([4, 2])


In [13]:
for p in linear.parameters():
    print(p)

You can see that there is no weight parameters to learn. Above way can forward(or calculate) values, but it cannot be trained.

## Correct way: nn.Parameter

In [14]:
class MyLinear(nn.Module):
    
    def __init__(self, input_dim = 3, output_dim = 2):
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        super().__init__()
        
        self.W = nn.Parameter(torch.FloatTensor(input_dim, output_dim))
        self.b = nn.Parameter(torch.FloatTensor(output_dim))
        
    def forward(self, x):
        # |x| = (batch_size, input_dim)
        y = torch.matmul(x, self.W) + self.b
        # |y| = (batch_size, input_dim) * (input_dim, output_dim)
        #     = (batch_size, output_dim)
        
        return y

Reference: https://pytorch.org/docs/stable/nn.html/#torch.nn.Parameter

A kind of Tensor that is to be considered a module parameter

Parameters are Tensor subclasses, that have a very special property when used with Modules - when they're assigned as Module attributes they are automatically added to the list of its parameters, and will appear e.g. in paramters() iterator.
Assigning a Tensor doesn't have such effect. This is because one might want to cache some temporary state, like last hidden state of the RNN, in the model. If there was no such class as Parameter, these temporaries would get registered too.

In [15]:
linear = MyLinear(3, 2)
y = linear(x)

In [16]:
print(y.size())

torch.Size([4, 2])


In [17]:
for p in linear.parameters():
    print(p)

Parameter containing:
tensor([[0.0000e+00, 1.4586e-19],
        [4.2729e-05, 1.3341e-08],
        [6.7354e+22, 3.2908e+21]], requires_grad=True)
Parameter containing:
tensor([9.1477e-41, 0.0000e+00], requires_grad=True)


## nn.Linear

In [18]:
linear = nn.Linear(3, 2)
y = linear(x) # 자동으로 forward를 불러와서 fc를 해준다.

In [19]:
print(y.size())

torch.Size([4, 2])


In [20]:
for p in linear.parameters():
    print(p)

Parameter containing:
tensor([[ 0.3150,  0.3283,  0.1125],
        [-0.1527, -0.4191, -0.2179]], requires_grad=True)
Parameter containing:
tensor([-0.3573,  0.5327], requires_grad=True)


## nn.Module can contain other nn.Module's child classes.

In [21]:
class MyLinear(nn.Module):
    
    def __init__(self, input_dim = 3, output_dim = 2):
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        super().__init__()
        
        self.linear = nn.Linear(input_dim, output_dim)
        
    def forward(self, x):
        # |x| = (batch_size, input_dim)
        y = self.linear(x)
        # |y| = (batch_size, output_dim)
        
        return y

In [22]:
linear = MyLinear(3, 2)
y = linear(x)

In [23]:
print(y.size())

torch.Size([4, 2])


In [24]:
for p in nn.Parameters():
    print(p)

AttributeError: module 'torch.nn' has no attribute 'Parameters'