In [1]:
import torch 
from torch import nn
from torch.nn import functional as F

# Reference model: a two-layer perceptron assembled from built-in blocks.
net = nn.Sequential(
    nn.Linear(20, 256),  # hidden layer
    nn.ReLU(),
    nn.Linear(256, 10),  # output layer
)

# Sample minibatch reused by the following cells: shape (2, 20).
X = torch.rand((2, 20))
net(X)

tensor([[-0.0426, -0.2457, -0.0303,  0.1832,  0.0788, -0.0737,  0.3692, -0.1708,
         -0.1082, -0.0370],
        [-0.0799, -0.2310, -0.0334,  0.1268,  0.0025, -0.0847,  0.1970, -0.0632,
         -0.0298,  0.0762]], grad_fn=<AddmmBackward>)

### 5.1.1 A Custom Block

In [2]:
class MLP(nn.Module):
    """Multilayer perceptron with one hidden layer, written as a custom block."""

    def __init__(self):
        """Declare the layers of the block.

        The constructor of the parent class ``Module`` is called first to
        perform the necessary initialization; the two fully connected layers
        are then stored as attributes.
        """
        super().__init__()
        self.hidden = nn.Linear(20, 256)  # hidden layer
        self.out = nn.Linear(256, 10)  # output layer

    def forward(self, X):
        """Return the output of the block for the input ``X``."""
        hidden_activation = F.relu(self.hidden(X))
        return self.out(hidden_activation)
    
# Instantiate the custom block and run a forward pass on X.
net = MLP()
net(X)

tensor([[ 0.0715,  0.1927,  0.1439, -0.0932, -0.0532, -0.1742, -0.1029, -0.1537,
          0.0158, -0.1765],
        [ 0.1853,  0.3218,  0.1675, -0.0182,  0.0048, -0.0685,  0.0464, -0.1833,
          0.1286, -0.0530]], grad_fn=<AddmmBackward>)

### 5.1.2 The Sequential Block

To build our own Sequential, we need two pieces of functionality:
1. A function that appends blocks one by one to a list.
2. A forward propagation function that passes an input through the chain of blocks, in the same order as they were appended.

In [3]:
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    The blocks passed to the constructor are registered in order under the
    keys ``"0"``, ``"1"``, ... and ``forward`` applies them in that order.
    """

    def __init__(self, *args):
        """Register every block in ``args`` as a child module.

        Args:
            *args: instances of ``Module`` subclasses, in execution order.

        Raises:
            TypeError: if an argument is neither a ``Module`` nor ``None``
                (raised by ``add_module``).
        """
        super().__init__()
        for idx, module in enumerate(args):
            # `add_module` saves the block in the `_modules` member of the
            # Module class (an ordered dictionary) after validating its type,
            # instead of writing to the private dict directly.
            self.add_module(str(idx), module)

    def forward(self, X):
        """Pass ``X`` through the registered blocks in insertion order."""
        # Iterate over `_modules` itself so that a block registered under
        # several keys is applied once per key.
        for block in self._modules.values():
            X = block(X)
        return X
    
# Build a network with MySequential and run it on X.
net = MySequential(nn.Linear(20,256), nn.ReLU(), nn.Linear(256,10))
net(X)

tensor([[ 0.1092, -0.1527,  0.1487, -0.1246, -0.0436,  0.0195, -0.1831, -0.0044,
         -0.0608,  0.0662],
        [-0.0462, -0.2063,  0.2539, -0.0981,  0.0628,  0.1115, -0.2262, -0.0914,
          0.0724,  0.2137]], grad_fn=<AddmmBackward>)

### 5.1.3 Executing Code in the Forward Propagation Function

In [4]:
class FixedHiddenMLP(nn.Module):
    """Block whose forward pass mixes a layer, a constant matrix and Python control flow."""

    def __init__(self):
        super().__init__()
        # Random weights that will not compute gradients and therefore stay
        # constant during training. Registering them as a non-persistent
        # buffer lets `.to()`, `.cuda()`, `.double()`, ... convert them
        # together with the parameters, while keeping them out of
        # `state_dict()`.
        self.register_buffer(
            "rand_weight",
            torch.rand((20, 20), requires_grad=False),
            persistent=False,
        )
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Return a scalar tensor computed from the 2-D input ``X``."""
        X = self.linear(X)
        # Constant (non-trainable) transformation followed by ReLU.
        X = F.relu(torch.mm(X, self.rand_weight) + 1)

        # The same fully connected layer is applied again, so both calls
        # share its parameters.
        X = self.linear(X)

        # Plain Python control flow: halve X until the sum of its absolute
        # values is at most 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()
    
# The while loop above is practically never used in any real-world task;
# what matters is understanding how this flow of execution works.
net = FixedHiddenMLP()
net(X)

tensor(-0.1435, grad_fn=<SumBackward0>)

In [5]:
class NestMLP(nn.Module):
    """Block that nests an ``nn.Sequential`` sub-network in front of a linear layer."""

    def __init__(self):
        super().__init__()
        feature_layers = [
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
        ]
        self.net = nn.Sequential(*feature_layers)
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Run ``X`` through the nested network, then through the final linear layer."""
        features = self.net(X)
        return self.linear(features)
    
# Blocks compose freely: chain a nested block, a plain layer and FixedHiddenMLP.
chimera = nn.Sequential(NestMLP(), nn.Linear(16,20), FixedHiddenMLP())
chimera(X)

tensor(0.1180, grad_fn=<SumBackward0>)

### Built in initialization

In [6]:
# Initialize all the parameters to a Gaussian random variables with standard deviation 0.01, bias = 0
def init_normal(m):
    """Initialize an ``nn.Linear`` with N(0, 0.01**2) weights and a zero bias.

    Meant to be passed to ``Module.apply``; modules of any other type are
    left untouched.

    Args:
        m: the module to (possibly) initialize in place.
    """
    # Exact type match: subclasses of nn.Linear are deliberately skipped.
    if type(m) is nn.Linear:
        nn.init.normal_(m.weight, mean=0, std=0.01)
        # A layer built with bias=False has `bias is None`; nothing to reset.
        if m.bias is not None:
            nn.init.zeros_(m.bias)
# Run the initializer on net and each of its submodules.
net.apply(init_normal)

# Initialize all the parameters to a given constant value
def init_constant(m):
    """Initialize an ``nn.Linear`` with all weights equal to 1 and a zero bias.

    Meant to be passed to ``Module.apply``; modules of any other type are
    left untouched.

    Args:
        m: the module to (possibly) initialize in place.
    """
    # Exact type match: subclasses of nn.Linear are deliberately skipped.
    if type(m) is nn.Linear:
        nn.init.constant_(m.weight, 1)
        # A layer built with bias=False has `bias is None`; nothing to reset.
        if m.bias is not None:
            nn.init.zeros_(m.bias)
# Run the constant initializer on net and each of its submodules.
net.apply(init_constant)

# Also can apply different initializers for certain blocks.
def xavier(m):
    """Apply Xavier uniform initialization to the weight of an ``nn.Linear``.

    Modules of any other type are ignored, and the bias is not modified.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.xavier_uniform_(m.weight)
        
def init_42(m):
    """Fill the weight of an ``nn.Linear`` with the constant 42.

    Modules of any other type are ignored, and the bias is not modified.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.constant_(m.weight, 42)

### Custom Initialization

In [9]:
def my_init(m):
    """Custom initializer for ``nn.Linear`` layers.

    Prints the name and shape of the layer's first parameter, draws the
    weights from U(-10, 10) and then sets every weight whose absolute value
    is below 5 to zero. Modules of any other type are ignored.

    Args:
        m: the module to (possibly) initialize in place.
    """
    if type(m) is nn.Linear:
        # `named_parameters()` returns a generator, which cannot be indexed;
        # take its first (name, parameter) pair explicitly.
        name, param = next(m.named_parameters())
        print("Init", name, param.shape)

        nn.init.uniform_(m.weight, -10, 10)
        # Multiplying by the boolean mask keeps entries with |w| >= 5 and
        # zeroes the others.
        m.weight.data *= m.weight.data.abs() >= 5

### Tied Parameters
The shared layer uses the same weights everywhere it appears in the network.

In [11]:
# Input batch of shape (20, 4).
X = torch.rand((20, 4))

# A single layer object placed at two positions of the network, so both
# positions use the same parameters.
shared = nn.Linear(8, 8)
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    shared,
    nn.ReLU(),
    shared,
    nn.ReLU(),
    nn.Linear(8, 1),
)
net(X)

tensor([[0.1813],
        [0.1813],
        [0.1730],
        [0.1812],
        [0.1859],
        [0.1815],
        [0.1818],
        [0.1756],
        [0.1811],
        [0.1833],
        [0.1816],
        [0.1855],
        [0.1814],
        [0.1805],
        [0.1820],
        [0.1816],
        [0.1807],
        [0.1814],
        [0.1813],
        [0.1802]], grad_fn=<AddmmBackward>)

# 5.3 Custom Layers

### 5.3.1 Layers without Parameters

In [12]:
import torch 
from torch import nn
from torch.nn import functional as F

class CenteredLayer(nn.Module):
    """Parameter-free layer that subtracts the mean from its input."""

    def __init__(self):
        super().__init__()

    def forward(self, X):
        """Return ``X`` minus the mean taken over all of its elements."""
        mean = X.mean()
        return X - mean
    
# The parameter-free layer is placed after a built-in layer inside nn.Sequential.
net = nn.Sequential(nn.Linear(8,128), CenteredLayer())

### 5.3.2 Layers with Parameters

In [15]:
class MyLinear(nn.Module):
    """Fully connected layer with its own weight and bias, followed by ReLU.

    Args:
        in_units: size of the last dimension of the input.
        units: size of the last dimension of the output.
    """

    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units, units))
        self.bias = nn.Parameter(torch.randn(units,))

    def forward(self, X):
        """Return ``relu(X @ weight + bias)``."""
        # Use the parameters themselves, not their `.data`: `.data` is
        # detached from the autograd graph, so no gradient would ever reach
        # `weight` or `bias` and the layer could not be trained.
        linear = torch.matmul(X, self.weight) + self.bias
        return F.relu(linear)
linear = MyLinear(5,3)
# Not the last expression of the cell, so this value is not displayed.
linear.weight

# Two MyLinear layers stacked inside nn.Sequential.
net = nn.Sequential(MyLinear(64,8), MyLinear(8,1))
net(torch.rand(2,64))

tensor([[6.6415],
        [4.7749]])

### Exercises

In [22]:
# 1. Design a layer that takes an input and computes a tensor reduction
class ex1_Layer(nn.Module):
    """Bias-free linear map followed by ReLU.

    NOTE(review): the exercise statement asks for a "tensor reduction"; what
    is computed here is ``relu(X @ weight)`` -- confirm this is the intended
    solution.

    Args:
        in_units: size of the last dimension of the input.
        units: size of the last dimension of the output.
    """

    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units, units))

    def forward(self, X):
        """Return ``relu(X @ weight)``."""
        # Use the parameter itself, not `.data`: `.data` is detached from the
        # autograd graph, so `weight` would never receive a gradient.
        linear = torch.matmul(X, self.weight)
        return F.relu(linear)
# Stack two of the layers (20 -> 4 -> 1) and run them on a random (2, 20) input.
net = nn.Sequential(ex1_Layer(20,4), ex1_Layer(4,1))
net(torch.rand(2,20))

tensor([[0.0000],
        [1.3620]])

In [24]:
# 2. Design a layer that returns the leading half of the Fourier coefficients of the data.

import numpy as np
def fourier_series_coeff_numpy(f, T, N, return_complex=False):
    """Estimate the leading Fourier series coefficients of a periodic function.

    The function ``f`` is sampled at ``2*N + 2`` equispaced points of
    ``[0, T)`` and the samples are transformed with ``np.fft.rfft``.

    By default the real coefficients a0, {a1,a2,...}, {b1,b2,...} are
    returned, such that:

    f(t) ~= a0/2+ sum_{k=1}^{N} ( a_k*cos(2*pi*k*t/T) + b_k*sin(2*pi*k*t/T) )

    If return_complex is set to True, the complex coefficients
    {c0,c1,c2,...} are returned instead, such that:

    f(t) ~= sum_{k=-N}^{N} c_k * exp(i*2*pi*k*t/T)

    where c_{-n} = complex_conjugate(c_{n}).

    See http://en.wikipedia.org/wiki/Fourier_series for the relation between
    the real-valued and the complex-valued coefficients.

    Parameters
    ----------
    f : the periodic function, a callable like f(t); it is called once with
        the numpy array of sample points
    T : the period of the function f, so that f(0)==f(T)
    N : the highest harmonic for which coefficients are returned in the
        real-valued form
    return_complex : if True, return the complex coefficients instead of the
        (a0, a, b) triple

    Returns
    -------
    if return_complex == False, the function returns:

    a0 : float
    a,b : numpy float arrays of length N describing respectively the cosine
          and sine coeff.

    if return_complex == True, the function returns:

    c : numpy 1-dimensional complex-valued array of size N+2, i.e. the
        normalized ``rfft`` output c_0 ... c_{N+1}

    """
    # Per the sampling theorem, the sampling frequency must be larger than
    # twice the maximum frequency to be captured, hence 2 * N samples plus 2
    # extra ones. The count is an integer so that the points are equispaced.
    n_samples = 2 * N + 2
    t = np.linspace(0, T, n_samples, endpoint=False)

    # Normalize the one-sided spectrum by the number of samples.
    spectrum = np.fft.rfft(f(t)) / t.size
    if return_complex:
        return spectrum

    # Real form: double the one-sided spectrum, drop the last bin, and flip
    # the sign of the imaginary part to obtain the sine coefficients.
    doubled = 2 * spectrum
    harmonics = doubled[1:-1]
    return doubled[0].real, harmonics.real, -harmonics.imag
    
class ex2_Layer(nn.Module):
    """Layer that returns the leading half of the Fourier coefficients of its input.

    Args:
        in_units, units: shape of the ``weight`` parameter. The parameter is
            kept so that construction and ``state_dict()`` stay unchanged,
            but it takes no part in the forward computation.
    """

    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units, units))

    def forward(self, X):
        """Return the first ``X.shape[-1] // 2`` DFT coefficients of ``X``.

        The transform is taken along the last dimension. The result is a
        complex tensor, so no ReLU is applied to it.
        """
        # Work on the data that was passed in, rather than on names that
        # are not defined in this layer.
        coefficients = torch.fft.fft(X, dim=-1)
        half = X.shape[-1] // 2
        return coefficients[..., :half]