[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/DoranLyong/Awesome-Tensor-Architecture/blob/main/pytorch_reference/simple_reference/05_Customizing/01_Customizing_PyTorch.ipynb)

# Customizing PyTorch 

In [8]:
import math 

import numpy as np 

import torch 
import torch.nn as nn 
import torch.nn.functional as F 

In [2]:
# Use the first CUDA device when one is available; otherwise fall back to the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

## Custom Layers and Activations (p.136)
* create custom ```layers``` and ```activations``` using functional definition (```nn.functional```)


In [11]:
# Define your Linear-layer 

def linear(input, weight, bias=None):
    """Apply the affine map ``input @ weight.t() + bias``.

    Args:
        input: tensor multiplied against the transposed weight.
        weight: tensor that is transposed with ``.t()`` before the product.
        bias: optional tensor added to the product; skipped when ``None``.

    Returns:
        The transformed tensor.
    """
    w_t = weight.t()

    # 2-D input with a bias: one fused call computes bias + input @ W^T.
    # (ref) https://pytorch.org/docs/stable/generated/torch.addmm.html
    if input.dim() == 2 and bias is not None:
        return torch.addmm(bias, input, w_t)

    # General case: plain product first, then add the bias in place if given.
    # (ref) https://pytorch.org/docs/stable/generated/torch.matmul.html
    product = input.matmul(w_t)
    if bias is not None:
        product += bias
    return product

In [15]:
# Exercise linear() twice on the same 2-D input: once with a bias
# (the fused addmm branch) and once without (the matmul branch).
b = torch.randn(2, 3) 
x = torch.randn(2, 3)
W = mat2 = torch.randn(3, 3)  # `mat2` is a second name for the same tensor

output1 = linear(x, W, b)
output2 = linear(x, W)

print(output1)
print(output2)

tensor([[-3.0764,  6.3686,  3.0655],
        [ 1.3306,  0.1079,  2.6226]])
tensor([[-3.7471,  5.7551,  2.1965],
        [ 0.9115, -0.0833,  3.0799]])


### 클래스로 디자인하기 

In [23]:
class Linear(nn.Module):
    """Fully connected layer computing ``y = x @ W.t() + b``.

    Args:
        in_features: size of each input sample.
        out_features: size of each output sample.
        bias: if ``False``, the layer does not learn an additive bias
            (default: ``True``).

    Attributes:
        weight: learnable weights of shape ``(out_features, in_features)``.
        bias: learnable bias of shape ``(out_features,)``, or ``None`` when
            the layer was built with ``bias=False``.

    Examples:
        >> m = Linear(20, 30)
        >> input = torch.randn(128, 20)
        >> output = m(input)
        >> print(output.size())
        torch.Size([128, 30])
    """

    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features

        # Allocate uninitialised storage; reset_parameters() fills it below.
        self.weight = nn.parameter.Parameter(
            data=torch.empty(out_features, in_features),
            requires_grad=True,
        )

        if bias:
            self.bias = nn.parameter.Parameter(torch.empty(out_features))
        else:
            # Register the name anyway so `self.bias` exists and is None.
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        """(Re)initialise the weight and, when present, the bias in place."""
        # (ref) https://pytorch.org/docs/stable/nn.init.html#
        nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))

        if self.bias is not None:
            # For a 2-D (out_features, in_features) weight the fan-in is the
            # number of input columns.
            fan_in = self.weight.size(1)
            # Guard the degenerate zero-width layer against division by zero.
            bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
            nn.init.uniform_(self.bias, -bound, bound)

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        """Apply the layer to ``input`` using the functional ``linear``."""
        # (ref) https://pytorch.org/docs/stable/generated/torch.nn.functional.linear.html#torch.nn.functional.linear
        return linear(input, self.weight, self.bias)

In [20]:
# Build the custom layer and print the output size for a batch of 128 samples.
m = Linear(20, 30, True)
input = torch.randn(128, 20)  # NOTE: this rebinds the builtin name `input` for the session
output = m(input)
print(output.size())

torch.Size([128, 30])


## Custom Layer Example (Complex Linear) (p.138)
* complex number ; ```a + jb```

In [16]:
def complex_linear(in_r, in_i, w_r, w_i, b_i, b_r):
    """Apply a complex-valued linear map given as real/imaginary parts.

    Computes ``(in_r + j*in_i) @ (w_r + j*w_i).t() + (b_r + j*b_i)``.

    Args:
        in_r: real part of the input.
        in_i: imaginary part of the input.
        w_r: real part of the weight (transposed before the product).
        w_i: imaginary part of the weight (transposed before the product).
        b_i: imaginary part of the bias.
        b_r: real part of the bias.

    Note:
        The bias parameters are ordered imaginary-first (``b_i, b_r``),
        unlike the input and weight pairs. Pass them by keyword to avoid
        swapping them.

    Returns:
        Tuple ``(out_r, out_i)`` with the real and imaginary parts.
    """
    w_r_t = w_r.t()
    w_i_t = w_i.t()

    # (a + jb)(c + jd) = (ac - bd) + j(ad + bc)
    out_r = in_r.matmul(w_r_t) - in_i.matmul(w_i_t) + b_r  # real part
    out_i = in_r.matmul(w_i_t) + in_i.matmul(w_r_t) + b_i  # imaginary part

    return out_r, out_i

### 클래스로 디자인하기

In [21]:
class ComplexLinear(nn.Module):
    """Linear layer for complex numbers stored as separate real/imaginary tensors.

    Args:
        in_features: size of each input sample.
        out_features: size of each output sample.

    Attributes:
        weight_r, weight_i: real and imaginary weights, each of shape
            ``(out_features, in_features)``.
        bias_r, bias_i: real and imaginary biases, each of shape
            ``(out_features,)``.
    """

    def __init__(self, in_features, out_features):
        # Zero-argument super() always resolves to this class.
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight_r = nn.parameter.Parameter(torch.randn(out_features, in_features))
        self.weight_i = nn.parameter.Parameter(torch.randn(out_features, in_features))
        self.bias_r = nn.parameter.Parameter(torch.randn(out_features))
        self.bias_i = nn.parameter.Parameter(torch.randn(out_features))

    def forward(self, in_r, in_i):
        """Return ``(out_r, out_i)`` for the input ``in_r + j*in_i``."""
        # complex_linear declares its biases as (b_i, b_r), so pass them by
        # keyword to keep the real and imaginary biases from being swapped.
        return complex_linear(
            in_r,
            in_i,
            self.weight_r,
            self.weight_i,
            b_r=self.bias_r,
            b_i=self.bias_i,
        )

In [22]:
class ComplexLinearSimple(nn.Module):
    """Complex linear layer assembled from two ordinary ``nn.Linear`` modules.

    ``fc_r`` plays the role of the real weights and ``fc_i`` the imaginary
    weights; each is applied to both the real and the imaginary input.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        self.fc_r = nn.Linear(in_features, out_features)
        self.fc_i = nn.Linear(in_features, out_features)

    def forward(self, in_r, in_i):
        """Return the tuple ``(real, imaginary)`` of the layer output."""
        real_part = self.fc_r(in_r) - self.fc_i(in_i)
        imag_part = self.fc_r(in_i) + self.fc_i(in_r)
        return (real_part, imag_part)

여기까지, custom-layer 디자인하는 방법 정리 
***


## Custom Activation Example (p.141)

In [24]:
def my_relu(input, thresh=0.0):
    """Keep entries strictly greater than ``thresh``; replace the rest with 0.

    Args:
        input: tensor to filter.
        thresh: cut-off value; entries equal to it are zeroed as well.

    Returns:
        A tensor shaped like ``input``.
    """
    # (ref) https://pytorch.org/docs/stable/generated/torch.where.html?highlight=torch%20where#torch.where
    keep = input > thresh
    zeros = torch.zeros_like(input)
    return torch.where(keep, input, zeros)

In [27]:
# Try my_relu on a random 2x2 tensor with the default threshold of 0.0.
x = torch.randn(2,2)
print(x)
print(my_relu(x))

tensor([[ 0.7550,  0.9892],
        [-0.0580, -0.4163]])
tensor([[0.7550, 0.9892],
        [0.0000, 0.0000]])


### 클래스 형태로 디자인 

In [28]:
class MyReLU(nn.Module):
    """Module wrapper around ``my_relu`` that stores the threshold."""

    def __init__(self, thresh=0.0):
        super().__init__()
        self.thresh = thresh  # cut-off handed to my_relu on every call

    def forward(self, input):
        """Apply ``my_relu`` to ``input`` with the stored threshold."""
        threshold = self.thresh
        return my_relu(input, threshold)

In [31]:
# Same check as for my_relu, this time through the module wrapper.
relu = MyReLU(thresh=0.0)
x = torch.randn(2,2)

print(x)
print(relu(x))

tensor([[-0.8367,  0.4339],
        [-0.9981, -0.5278]])
tensor([[0.0000, 0.4339],
        [0.0000, 0.0000]])


### How to use activation? 
When building an ```NN```, it is common to use the functional version of the activation function. <br/>
But, a class version can also be used if available. 
* functional version 
* class version 

In [34]:
# functional version ReLU usage 

class SimpleNet(nn.Module):
    """Two-layer perceptron that applies ReLU through the functional API."""

    def __init__(self, D_in, H, D_out):
        super().__init__()
        # input -> hidden
        self.fc1 = nn.Linear(D_in, H)
        # hidden -> output
        self.fc2 = nn.Linear(H, D_out)

    def forward(self, x):
        """Run ``x`` through fc1, ReLU, then fc2."""
        hidden = self.fc1(x)
        activated = F.relu(hidden)  # functional version ReLU
        return self.fc2(activated)

In [35]:
# class version ReLU usage 

class SimpleNet(nn.Module):
    """Two-layer perceptron that uses the module form of ReLU in a Sequential."""

    def __init__(self, D_in, H, D_out):
        super().__init__()
        layers = [
            nn.Linear(D_in, H),   # input -> hidden
            nn.ReLU(),            # class version ReLU
            nn.Linear(H, D_out),  # hidden -> output
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack."""
        result = self.net(x)
        return result

## Custom Activation Example (Complex ReLU) (p.142)
* to handle complex values from the ```ComplexLinear``` layer 

In [36]:
# functional version 

def complex_relu(in_r, in_i):
    """Apply ReLU to the real and imaginary parts independently.

    Args:
        in_r: real part.
        in_i: imaginary part.

    Returns:
        Tuple ``(relu(in_r), relu(in_i))``.
    """
    return tuple(F.relu(part) for part in (in_r, in_i))

In [37]:
# class version 

class ComplexReLU(nn.Module):
    """Module form of ``complex_relu``; it holds no parameters or state."""

    def __init__(self):
        super().__init__()

    def forward(self, in_r, in_i):
        """Return ``complex_relu`` applied to the (real, imaginary) pair."""
        activated = complex_relu(in_r, in_i)
        return activated

Now you have learned how to create your own ```layers``` and ```activations```!

## Custom Model Architectures (p.143)
* build your ```AlexNet```

In [42]:
class AlexNet(nn.Module):
    """AlexNet image classifier.

    Args:
        num_classes: number of output logits (default: 1000).

    Attributes:
        features: convolutional feature extractor.
        avgpool: adaptive average pool that fixes the feature map to 6x6.
        classifier: fully connected head producing ``num_classes`` logits.

    The order of the layers inside each ``nn.Sequential`` determines the
    state-dict keys, so keep it unchanged if checkpoints are to be loaded.
    """

    def __init__(self, num_classes=1000):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Fixes the spatial size to 6x6 so the first Linear layer below always
        # receives 256 * 6 * 6 features.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return class logits for a batch of 3-channel images ``x``."""
        x = self.features(x)
        x = self.avgpool(x)      # the pooling layer must receive the feature map
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
        x = self.classifier(x)
        return x

The factory function below takes two options: ```pretrained``` and ```progress```.
* ```torchvision.models.alexnet(pretrained=True)``` 이 동작하는 방식 설명 

In [43]:
from torch.hub import load_state_dict_from_url
# Download locations of the pretrained checkpoints, keyed by model name.
model_urls = {
    'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
}


def alexnet(pretrained=False, progress=True, **kwargs):
    """Build an ``AlexNet``, optionally loading the pretrained checkpoint.

    Args:
        pretrained: when true, fetch the checkpoint listed in ``model_urls``
            and load it into the model.
        progress: forwarded to ``load_state_dict_from_url``.
        **kwargs: forwarded to the ``AlexNet`` constructor.

    Returns:
        The constructed model.
    """
    net = AlexNet(**kwargs)
    if not pretrained:
        return net

    weights = load_state_dict_from_url(model_urls['alexnet'], progress=progress)
    net.load_state_dict(weights)
    return net

In [45]:
# Try the factory above; pretrained=True takes the checkpoint-loading branch.
model = alexnet(pretrained=True)

Downloading: "https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth" to /home/milky/.cache/torch/hub/checkpoints/alexnet-owt-4df8aa71.pth
100.0%


## Custom Loss Functions (p.145)

In [52]:
# Dummy tensors so the loss examples below can run.
# `outputs` tracks gradients; `targets` does not.
outputs = torch.rand((10,10), requires_grad=True)
targets = torch.rand((10,10))

In [53]:
# Built-in version: the loss is a module that is instantiated, then called.
loss_fcn = nn.MSELoss()
loss = loss_fcn(outputs, targets)
loss.backward() # perform backpropagation from the loss

print(loss)

tensor(0.1392, grad_fn=<MseLossBackward0>)


In [59]:
# functional version 
def mse_loss(input, target):
    """Return the mean of the squared element-wise difference.

    Args:
        input: predicted values.
        target: reference values.
    """
    residual = input - target
    squared = residual ** 2
    return squared.mean()

In [61]:
# Compare the hand-written loss with the library's functional version.
loss = mse_loss(outputs, targets)
print(loss)

print(F.mse_loss(outputs, targets))

tensor(0.1392, grad_fn=<MeanBackward0>)
tensor(0.1392, grad_fn=<MseLossBackward0>)


In [62]:
# class version 
class MSELoss(nn.Module):
    """Module form of the MSE loss; delegates to ``F.mse_loss``."""

    def __init__(self):
        super().__init__()

    def forward(self, input, target):
        """Return ``F.mse_loss(input, target)``."""
        loss_value = F.mse_loss(input, target)
        return loss_value

In [65]:
# Use the module version as the criterion on the same dummy tensors.
criterion = MSELoss()
loss = criterion(outputs, targets)

print(loss)

tensor(0.1392, grad_fn=<MseLossBackward0>)


### MSE Loss for Complex Numbers 

In [66]:
def complex_mse_loss(input_r, input_i, target_r, target_i):
    """Compute the mean squared error of each complex component separately.

    Args:
        input_r: real part of the prediction.
        input_i: imaginary part of the prediction.
        target_r: real part of the target.
        target_i: imaginary part of the target.

    Returns:
        Tuple ``(real_loss, imaginary_loss)``.
    """
    err_r = input_r - target_r  # real part
    err_i = input_i - target_i  # imaginary part
    return ((err_r ** 2).mean(), (err_i ** 2).mean())


class ComplexMSELoss(nn.Module):
    """MSE criterion for values given as separate real/imaginary tensors.

    Args:
        real_only: when true, ``forward`` returns a single loss computed from
            the real parts only; otherwise it returns the
            ``(real_loss, imaginary_loss)`` tuple from ``complex_mse_loss``.
    """

    def __init__(self, real_only=False):
        super().__init__()
        self.real_only = real_only

    def forward(self, input_r, input_i, target_r, target_i):
        """Return the loss in the form selected by ``real_only``."""
        if not self.real_only:
            return complex_mse_loss(input_r, input_i, target_r, target_i)
        return F.mse_loss(input_r, target_r)

In [68]:
# Try the criterion above. The same tensors are passed as both the real and
# the imaginary parts, so the two losses in the returned tuple are equal.
criterion = ComplexMSELoss()
loss = criterion(outputs, outputs, targets, targets)

print(loss)

(tensor(0.1392, grad_fn=<MeanBackward0>), tensor(0.1392, grad_fn=<MeanBackward0>))


## Custom Optimizer Algorithms (p.147)