# 연산그래프 구조 변환, 배열 연산 테스트 [[참조](https://velog.io/@pre_f_86/series/PyTorch-AutoGrad%EB%9E%80-%EB%AC%B4%EC%97%87%EC%9D%B8%EA%B0%80)]

- PyTorch의 연산 로직을 이해하고, 이와 비슷하게 작동하도록 구현

- Param 객체 내의 Data가 숫자, 소수점 등의 Scalar 데이터 타입이 아닌 배열(Numpy) 객체를 활용하여 Matrix 데이터를 담을 수 있도록 구현

    - 숫자, 소수점을 하나씩 객체에 담는 경우에는 객체가 너무 많이 생성되어 연산량과 메모리 사용량이 증가하므로, 이 문제를 해결하고자 함



In [33]:
import numpy as np
import src as my

In [2]:

# Two length-10 leaf parameters that record gradients.
val1 = my.Param(np.zeros((10)), requires_grad=True)
val2 = my.Param(np.ones((10)), requires_grad=True)

# One expression chaining mul, pow, div, sub, unary minus and abs on whole arrays.
out = abs(-((val1*val2)*5**2/10-1))
# Reduce to a single node by adding the elements one at a time, then backpropagate.
summed = 0
for val in out:
    summed += val
summed.backward()
# Cell result: the gradients stored on both leaves.
val1.grad, val2.grad

(array([-2.5, -2.5, -2.5, -2.5, -2.5, -2.5, -2.5, -2.5, -2.5, -2.5]),
 array([-0., -0., -0., -0., -0., -0., -0., -0., -0., -0.]))

In [3]:

# PyTorch reference for the same expression. torch is imported here because
# no earlier cell in the notebook imports it, so a top-to-bottom run would
# otherwise fail with NameError.
import torch

val1 = torch.zeros((10), requires_grad=True)
val2 = torch.ones((10), requires_grad=True)

# Same operator chain as the custom-Param cell so the gradients are comparable.
out = abs(-((val1*val2)*5**2/10-1))
# Element-by-element accumulation, kept identical to the custom-Param cell.
summed = 0
for val in out:
    summed += val
summed.backward()
# Cell result: the gradients stored on both leaves.
val1.grad, val2.grad

(tensor([-2.5000, -2.5000, -2.5000, -2.5000, -2.5000, -2.5000, -2.5000, -2.5000,
         -2.5000, -2.5000]),
 tensor([-0., -0., -0., -0., -0., -0., -0., -0., -0., -0.]))

# 인덱싱(Get, Set) 그래디언트 테스트

- val1 : 연산 테스트, Leaf 노드까지 그래디언트가 잘 전달 되는지 확인

- val2 : val1과 동일

- val3 : Set 연산, Get 연산 테스트

- val4 : 중첩되는 Set 연산에 대한 테스트

In [None]:
import numpy as np
import torch

In [4]:

def test(val1, val2, val3, val4):
    """Run the get/set indexing scenario and print the gradient of every input.

    The same function is called with custom ``my.Param`` inputs and with
    ``torch`` tensors so the printed gradients can be compared.

    Args:
        val1: Leaf that enters through ``val1 + 5``.
        val2: Leaf multiplied element-wise with the result of ``val1 + 5``.
        val3: Leaf whose first five elements overwrite ``out2[2:7]``.
        val4: Leaf that overwrites ``out2[1:3]``; this overlaps index 2,
            which was already overwritten from ``val3``.
    """
    out1 = val1+5

    out2 = out1*val2

    # Slice assignment (set) fed from a slice read (get) of val3.
    out2[2:7] = val3[:5]

    # Second assignment overlapping the first one at index 2.
    out2[1:3] = val4

    # Fold the elements as out3 = (out3 + out2[i]) * 5, so each index
    # contributes with a different power of 5.
    out3 = 0
    for i in range(out2.data.shape[0]):
        out3 = (out3 + out2[i])*5
    out3.backward()
    print(val1.grad)
    print(val2.grad)
    print(val3.grad)
    print(val4.grad)

In [5]:
# Inputs for the indexing scenario, built with the custom Param type.
val1 = my.Param(np.zeros(10), requires_grad=True)
val2 = my.Param(np.ones(10), requires_grad=True)
val3 = my.Param(np.ones(10), requires_grad=True)
val4 = my.Param(np.ones(2), requires_grad=True)

# Prints the four gradients computed by the custom implementation.
test(val1, val2, val3, val4)

[9.765625e+06 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
 0.000000e+00 0.000000e+00 1.250000e+02 2.500000e+01 5.000000e+00]
[4.8828125e+07 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 0.0000000e+00 6.2500000e+02 1.2500000e+02 2.5000000e+01]
[    0. 78125. 15625.  3125.   625.     0.     0.     0.     0.     0.]
[1953125.  390625.]


In [6]:

# Same inputs as the custom-Param run, built as torch tensors.
val1 = torch.zeros(10, requires_grad=True)
val2 = torch.ones(10, requires_grad=True)
val3 = torch.ones(10, requires_grad=True)
val4 = torch.ones(2, requires_grad=True)

# Prints the reference gradients computed by PyTorch.
test(val1, val2, val3, val4)

tensor([9.7656e+06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 1.2500e+02, 2.5000e+01, 5.0000e+00])
tensor([4.8828e+07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 6.2500e+02, 1.2500e+02, 2.5000e+01])
tensor([    0., 78125., 15625.,  3125.,   625.,     0.,     0.,     0.,     0.,
            0.])
tensor([1953125.,  390625.])


# 레이어 연산 테스트

※ 브로드캐스팅으로 인해 역전파된 그래디언트의 차원이 맞지 않는 현상 발견 (Layer의 Bias 부분) — 아래 출력에서 Torch의 bias 그래디언트는 (4,) 형태인데, 구현체의 bias 그래디언트는 배치 차원이 합쳐지지 않은 (5, 4) 형태로 출력됨

- Linear 연산 이후 계산된 그래디언트 비교

In [1]:
import numpy as np
import src as my
import torch
import torch.nn as nn


In [2]:

class mymodel(my.layers.Module):
    """Stack of four custom Linear layers (2 -> 4 -> 4 -> 4 -> 1) with bias.

    NOTE(review): ``__init__`` does not call ``super().__init__()``; confirm
    that ``my.layers.Module`` needs no initialisation of its own.
    """

    def __init__(self):
        Linear = my.layers.Linear
        self.l1 = Linear(2, 4, bias=True)
        self.l2 = Linear(4, 4, bias=True)
        self.l3 = Linear(4, 4, bias=True)
        self.l4 = Linear(4, 1, bias=True)

    def forward(self, x):
        """Apply the four layers in order and add up the resulting elements.

        The outputs are combined with plain ``+`` starting from the integer 0,
        one element of the iteration at a time.
        """
        hidden = x
        for layer in (self.l1, self.l2, self.l3, self.l4):
            hidden = layer(hidden)
        # sum() starts at 0 and applies ``+`` left to right over the iteration.
        return sum(hidden)
# Forward and backward pass of the custom model on a (5, 2) batch of ones.
model = mymodel()
x = my.Param(np.ones((5,2)))

out = model(x)
out.backward()


In [3]:
import numpy as np
import src as my
# Rebuild the custom model and rerun the forward/backward pass on a (5, 2) batch of ones.
model = mymodel()
x = my.Param(np.ones((5,2)))

out = model(x)
out.backward()

# Print the output node, then the weight and bias gradient of every layer
# for comparison with the PyTorch run.
print()
print(out,out.shape)
print(model.l1.weight.grad)
print(model.l1.bias.grad)
print(model.l2.weight.grad)
print(model.l2.bias.grad)
print(model.l3.weight.grad)
print(model.l3.bias.grad)
print(model.l4.weight.grad)
print(model.l4.bias.grad)



Node (Data:[1065.], requrired_grad:True) (1,)
[[80. 80. 80. 80.]
 [80. 80. 80. 80.]]
[[16. 16. 16. 16.]
 [16. 16. 16. 16.]
 [16. 16. 16. 16.]
 [16. 16. 16. 16.]
 [16. 16. 16. 16.]]
[[60. 60. 60. 60.]
 [60. 60. 60. 60.]
 [60. 60. 60. 60.]
 [60. 60. 60. 60.]]
[[4. 4. 4. 4.]
 [4. 4. 4. 4.]
 [4. 4. 4. 4.]
 [4. 4. 4. 4.]
 [4. 4. 4. 4.]]
[[65. 65. 65. 65.]
 [65. 65. 65. 65.]
 [65. 65. 65. 65.]
 [65. 65. 65. 65.]]
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
[[265.]
 [265.]
 [265.]
 [265.]]
[[1.]
 [1.]
 [1.]
 [1.]
 [1.]]


In [4]:

class mymodel(nn.Module):
    """PyTorch reference: four Linear layers (2 -> 4 -> 4 -> 4 -> 1) with bias.

    Every weight and bias is set to the constant 1 on construction so the
    outputs and gradients are deterministic.
    """

    def __init__(self):
        super().__init__()
        self.l1 = nn.Linear(2, 4, bias=True)
        self.l2 = nn.Linear(4, 4, bias=True)
        self.l3 = nn.Linear(4, 4, bias=True)
        self.l4 = nn.Linear(4, 1, bias=True)

        self._initialize_weights()

    def _initialize_weights(self):
        """Fill the weight (and bias, when present) of each Linear child with 1."""
        for child in self.children():
            if not isinstance(child, nn.Linear):
                continue
            nn.init.constant_(child.weight, 1)
            if child.bias is None:
                continue
            nn.init.constant_(child.bias, 1)

    def forward(self, x):
        """Apply the four layers in order and add up the rows of the result."""
        hidden = x
        for layer in (self.l1, self.l2, self.l3, self.l4):
            hidden = layer(hidden)
        # sum() starts at 0 and applies ``+`` left to right over the rows.
        return sum(hidden)
    

# Forward and backward pass of the PyTorch model on a (5, 2) batch of ones.
model = mymodel()
x = torch.ones((5,2))
out = model(x)
out.backward()
# Cell result: the summed output tensor.
out

tensor([1065.], grad_fn=<AddBackward0>)

In [5]:
# Reference gradients from PyTorch: weight then bias, layer by layer.
print(model.l1.weight.grad)
print(model.l1.bias.grad)
print(model.l2.weight.grad)
print(model.l2.bias.grad)
print(model.l3.weight.grad)
print(model.l3.bias.grad)
print(model.l4.weight.grad)
print(model.l4.bias.grad)

tensor([[80., 80.],
        [80., 80.],
        [80., 80.],
        [80., 80.]])
tensor([80., 80., 80., 80.])
tensor([[60., 60., 60., 60.],
        [60., 60., 60., 60.],
        [60., 60., 60., 60.],
        [60., 60., 60., 60.]])
tensor([20., 20., 20., 20.])
tensor([[65., 65., 65., 65.],
        [65., 65., 65., 65.],
        [65., 65., 65., 65.],
        [65., 65., 65., 65.]])
tensor([5., 5., 5., 5.])
tensor([[265., 265., 265., 265.]])
tensor([5.])


# 서로 다른 차원을 가진 객체에 대한 역전파 테스트 [[참조](https://numpy.org/doc/stable/user/basics.broadcasting.html)]

- (5, 1) 과 (1)의 차원을 가진 두 객체 사이의 연산

- (5, 1) 과 (1, 5)의 차원을 가진 두 객체 사이의 연산

**Numpy 브로드캐스팅 규칙**

1. 두 차원의 길이가 맞지 않는 경우 길이가 맞을 때까지 작은 차원의 앞에 1을 붙임

    - (5, 2, 5) + (5,) -> (5, 2, 5) + (1, 1, 5)

2. 차원이 1인 경우 각 위치에 맞는 차원만큼 배열 복사

    - (5, 2, 5) + (1, 1, 5) -> (5, 2, 5) + (5, 2, 5)

In [13]:
import src as my
import numpy as np

In [14]:
# Multiply a (1, 5) operand by a (5, 1) operand, then backpropagate through
# the product.
tmp1 = my.Param(np.zeros((1, 5)), requires_grad=True)
tmp2 = my.Param(np.ones((5, 1)), requires_grad=True)
out = tmp1*tmp2
out.backward()

In [15]:
# Each gradient should come back in the shape of its own operand.
tmp1.grad, tmp1.grad.shape, tmp2.grad, tmp2.grad.shape

(array([[5., 5., 5., 5., 5.]]),
 (1, 5),
 array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]]),
 (5, 1))

# 학습 테스트

- 가중치를 1이 아닌 He Init을 사용하여 초기화

- Adam 구현

- ReLU 구현

In [16]:
import src as my
import numpy as np

In [42]:
def relu(x):
    """Set every negative element of ``x`` to 0 and return ``x`` itself.

    The update is written through item assignment on ``x``; no new object is
    created by this function.
    """
    negative = x < 0
    x[negative] = 0
    return x

class mymodel(my.layers.Module):
    """Four custom Linear layers (2 -> 16 -> 16 -> 16 -> 1) used for the training test.

    NOTE(review): ``__init__`` does not call ``super().__init__()``; confirm
    that ``my.layers.Module`` needs no initialisation of its own.
    """

    def __init__(self):
        Linear = my.layers.Linear
        self.l1 = Linear(2, 16, bias=True)
        self.l2 = Linear(16, 16, bias=True)
        self.l3 = Linear(16, 16, bias=True)
        self.l4 = Linear(16, 1, bias=True)

    def forward(self, x):
        """Run the first three layers, each followed by ``relu``, then the last layer."""
        hidden = x
        for layer in (self.l1, self.l2, self.l3):
            hidden = relu(layer(hidden))
        return self.l4(hidden)

# Inputs: a (100, 2) array whose two columns are both 0..99, wrapped in a
# Param and divided by 100. Targets: 0..99 as a (100, 1) column, divided by 100.
x = my.Param(np.stack([np.arange(100), np.arange(100)], 1))/100
y = my.Param(np.arange(100).reshape(-1, 1))/100

In [43]:

# Fresh model and an Adam optimizer over its parameters.
model = mymodel()
optim = my.optimizers.Adam(params=model.parameters(),lr=1e-3)
for i in range(1000):
    out = model(x)
    
    # Element-wise squared error against the targets.
    loss = (out-y)**2

    # Mean over the samples, accumulated one element at a time; the divisor
    # 100 matches the 100 rows of the dataset built above.
    loss_mean = 0
    for l in loss:
        loss_mean = loss_mean + l/100

    
    loss_mean.backward()
    # presumably applies the Adam step, then clears the gradients for the
    # next iteration -- confirm against my.optimizers.Adam
    optim.update()
    optim.zero_grad()
    # Log the loss every 10 iterations.
    if (i%10==0):
        print(loss_mean)  
# Query the trained model with an input far outside the training range.
model([1000,1000])

Data:[0.18300367], requrired_grad:True
Data:[0.04558312], requrired_grad:True
Data:[0.00253195], requrired_grad:True
Data:[2.43981414e-06], requrired_grad:True
Data:[0.00052742], requrired_grad:True
Data:[0.00049233], requrired_grad:True
Data:[0.00027985], requrired_grad:True
Data:[0.0001116], requrired_grad:True
Data:[3.78963245e-05], requrired_grad:True
Data:[1.0044037e-05], requrired_grad:True
Data:[1.74961075e-06], requrired_grad:True
Data:[7.58300889e-08], requrired_grad:True
Data:[3.37215609e-08], requrired_grad:True
Data:[8.47882185e-08], requrired_grad:True
Data:[6.00794388e-08], requrired_grad:True
Data:[2.55156315e-08], requrired_grad:True
Data:[7.84502031e-09], requrired_grad:True
Data:[1.72523721e-09], requrired_grad:True
Data:[2.06079571e-10], requrired_grad:True
Data:[1.89906463e-11], requrired_grad:True
Data:[4.19445674e-11], requrired_grad:True
Data:[3.81100717e-11], requrired_grad:True
Data:[1.73545469e-11], requrired_grad:True
Data:[4.54230245e-12], requrired_grad:Tru

Data:[1000.], requrired_grad:True

# 주요 배열 연산 테스트

※ 이 과정에서 Param 객체 생성시 Numpy 스칼라인 경우에는 허용되지 않는 경우(버그) 수정

- Sum(), Mean() 연산 : 그래디언트 테스트, 연산 결과 테스트

- Reshape, Stack, Concat 연산 : 그래디언트 테스트, 연산 결과 테스트

- arange, ones, zeros, full, ~_like 연산 : 결과 테스트

In [3]:
import src as my
import numpy as np
import torch
import torch.nn as nn

In [4]:
# 54 elements reshaped to (3, 2, 9); summing over axes 2 and 0 leaves the
# axis of length 2.
arr = my.arange(54, requires_grad=True)
arr2 = arr.reshape((3, 2, 9))
out = arr2.sum((2, 0))
# Mean of the two sums scaled by 9, so every input element gets 9 / 2 = 4.5.
(out.mean()*9).backward()
print(out, out.shape, arr.grad.shape)
print(arr.grad)

(2,) [1, 2, 1]
Data:[594 837], requrired_grad:True (2,) (54,)
[4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5
 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5
 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5 4.5]


In [5]:
# PyTorch reference for the reshape -> sum -> mean chain, written call for
# call like the custom-implementation cell.
arr = torch.arange(54, dtype=torch.float32, requires_grad=True)
arr2 = arr.reshape((3, 2, 9))
out = arr2.sum((2, 0))
# Mean of the two sums scaled by 9, so every input element gets 9 / 2 = 4.5.
(out.mean()*9).backward()
print(out, out.shape, arr.grad.shape)
print(arr.grad)

tensor([594., 837.], grad_fn=<SumBackward1>) torch.Size([2]) torch.Size([54])
tensor([4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000,
        4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000,
        4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000,
        4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000,
        4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000,
        4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000, 4.5000])


In [6]:
# Three (1, 1, 3) leaves built with the ones / zeros / full constructors.
arr1 = my.ones((1,1,3), requires_grad=True)
arr2 = my.zeros((1,1,3), requires_grad=True)
arr3 = my.full((1,1,3), 3, requires_grad=True)
stack = my.stack([arr1, arr2, arr3], 0)
concat = my.concat([arr1, arr2, arr3], 0)
# Two separate backward passes feed the same leaves, so each gradient
# entry ends up as 1 + 1 = 2.
stack.sum().backward()
concat.sum().backward()

arr1.grad, arr2.grad, arr3.grad

(array([[[2., 2., 2.]]]), array([[[2., 2., 2.]]]), array([[[2., 2., 2.]]]))

In [7]:
# PyTorch reference: the same three (1, 1, 3) leaves.
arr1 = torch.ones((1,1,3), requires_grad=True)
arr2 = torch.zeros((1,1,3), requires_grad=True)
arr3 = torch.full((1,1,3), 3, dtype=torch.float32, requires_grad=True)
stack = torch.stack([arr1, arr2, arr3], 0)
concat = torch.concat([arr1, arr2, arr3], 0)
# Two separate backward passes accumulate into the same leaves, so each
# gradient entry ends up as 1 + 1 = 2.
stack.sum().backward()
concat.sum().backward()

arr1.grad, arr2.grad, arr3.grad

(tensor([[[2., 2., 2.]]]), tensor([[[2., 2., 2.]]]), tensor([[[2., 2., 2.]]]))

# Convolution 연산 테스트

- Torch와의 Gradient, Output 비교

- Padding(0, 1, 2) 에서의 비교

- Kernel_size (2, 3) 에서 비교

- Stride (1, 2) 에서 비교

- Non-Batch, Batch 에서 비교

- Backward 단계에서 연산 시간이 오래 걸려 Convolution grad function 구현 및 테스트

In [1]:
import numpy as np
import src as my
import torch
import torch.nn as nn


In [2]:

class mymodel(my.layers.Module):
    """Four custom Conv1d layers (channels 2 -> 4 -> 4 -> 4 -> 1) with bias.

    Every layer is built with positional arguments ``3, 2`` and ``padding=2``.
    NOTE(review): the positional pair is presumably (kernel_size, stride), as
    in ``torch.nn.Conv1d`` -- confirm against ``my.layers.Conv1d``.
    NOTE(review): ``__init__`` does not call ``super().__init__()``; confirm
    that ``my.layers.Module`` needs no initialisation of its own.
    """

    def __init__(self):
        Conv1d = my.layers.Conv1d
        self.l1 = Conv1d(2, 4, 3, 2, padding=2, bias=True)
        self.l2 = Conv1d(4, 4, 3, 2, padding=2, bias=True)
        self.l3 = Conv1d(4, 4, 3, 2, padding=2, bias=True)
        self.l4 = Conv1d(4, 1, 3, 2, padding=2, bias=True)

    def forward(self, x):
        """Apply the four convolutions, print the final shape, return the total sum."""
        feat = x
        for conv in (self.l1, self.l2, self.l3, self.l4):
            feat = conv(feat)
        print(feat.shape)
        return feat.sum()
# Non-batched input: 2 channels of length 18, all ones.
model = mymodel()
x = my.ones((2, 18))

out = model(x)
out.backward()
# Cell result: the summed output node.
out

(1, 3)


Data:13419.0, requrired_grad:True

In [3]:

# Gradients from the custom Conv1d layers: weight then bias, layer by layer.
print(model.l1.weight.grad)
print(model.l1.bias.grad)
print(model.l2.weight.grad)
print(model.l2.bias.grad)
print(model.l3.weight.grad)
print(model.l3.bias.grad)
print(model.l4.weight.grad)
print(model.l4.bias.grad)


[[[448. 448. 496.]
  [448. 448. 496.]]

 [[448. 448. 496.]
  [448. 448. 496.]]

 [[448. 448. 496.]
  [448. 448. 496.]]

 [[448. 448. 496.]
  [448. 448. 496.]]]
[[512.]
 [512.]
 [512.]
 [512.]]
[[[264. 272. 288.]
  [264. 272. 288.]
  [264. 272. 288.]
  [264. 272. 288.]]

 [[264. 272. 288.]
  [264. 272. 288.]
  [264. 272. 288.]
  [264. 272. 288.]]

 [[264. 272. 288.]
  [264. 272. 288.]
  [264. 272. 288.]
  [264. 272. 288.]]

 [[264. 272. 288.]
  [264. 272. 288.]
  [264. 272. 288.]
  [264. 272. 288.]]]
[[52.]
 [52.]
 [52.]
 [52.]]
[[[268. 288. 281.]
  [268. 288. 281.]
  [268. 288. 281.]
  [268. 288. 281.]]

 [[268. 288. 281.]
  [268. 288. 281.]
  [268. 288. 281.]
  [268. 288. 281.]]

 [[268. 288. 281.]
  [268. 288. 281.]
  [268. 288. 281.]
  [268. 288. 281.]]

 [[268. 288. 281.]
  [268. 288. 281.]
  [268. 288. 281.]
  [268. 288. 281.]]]
[[6.]
 [6.]
 [6.]
 [6.]]
[[[1074. 1206. 1074.]
  [1074. 1206. 1074.]
  [1074. 1206. 1074.]
  [1074. 1206. 1074.]]]
[[3.]]


In [5]:

class mymodel(nn.Module):
    """PyTorch reference: four Conv1d layers (channels 2 -> 4 -> 4 -> 4 -> 1).

    Each layer uses kernel size 3, stride 2, padding 2 and a bias. Every
    weight and bias is set to the constant 1 on construction so the outputs
    and gradients are deterministic.
    """

    def __init__(self):
        super().__init__()
        self.l1 = nn.Conv1d(2, 4, 3, 2, padding=2, bias=True)
        self.l2 = nn.Conv1d(4, 4, 3, 2, padding=2, bias=True)
        self.l3 = nn.Conv1d(4, 4, 3, 2, padding=2, bias=True)
        self.l4 = nn.Conv1d(4, 1, 3, 2, padding=2, bias=True)

        self._initialize_weights()

    def _initialize_weights(self):
        """Fill the weight (and bias, when present) of each Conv1d child with 1."""
        for child in self.children():
            if not isinstance(child, nn.Conv1d):
                continue
            nn.init.constant_(child.weight, 1)
            if child.bias is None:
                continue
            nn.init.constant_(child.bias, 1)

    def forward(self, x):
        """Apply the four convolutions, print the final shape, return the total sum."""
        feat = x
        for conv in (self.l1, self.l2, self.l3, self.l4):
            feat = conv(feat)
        print(feat.shape)
        return feat.sum()
    

# Non-batched input: 2 channels of length 18, all ones.
model = mymodel()
x = torch.ones((2, 18))
out = model(x)
out.backward()
# Cell result: the summed output tensor.
out

torch.Size([1, 3])


tensor(13419., grad_fn=<SumBackward0>)

In [6]:
# Reference gradients from PyTorch Conv1d: weight then bias, layer by layer.
print(model.l1.weight.grad)
print(model.l1.bias.grad)
print(model.l2.weight.grad)
print(model.l2.bias.grad)
print(model.l3.weight.grad)
print(model.l3.bias.grad)
print(model.l4.weight.grad)
print(model.l4.bias.grad)

tensor([[[448., 448., 496.],
         [448., 448., 496.]],

        [[448., 448., 496.],
         [448., 448., 496.]],

        [[448., 448., 496.],
         [448., 448., 496.]],

        [[448., 448., 496.],
         [448., 448., 496.]]])
tensor([512., 512., 512., 512.])
tensor([[[264., 272., 288.],
         [264., 272., 288.],
         [264., 272., 288.],
         [264., 272., 288.]],

        [[264., 272., 288.],
         [264., 272., 288.],
         [264., 272., 288.],
         [264., 272., 288.]],

        [[264., 272., 288.],
         [264., 272., 288.],
         [264., 272., 288.],
         [264., 272., 288.]],

        [[264., 272., 288.],
         [264., 272., 288.],
         [264., 272., 288.],
         [264., 272., 288.]]])
tensor([52., 52., 52., 52.])
tensor([[[268., 288., 281.],
         [268., 288., 281.],
         [268., 288., 281.],
         [268., 288., 281.]],

        [[268., 288., 281.],
         [268., 288., 281.],
         [268., 288., 281.],
         [268., 288.,