# FCLayer의 Backpropagation을 이용한 weight update실습

다음과 같은 내용을 손글씨 내용정리를 통하여 알고 있습니다!

## Feedforward

$Input : a^1$ \
$z^\ell = w^\ell a^{\ell-1} + b^\ell$ and $a^\ell = \sigma(z^\ell)$ &nbsp; where &nbsp; $\ell = 2,3, ... L$ 


## Backward
output error : $\delta^\ell = {\partial L\over\partial z^\ell} = {\partial L\over\partial a^\ell} {\partial a^\ell\over\partial z^\ell}$ &nbsp; where &nbsp; $L$ : Loss \
$\delta^\ell = (w^{\ell+1})^T\delta^{\ell+1} \odot \sigma'(z^\ell)$ &nbsp; where &nbsp; $\ell = L-1, L-2, ... 2$
* 이때 실습에서는 MSELoss를 사용했습니다. \
MSELoss = $\frac{1}{N} \sum_{i=1}^{N}(a^L_i-t_i)^2$ &nbsp; where &nbsp; $t = target$ 


## weight update
$w^\ell$ -> $w^\ell - lr {\partial L\over\partial w^\ell}$ &nbsp; where &nbsp; ${\partial L\over\partial w^\ell} = \delta^\ell (a^{\ell-1})^T$ \
$b^\ell$ -> $b^\ell - lr {\partial L\over\partial b^\ell}$ &nbsp; where &nbsp; ${\partial L\over\partial b^\ell} = \delta^\ell$

# (문제) numpy를 이용한 NumpyNet을 구현 하세요
- 위 내용을 참고하여 아래의 빈칸을 채워 넣으세요
- 아래 파이토치 모델과 동일한 weight update모델을 만드는 것이 목표입니다.
- 아래의 예제문제를 통하여 weight의 변화를 확인하세요!

In [1]:
import numpy as np

class NumpyNet:
    """Minimal fully connected network with hand-written backpropagation.

    Each layer computes ``z = w @ a_prev + b`` followed by ``a = f(z)``,
    where ``f`` is the same element-wise activation for every layer. The
    loss is the mean squared error over the output vector. Inputs are
    expected to be single 1-D vectors (no batch dimension).

    State kept between calls:
        weight, bias     -- lists of float64 arrays, one pair per layer.
        forward_result   -- ``[x, z_1, ..., z_n]``: the input followed by
                            each layer's pre-activation.
        activations      -- ``[x, a_1, ..., a_n]``: the input followed by
                            each layer's activation output.
        backward_result  -- ``[delta_1, ..., delta_n]``: dLoss/dz per layer.
    """

    def __init__(self, Weight, Bias, Activation=lambda x: x):
        """Store copies of the parameters as float64 arrays.

        Args:
            Weight: sequence of 2-D array-likes, one ``(out, in)`` matrix
                per layer.
            Bias: sequence of 1-D array-likes, one vector per layer.
            Activation: element-wise callable applied after every layer.
                Defaults to the identity.
        """
        assert len(Weight) == len(Bias), "Weight와 Bias의 길이가 같아야합니다."
        self.weight = [np.array(w, dtype=np.float64) for w in Weight]
        self.bias = [np.array(b, dtype=np.float64) for b in Bias]
        self.f = Activation

    def __call__(self, x):
        """Alias for :meth:`forward`."""
        return self.forward(x)

    def forward(self, x):
        """Run the network on ``x`` and cache the intermediate values.

        Args:
            x: 1-D array-like input vector.

        Returns:
            The activation of the last layer as a float64 array.
        """
        # Always take a float64 copy so the cached input cannot be changed
        # by the caller afterwards.
        x = np.array(x, dtype=np.float64)

        self.forward_result = [x]
        self.activations = [x]
        for w, b in zip(self.weight, self.bias):
            z = w @ x + b
            x = self.f(z)
            self.forward_result.append(z)
            self.activations.append(x)
        return x

    def backward(self, predict, target, h=1e-8):
        """Compute the per-layer deltas for the most recent forward pass.

        Args:
            predict: network output returned by :meth:`forward`.
            target: desired output, same length as ``predict``.
            h: step size of the central difference used to differentiate
                the activation function.

        Returns:
            The scalar MSE loss between ``predict`` and ``target``.
        """
        assert len(predict) == len(target), "prdict와 target의 길이가 같아야합니다."

        # mse_loss seeds backward_result with the output-layer delta.
        loss = self.mse_loss(predict, target, h)
        for idx in range(len(self.weight) - 1, 0, -1):
            w = self.weight[idx]
            # New deltas are inserted at the front, so index 0 is always
            # the delta of the layer directly above the one being computed.
            d = self.backward_result[0]
            z = self.forward_result[idx]
            new_d = (w.T @ d) * self.diff(self.f, z, h)
            self.backward_result.insert(0, new_d)
        return loss

    def weight_update(self, lr=0.01):
        """Apply one gradient-descent step using the cached deltas.

        Must be called after :meth:`forward` and :meth:`backward`.

        Args:
            lr: learning rate.
        """
        # The gradient of layer k is delta_k (outer) a_{k-1}; for the first
        # layer a_0 is the raw input, which must NOT go through the
        # activation.
        layer_inputs = self.activations[:-1]
        deltas = self.backward_result

        self.weight = [
            w - lr * np.outer(d, a)
            for w, a, d in zip(self.weight, layer_inputs, deltas)
        ]
        self.bias = [b - lr * d for b, d in zip(self.bias, deltas)]

    def mse_loss(self, predict, target, h=1e-8):
        """Return the MSE loss and seed ``backward_result``.

        Side effect: ``self.backward_result`` is reset to a one-element
        list holding the output-layer delta
        ``dLoss/da * f'(z_last)``. Requires a prior :meth:`forward` call.

        Args:
            predict: network output vector.
            target: desired output vector.
            h: step size for the numerical derivative of the activation.

        Returns:
            The scalar loss ``sum((predict - target) ** 2) / N``.
        """
        predict = np.asarray(predict, dtype=np.float64)
        target = np.asarray(target, dtype=np.float64)

        # MSE is a fixed quadratic, so use its exact gradient rather than a
        # finite-difference estimate.
        dloss_da = 2.0 * (predict - target) / len(predict)
        d = dloss_da * self.diff(self.f, self.forward_result[-1], h)
        self.backward_result = [d]

        loss = self.mse(predict, target)
        return loss.sum()

    def mse(self, predict, target):
        """Return the element-wise squared error divided by ``len(predict)``."""
        return np.divide(np.power(predict - target, 2), len(predict))

    def diff(self, f, x, h):
        """Return the central-difference derivative of ``f`` at ``x``."""
        return (f(x + h) - f(x - h)) / (2 * h)

## pytorch를 이용한 TorchNet

In [2]:
import torch
from torch import nn

class TorchNet(nn.Module):
    """Stack of ``nn.Linear`` layers initialised from explicit parameters.

    Every layer is created in float64 from the given weight matrix and bias
    vector. When an activation class is supplied, a fresh instance of it is
    appended after every linear layer, so the linear layers then sit at the
    even indices of ``self.net``.
    """

    def __init__(self, Weight, Bias, Activation=False):
        """Build the layer list.

        Args:
            Weight: sequence of 2-D array-likes, one ``(out, in)`` matrix
                per layer.
            Bias: sequence of 1-D array-likes, one vector per layer.
            Activation: ``nn.Module`` subclass (not an instance) to insert
                after each linear layer, or a falsy value for none.
        """
        super().__init__()
        self.net = nn.ModuleList()
        for w, b in zip(Weight, Bias):
            w = torch.tensor(w, dtype=torch.float64)
            b = torch.tensor(b, dtype=torch.float64)
            out_features, in_features = w.size()
            fc = nn.Linear(in_features, out_features)
            # Replace the randomly initialised parameters with the given ones.
            fc.weight = nn.Parameter(w)
            fc.bias = nn.Parameter(b)
            self.net.append(fc)
            if Activation:
                self.net.append(Activation())

    def forward(self, x):
        """Apply every layer in order and return the final output.

        Args:
            x: input vector, either a tensor or anything ``torch.tensor``
                accepts (e.g. a list of numbers).

        Returns:
            The float64 output tensor of the last layer.
        """
        if isinstance(x, torch.Tensor):
            # ``.to`` keeps the tensor attached to its autograd graph;
            # re-wrapping it with ``torch.tensor`` would detach it.
            x = x.to(torch.float64)
        else:
            x = torch.tensor(x, dtype=torch.float64)
        for layer in self.net:
            x = layer(x)
        return x

# 예제1
- input, target, weight, bias를 표현하면 다음과 같습니다.

$a^1 = \begin{pmatrix} 1 \\ 1 \end{pmatrix}$ &nbsp; and &nbsp;  $t = \begin{pmatrix} 2 \\ 3 \end{pmatrix}$ \
$w^2 = \begin{pmatrix} 1 & -2 \\ 2 & 4 \\ -3 & 1 \end{pmatrix}$ &nbsp; and &nbsp; $b^2 = \begin{pmatrix} 1 \\ 2 \\ 3 \end{pmatrix}$ \
$w^3 = \begin{pmatrix} 1 & 2 & -3 \\ 2 & -1 & 3 \end{pmatrix}$ &nbsp; and &nbsp; $b^3 = \begin{pmatrix} 2 \\ 1 \end{pmatrix}$ \
$activation function : \sigma(x) = x$


In [3]:
# Example 1 fixture: a 2 -> 3 -> 2 network with identity activation.
input_data = [1, 1]
target = [2, 3]

# Layer parameters; each weight matrix is laid out as (out, in).
weight1 = [
    [1, -2],
    [2, 4],
    [-3, 1],
]
weight2 = [
    [1, 2, -3],
    [2, -1, 3],
]
bias1 = [1, 2, 3]
bias2 = [2, 1]

# Per-layer parameter lists in the order the network classes expect.
weight = [weight1, weight2]
bias = [bias1, bias2]

## 넘파이 학습 결과

In [4]:
# Train the NumPy network for one step on the example 1 data using the
# default activation and learning rate.
numpy_net = NumpyNet(Weight=weight, Bias=bias)
for epoch in range(1):
    outputs = numpy_net(input_data)
    numpy_net.backward(outputs, target)
    numpy_net.weight_update()

# Print each updated parameter as a flat list of 4-decimal strings:
# first-layer weights, second-layer weights, then the two biases.
updated_params = (
    numpy_net.weight[0],
    numpy_net.weight[1],
    numpy_net.bias[0],
    numpy_net.bias[1],
)
for params in updated_params:
    print([f"{value:.4f}" for value in params.ravel()])

['1.0100', '-1.9900', '1.6700', '3.6700', '-2.4000', '1.6000']
['1.0000', '0.9600', '-3.1300', '2.0000', '-0.4400', '3.0700']
['1.0100', '1.6700', '3.6000']
['1.8700', '1.0700']


## 파이토치 학습결과

In [5]:
from torch import optim
# Reference run: the same network in PyTorch, trained for one SGD step with
# MSE loss so its parameters can be compared with the NumPy result.
torch_net = TorchNet(Weight=weight, Bias=bias)
criterion = nn.MSELoss()
optimizer = optim.SGD(torch_net.parameters(), lr=0.01)

for epoch in range(1):
    optimizer.zero_grad()
    outputs = torch_net(input_data)
    target_tensor = torch.tensor(target, dtype=torch.float64)
    loss = criterion(outputs, target_tensor)
    loss.backward()
    optimizer.step()

# Without an activation the linear layers sit at indices 0 and 1.
first_fc, second_fc = torch_net.net[0], torch_net.net[1]
for params in (first_fc.weight, second_fc.weight, first_fc.bias, second_fc.bias):
    print([f"{value:.4f}" for value in params.detach().flatten().tolist()])

['1.0100', '-1.9900', '1.6700', '3.6700', '-2.4000', '1.6000']
['1.0000', '0.9600', '-3.1300', '2.0000', '-0.4400', '3.0700']
['1.0100', '1.6700', '3.6000']
['1.8700', '1.0700']


# 예제 2
- input, target, weight, bias를 표현하면 다음과 같습니다.
- 예제 1과 다르게 activation이 추가되었습니다.

$a^1 = \begin{pmatrix} 1 \\ 1 \end{pmatrix}$ &nbsp; and &nbsp;  $t = \begin{pmatrix} 2 \\ 3 \end{pmatrix}$ \
$w^2 = \begin{pmatrix} 1 & -2 \\ 1 & 1 \end{pmatrix}$ &nbsp; and &nbsp; $b^2 = \begin{pmatrix} 0 \\ 0 \end{pmatrix}$ \
$w^3 = \begin{pmatrix} 1 & -1 \\ 2 & -1 \end{pmatrix}$ &nbsp; and &nbsp; $b^3 = \begin{pmatrix} 0 \\ 0 \end{pmatrix}$ \
$activation function : \sigma(x) = x^2$



In [6]:
# Example 2 fixture: a 2 -> 2 -> 2 network with zero biases, used together
# with the square activation.
input_data = [1, 1]
target = [2, 3]

# Layer parameters; each weight matrix is laid out as (out, in).
weight1 = [
    [1, -2],
    [1, 1],
]
weight2 = [
    [1, -1],
    [2, -1],
]
bias1 = [0, 0]
bias2 = [0, 0]

# Per-layer parameter lists in the order the network classes expect.
weight = [weight1, weight2]
bias = [bias1, bias2]

## 넘파이 학습 결과

In [7]:
# Train the NumPy network for one step on the example 2 data, this time
# with the square function as the activation of every layer.
numpy_net = NumpyNet(Weight=weight, Bias=bias, Activation=lambda x: x**2)
for epoch in range(1):
    outputs = numpy_net(input_data)
    numpy_net.backward(outputs, target)
    numpy_net.weight_update()

# Print each updated parameter as a flat list of 4-decimal strings:
# first-layer weights, second-layer weights, then the two biases.
updated_params = (
    numpy_net.weight[0],
    numpy_net.weight[1],
    numpy_net.bias[0],
    numpy_net.bias[1],
)
for params in updated_params:
    print([f"{value:.4f}" for value in params.ravel()])

['-0.0000', '-3.0000', '-0.8400', '-0.8400']
['1.4200', '0.6800', '2.0400', '-0.8400']
['-1.0000', '-1.8400']
['0.4200', '0.0400']


## 파이토치 학습결과

In [8]:
class SqureActivation(nn.Module):
    """Activation module that squares its input element-wise."""

    # No state of its own, so the inherited nn.Module constructor suffices.

    def forward(self, x):
        """Return ``x`` raised to the power of two."""
        squared = x ** 2
        return squared


# Reference run for example 2: the PyTorch network with the square
# activation after every linear layer, trained for one SGD step.
active = SqureActivation
torch_net = TorchNet(weight, bias, active)
criterion = nn.MSELoss()
optimizer = optim.SGD(torch_net.parameters(), lr=0.01)

for epoch in range(1):
    optimizer.zero_grad()
    outputs = torch_net(input_data)
    target_tensor = torch.tensor(target, dtype=torch.float64)
    loss = criterion(outputs, target_tensor)
    loss.backward()
    optimizer.step()

# Activations are interleaved with the linear layers, so the two linear
# layers are found at indices 0 and 2 of the module list.
first_fc, second_fc = torch_net.net[0], torch_net.net[2]
for params in (first_fc.weight, second_fc.weight, first_fc.bias, second_fc.bias):
    print([f"{value:.4f}" for value in params.detach().flatten().tolist()])

['0.0000', '-3.0000', '-0.8400', '-0.8400']
['1.4200', '0.6800', '2.0400', '-0.8400']
['-1.0000', '-1.8400']
['0.4200', '0.0400']
