In [None]:
# Sigmoid

import numpy as np

class Sigmoid:
    """Sigmoid activation layer that caches its output for backpropagation."""

    def __init__(self):
        # Output of the most recent forward pass; backward() reuses it.
        self.out = None

    def forward(self, x):
        """Apply the logistic function element-wise.

        Args:
            x: Input array.

        Returns:
            ``1 / (1 + exp(-x))``, with the same shape as ``x``. The result
            is also stored in ``self.out`` for use by ``backward``.
        """
        out = 1 / (1 + np.exp(-x))
        self.out = out
        # The activation has to be returned, otherwise the next layer
        # receives None.
        return out

    def backward(self, dout):
        """Propagate a gradient back through the sigmoid.

        Args:
            dout: Gradient of the loss with respect to this layer's output.

        Returns:
            Gradient of the loss with respect to this layer's input,
            ``dout * y * (1 - y)`` where ``y`` is the cached forward output.
        """
        # dout: gradient arriving from the layer after this one.
        # dx: gradient handed on to the layer before this one.
        dx = dout * (1.0 - self.out) * self.out
        return dx

In [None]:
class Affine:
    """Affine layer computing ``x . w + b`` with gradients for w and b."""

    def __init__(self, w, b):
        self.w = w
        self.b = b
        # Input of the most recent forward pass; needed to compute dw.
        self.x = None
        # Parameter gradients, filled in by backward().
        self.dw = None
        self.db = None

    def forward(self, x, w=None):
        """Compute the layer output.

        Args:
            x: Input array.
            w: Optional replacement weight matrix. When given it overwrites
                the stored weights (the original behaviour); when omitted
                the weights passed to the constructor are used.

        Returns:
            ``np.dot(x, w) + b``.
        """
        self.x = x
        if w is not None:
            self.w = w
        out = np.dot(x, self.w) + self.b
        return out

    def backward(self, dout):
        """Back-propagate ``dout`` and store the parameter gradients.

        Args:
            dout: Gradient of the loss with respect to the layer output.

        Returns:
            Gradient of the loss with respect to the layer input.
        """
        dx = np.dot(dout, self.w.T)
        self.dw = np.dot(self.x.T, dout)
        # The bias is added to every row, so its gradient sums over axis 0.
        self.db = np.sum(dout, axis=0)

        return dx

# 순전파 때 수행하는 행렬곱을 기하학에서는 Affine transformation 이라고 한다.

(2,)
(2, 3)
[2.86597613 3.85388802 4.84179991]


In [None]:
import numpy as np


def softmax(x):
    """Return softmax probabilities computed along the last axis.

    A 1-D input is treated as a single score vector. For higher-dimensional
    input (for example a ``(batch, classes)`` matrix) each row is normalised
    independently, so every row sums to 1.

    Args:
        x: Array-like of scores.

    Returns:
        Array of the same shape as ``x`` holding the probabilities.
    """
    x = np.asarray(x)
    # Subtracting the per-row maximum leaves the result unchanged but keeps
    # np.exp from overflowing on large scores.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=-1, keepdims=True)

def cross_entropy_error(y, t):
    """Return the cross-entropy error averaged over the batch.

    Args:
        y: Predicted probabilities, either one vector of shape ``(classes,)``
            or a batch of shape ``(batch, classes)``.
        t: One-hot target labels with the same shape as ``y``.

    Returns:
        The cross-entropy of a single sample, or the mean cross-entropy per
        sample for a batch.
    """
    y = np.asarray(y)
    t = np.asarray(t)
    if y.ndim == 1:
        # Treat a single sample as a batch of one so the code below is shared.
        y = y.reshape(1, -1)
        t = t.reshape(1, -1)

    batch_size = y.shape[0]
    # Small offset so that log(0) never produces -inf.
    delta = 1e-7
    return -np.sum(t * np.log(y + delta)) / batch_size

class softmaxWithLoss:
    """Output layer that chains softmax with the cross-entropy loss."""

    def __init__(self):
        self.loss = None  # loss value from the last forward pass
        self.y = None     # softmax output from the last forward pass
        self.t = None     # target labels (one-hot) from the last forward pass

    def forward(self, x, t):
        """Compute the loss for scores ``x`` against one-hot targets ``t``.

        Args:
            x: Score array fed to ``softmax``.
            t: One-hot target array with the same shape as ``x``.

        Returns:
            The value returned by ``cross_entropy_error``.
        """
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the scores.

        Args:
            dout: Upstream gradient; 1 when this is the final layer.

        Returns:
            ``dout * (y - t) / batch_size``.
        """
        # A 1-D target is a single sample, so its length is the number of
        # classes, not the batch size.
        batch_size = self.t.shape[0] if self.t.ndim > 1 else 1
        dx = dout * (self.y - self.t) / batch_size

        return dx


##### 순전파
1. 입력 점수: $a_1, \ldots, a_n$
2. $\mathrm{softmax}(a) \rightarrow y_1, \ldots, y_n$ (softmax 출력)
3. softmax 출력 $y \rightarrow \mathrm{cross\_entropy\_error}(y, t)$, 여기서 $t$는 정답 레이블(answer label)
##### 역전파
1. $L$(오차 함수) $\rightarrow$ cross entropy $\rightarrow$ softmax 순으로 역전파한 결과: $y_n - t_n$

In [None]:
class Affine:
    """Affine layer that flattens each sample before applying ``x . W + b``."""

    def __init__(self, w, b):
        self.W = w
        self.b = b
        # Flattened input and original input shape from the last forward pass.
        self.x = None
        self.orignal_x_shape = None
        self.dx = None
        # Parameter gradients, filled in by backward(). Initialised here so
        # they can be read before the first backward pass without an
        # AttributeError.
        self.dw = None
        self.db = None

    def forward(self, x):
        """Compute the layer output.

        Args:
            x: Input array whose first axis indexes samples; all remaining
                axes are flattened into one feature axis.

        Returns:
            ``np.dot(x_flat, W) + b``.
        """
        self.orignal_x_shape = x.shape
        x = x.reshape(x.shape[0], -1)
        self.x = x
        out = np.dot(x, self.W) + self.b
        return out

    def backward(self, dout):
        """Back-propagate ``dout`` and store the parameter gradients.

        Args:
            dout: Gradient of the loss with respect to the layer output.

        Returns:
            Gradient with respect to the input, reshaped to the shape the
            input had when ``forward`` was called.
        """
        dx = np.dot(dout, self.W.T)
        self.dw = np.dot(self.x.T, dout)
        # The bias is added to every row, so its gradient sums over axis 0.
        self.db = np.sum(dout, axis=0)

        # Undo the flattening done in forward().
        dx = dx.reshape(*self.orignal_x_shape)

        return dx

In [None]:
from collections import OrderedDict

class ReLu:
    """ReLU activation layer: passes positive values, zeroes the rest."""

    def __init__(self):
        # Boolean array marking the inputs that were <= 0 in the last forward pass.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every element <= 0 replaced by 0."""
        non_positive = x <= 0
        self.mask = non_positive
        activated = np.array(x, copy=True)
        activated[non_positive] = 0
        return activated

    def backward(self, dout):
        """Return a copy of ``dout`` with the gradient blocked where the input was <= 0."""
        grad = np.array(dout, copy=True)
        grad[self.mask] = 0
        return grad
    
def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at ``x`` by central differences.

    Each element of ``x`` is perturbed in place by ``+h`` and ``-h``, ``f`` is
    evaluated on the whole array each time, and the element is restored
    afterwards.

    Args:
        f: Callable taking the array ``x`` and returning a scalar.
        x: Float ndarray at which to evaluate the gradient. It is modified
            temporarily during the computation and restored before returning.

    Returns:
        Array of the same shape as ``x`` holding the estimated partial
        derivatives.
    """
    # Small step for the finite difference. A large step such as 1e+4 would
    # measure the slope across a huge interval instead of at x.
    h = 1e-4
    grad = np.zeros_like(x)

    for idx in np.ndindex(x.shape):
        original = x[idx]

        x[idx] = original + h
        f_plus = f(x)  # f must see the full array, not just the one element

        x[idx] = original - h
        f_minus = f(x)

        grad[idx] = (f_plus - f_minus) / (2 * h)
        x[idx] = original  # restore before moving to the next element

    return grad


class TwoLayerNet:
    """Two-layer network: Affine -> ReLu -> Affine, followed by softmax-with-loss."""

    def __init__(self, input_size, hidden_size, output_size):
        """Create the parameters and wire up the layers.

        Args:
            input_size: Number of input features.
            hidden_size: Number of units in the hidden layer.
            output_size: Number of output classes.
        """
        self.params: dict = {}
        self.params['W1'] = np.random.rand(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        # The second layer consumes the hidden activations, so its weight
        # matrix must have hidden_size rows (not input_size).
        self.params['W2'] = np.random.rand(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)
        # Dictionary that remembers insertion order, so predict() runs the
        # layers in the order they were added.
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['ReLu1'] = ReLu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        # softmaxWithLoss takes no constructor arguments.
        self.lastLayer = softmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer in order and return the final scores."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, y, t):
        """Return the loss for network input ``y`` against targets ``t``.

        Args:
            y: Network input; despite the name it is passed to ``predict``.
            t: Target labels passed to the loss layer.
        """
        scores = self.predict(y)
        return self.lastLayer.forward(scores, t)

    def numerical_gredient(self, x, t):
        """Estimate the gradient of the loss for every parameter numerically.

        Args:
            x: Network input.
            t: Target labels.

        Returns:
            Dict with keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'`` mapping to
            the gradient arrays returned by ``numerical_gradient``.
        """
        # The lambda ignores its argument on purpose: numerical_gradient
        # perturbs the parameter arrays in place, and the layers hold
        # references to those same arrays, so self.loss sees the change.
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        for key in ('W1', 'b1', 'W2', 'b2'):
            grads[key] = numerical_gradient(loss_W, self.params[key])

        return grads

#### 궁금증(04-14)
1. 초기 가중치는 뭘까? 그냥 랜덤 함수로 생성하는 건가?
2. 자꾸 Affine을 찾아보면 완전연결층(fully connected layer)이라는 단어가 나오는데, 층의 종류에 따라 식도 달라지는지 궁금해진다.