# Affine 계층

In [1]:
import numpy as np

In [2]:
class Affine:
    """Affine (fully connected) layer computing ``out = x . W + b``.

    ``forward`` caches its input so that ``backward`` can compute the
    parameter gradients, which are stored on the instance as ``dW`` and
    ``db``.
    """

    def __init__(self, W, b):
        """Store the layer parameters.

        Args:
            W: Weight array used as the right operand of ``np.dot(x, W)``.
            b: Bias added to the product (broadcast by NumPy).
        """
        self.W = W
        self.b = b
        self.x = None   # input cached by forward() for use in backward()
        self.dW = None  # gradient of the loss w.r.t. W, set by backward()
        self.db = None  # gradient of the loss w.r.t. b, set by backward()

    def forward(self, x):
        """Return ``np.dot(x, W) + b`` and remember ``x`` for backward().

        Args:
            x: Input array.
               NOTE(review): backward() uses ``x.T`` and sums ``dout`` over
               axis 0, so x is presumably 2-D (batch, features) - confirm.
        """
        self.x = x
        # The bias, not the weight matrix, is the additive term.
        out = np.dot(x, self.W) + self.b

        return out

    def backward(self, dout):
        """Propagate the upstream gradient through the layer.

        Args:
            dout: Upstream gradient dL/dY, shaped like forward()'s output.

        Returns:
            dL/dX, computed as ``np.dot(dout, W.T)``.

        Side effects:
            Stores dL/dW in ``self.dW`` and dL/db in ``self.db``.
        """
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        # The bias is shared by every row, so its gradient is the sum of
        # the upstream gradient over axis 0.
        self.db = np.sum(dout, axis=0)
        return dx

# Sigmoid Layer

In [3]:
class Sigmoid:
    """Sigmoid activation layer: ``y = 1 / (1 + exp(-x))``."""

    def __init__(self):
        self.out = None  # forward() result, reused by backward()

    def forward(self, x):
        """Apply the sigmoid element-wise and cache the result in ``self.out``.

        Uses the identity ``sigmoid(x) = 0.5 * (1 + tanh(x / 2))``. The
        direct form ``exp(x) / (1 + exp(x))`` overflows to ``inf / inf``
        (NaN) for large positive ``x``; ``tanh`` saturates at +/-1 instead.
        """
        half_x = np.multiply(0.5, x)
        self.out = 0.5 * (1.0 + np.tanh(half_x))
        return self.out

    def backward(self, dout):
        """Return ``dout * y * (1 - y)`` where ``y`` is the cached output.

        Args:
            dout: Upstream gradient with the same shape as the output.
        """
        dx = dout * (1.0 - self.out) * self.out
        return dx

In [None]:
class Relu:
    """ReLU activation layer: passes positive inputs, zeroes the rest."""

    def __init__(self):
        self.mask = None  # boolean array, True where the input was <= 0

    def forward(self, x):
        """Return a copy of ``x`` with every element ``<= 0`` set to 0.

        The mask of zeroed positions is cached for backward(). ``x`` itself
        is left unmodified.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Return the upstream gradient with masked positions zeroed.

        Args:
            dout: Upstream gradient, same shape as the forward input.

        Returns:
            A new array; ``dout`` is not modified.
        """
        # Work on a copy so the caller's gradient array is not clobbered,
        # mirroring how forward() treats its input.
        dx = dout.copy()
        dx[self.mask] = 0
        return dx

In [4]:
class SoftmaxWithLoss:
    """Softmax followed by a cross-entropy loss, fused into one layer.

    Relies on ``softmax`` and ``cross_entropy_error``, which are defined
    outside this class.
    """

    def __init__(self):
        self.loss = None  # value returned by the last forward()
        self.y = None     # softmax output from the last forward()
        self.t = None     # target passed to the last forward()

    def forward(self, x, t):
        """Compute the loss for scores ``x`` against targets ``t``.

        Caches ``softmax(x)`` in ``self.y`` and ``t`` in ``self.t`` for
        backward(), and returns ``cross_entropy_error(self.y, self.t)``.
        """
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        """Return the gradient w.r.t. the scores: ``(y - t) * dout``.

        Args:
            dout: Upstream gradient; defaults to 1 because this layer is
                normally the last one in the network.

        NOTE(review): ``y - t`` requires ``t`` to have the same shape as
        ``y`` (presumably one-hot targets - confirm). If
        ``cross_entropy_error`` averages over the batch, this gradient
        should also be divided by the batch size - verify against that
        helper.
        """
        # Apply the chain rule instead of silently dropping ``dout``.
        dx = (self.y - self.t) * dout
        return dx


In [None]:
class SGD:
    """Plain stochastic gradient descent: ``param -= lr * grad``."""

    def __init__(self, lr=0.01):
        """
        Args:
            lr: Learning rate (step size).
        """
        self.lr = lr

    def update(self, params, grads):
        """Apply one SGD step to every entry of ``params`` in place.

        Args:
            params: Mapping from parameter name to its value.
            grads: Mapping with the same keys holding the gradients.
        """
        for key in params:
            params[key] -= self.lr * grads[key]

In [1]:
class AdaGrad:
    """AdaGrad optimizer: per-element learning rates that shrink over time.

    Each parameter element accumulates the sum of its squared gradients in
    ``h``; the step for that element is divided by ``sqrt(h)``.
    """

    def __init__(self, lr=0.01):
        """
        Args:
            lr: Base learning rate.
        """
        self.lr = lr
        self.h = None  # dict of squared-gradient sums, created on first update()

    def update(self, params, grads):
        """Apply one AdaGrad step to every entry of ``params`` in place.

        Args:
            params: Mapping from parameter name to its NumPy array.
            grads: Mapping with the same keys holding the gradients.
        """
        if self.h is None:
            # Lazily allocate one accumulator per parameter, matching its shape.
            self.h = {key: np.zeros_like(val) for key, val in params.items()}

        for key in params:
            self.h[key] += grads[key] * grads[key]
            # Scale the step by the accumulated gradient magnitude; the small
            # constant avoids division by zero while h is still 0.
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)

In [None]:
# Build a network with six hidden layers of 100 units each
# (MultiLayerNet is defined outside this section).
network = MultiLayerNet(
    input_size=784,
    hidden_size_list=[100] * 6,
    output_size=10,
)
# Optimizer instance with a learning rate of 0.01.
optimizer = SGD(lr=0.01)

In [None]:
class Dropout:
    """Dropout layer that randomly zeroes activations during training.

    In training mode a fresh random mask is drawn on every forward pass;
    in inference mode the input is scaled by ``1 - dropout_ratio`` instead.
    """

    def __init__(self, dropout_ratio=0.5):
        """
        Args:
            dropout_ratio: Threshold compared against uniform [0, 1) samples;
                elements whose sample is not above it are dropped.
        """
        self.mask = None  # boolean keep-mask from the last training forward()
        self.dropout_ratio = dropout_ratio

    def forward(self, x, train_flg=True):
        """Apply dropout to ``x``.

        Args:
            x: Input array.
            train_flg: When true, draw a new mask and zero dropped elements;
                otherwise return ``x`` scaled by ``1 - dropout_ratio``.
        """
        if not train_flg:
            keep_scale = 1.0 - self.dropout_ratio
            return x * keep_scale

        samples = np.random.rand(*x.shape)
        self.mask = samples > self.dropout_ratio
        return x * self.mask

    def backward(self, dout):
        """Pass the gradient only through the elements kept by forward()."""
        grad = dout * self.mask
        return grad