In [1]:
import numpy as np
from typing import TypeVar, Tuple
import numpy.typing as npt

### Simple layer implementation: Multiply & Add

In [2]:
# User-defined type hint: a constrained TypeVar that can only resolve to int or float.
IntFloat = TypeVar('IntFloat', int, float)

In [3]:
class MulLayer:
    """Multiplication node of a computational graph: ``out = x * y``.

    The forward operands are cached on the instance because the backward pass
    needs them (d(x*y)/dx = y and d(x*y)/dy = x).
    """

    def __init__(self):
        # Operands of the most recent forward() call; None until forward() runs.
        self.x: IntFloat = None
        self.y: IntFloat = None

    def forward(self, x: IntFloat, y: IntFloat) -> IntFloat:
        """Store both operands and return their product.

        Args:
            x: First operand.
            y: Second operand.

        Returns:
            ``x * y``.
        """
        self.x = x
        self.y = y

        return x * y

    def backward(self, dout: IntFloat) -> Tuple[IntFloat, IntFloat]:
        """Propagate the upstream gradient to both operands.

        Args:
            dout: Gradient flowing in from the next node.

        Returns:
            The pair ``(dx, dy)``: ``dout`` scaled by the *other* operand of
            the last forward() call.
        """
        dx = dout * self.y  # gradient w.r.t. x uses the cached y
        dy = dout * self.x  # gradient w.r.t. y uses the cached x

        return (dx, dy)

In [4]:
class AddLayer:
    """Addition node of a computational graph: ``out = x + y``.

    The layer keeps no state: the derivative of a sum with respect to either
    operand is 1, so the backward pass does not need the forward inputs.
    """

    def forward(self, x: IntFloat, y: IntFloat) -> IntFloat:
        """Return the sum of the two operands."""
        return x + y

    def backward(self, dout: IntFloat) -> Tuple[IntFloat, IntFloat]:
        """Pass the upstream gradient through to both operands.

        Args:
            dout: Gradient flowing in from the next node.

        Returns:
            The pair ``(dx, dy)``, each equal to ``dout * 1``.
        """
        dx = dout * 1
        dy = dout * 1
        return (dx, dy)

In [5]:
# Problem setup: unit prices, quantities and the tax multiplier.
apple, apple_num = 100, 2
orange, orange_num = 150, 3
tax = 1.1

# One layer object per node of the computational graph.
mul_apple_layer, mul_orange_layer, mul_tax_layer = MulLayer(), MulLayer(), MulLayer()
add_apple_orange_layer = AddLayer()

# Forward pass: per-fruit subtotals -> combined subtotal -> total after tax.
apple_price = mul_apple_layer.forward(apple, apple_num)
orange_price = mul_orange_layer.forward(orange, orange_num)
all_price = add_apple_orange_layer.forward(apple_price, orange_price)
price = mul_tax_layer.forward(all_price, tax)

# Backward pass: start from d(price)/d(price) = 1 and walk the graph in reverse.
dprice = 1
dall_price, dtax = mul_tax_layer.backward(dprice)
dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price)
# The two fruit branches are independent of each other.
dapple, dapple_num = mul_apple_layer.backward(dapple_price)
dorange, dorange_num = mul_orange_layer.backward(dorange_price)

print(price)
print(dapple_num, dapple, dorange, dorange_num, dtax)

715.0000000000001
110.00000000000001 2.2 3.3000000000000003 165.0 650


### Activation layer implementation: ReLU & Sigmoid

In [6]:
class Relu:
    """ReLU activation layer: keeps positive inputs and outputs 0 elsewhere."""

    def __init__(self):
        # Boolean mask marking where the last forward() input was <= 0;
        # None until forward() runs.
        self.mask: npt.NDArray[np.bool_] = None

    def forward(self, x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Return a copy of ``x`` with every non-positive entry set to 0.

        The mask of non-positive positions is stored for the backward pass.
        ``x`` itself is not modified.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Return the upstream gradient with masked positions zeroed.

        Works on a copy so the caller's ``dout`` array is left untouched;
        writing into ``dout`` directly would silently corrupt a gradient
        buffer that the caller may still use.
        """
        dx = dout.copy()
        dx[self.mask] = 0

        return dx

In [7]:
class Sigmoid:
    """Sigmoid activation layer, ``y = 1 / (1 + exp(-x))``."""

    def __init__(self):
        # Output of the most recent forward() call, reused by backward().
        self.out: npt.NDArray[np.float64] = None

    def forward(self, x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Apply the sigmoid element-wise, cache the result and return it."""
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def backward(self, dout: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Scale the upstream gradient by the local derivative ``(1 - y) * y``."""
        y = self.out
        return dout * (1.0 - y) * y

### Implementation of the Affine and Softmax layers

In [8]:
def softmax(a: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
    """Softmax along the last axis.

    A 1-D input is treated as a single score vector. For inputs with more
    dimensions (e.g. a batch of score vectors stacked as rows) each slice
    along the last axis is normalised independently, so every row sums to 1
    instead of the whole array summing to 1.

    Args:
        a: Array of scores.

    Returns:
        Array of the same shape as ``a`` holding the softmax probabilities.
    """
    a = np.asarray(a)
    # A 0-d input has no last axis; reduce over everything in that case.
    axis = -1 if a.ndim > 0 else None

    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing on large scores.
    shifted = a - np.max(a, axis=axis, keepdims=True)
    exp_a = np.exp(shifted)

    return exp_a / np.sum(exp_a, axis=axis, keepdims=True)

In [9]:
def cross_entropy_error(y: npt.NDArray[np.float64], t: npt.NDArray[np.int32]) -> np.float64:
    """Cross-entropy between predictions ``y`` and targets ``t``.

    ``t`` is multiplied element-wise with ``log(y)``, so it must be
    broadcast-compatible with ``y`` (e.g. one-hot labels).

    For a 1-D ``y`` the result is ``-sum(t * log(y))``. For a batch (``y``
    with more than one dimension, samples along axis 0) the summed error is
    divided by the number of samples, giving the mean loss per sample.

    Args:
        y: Predicted probabilities.
        t: Target values with the same layout as ``y``.

    Returns:
        The (mean) cross-entropy error as a scalar.
    """
    # Small offset so that log(0) never produces -inf.
    delta = 1e-7

    y = np.asarray(y)
    t = np.asarray(t)
    batch_size = y.shape[0] if y.ndim > 1 else 1

    return -np.sum(t * np.log(y + delta)) / batch_size

In [10]:
class Affine:
    """Affine (fully connected) layer computing ``np.dot(x, W) + b``."""

    def __init__(self, W: npt.NDArray[np.float64], b: npt.NDArray[np.float64]):
        # Parameters supplied by the caller (stored by reference, not copied).
        self.W: npt.NDArray[np.float64] = W
        self.b: npt.NDArray[np.float64] = b
        # Input of the last forward() call, needed to compute dW.
        self.x: npt.NDArray[np.float64] = None
        # Parameter gradients, filled in by backward().
        self.dW: npt.NDArray[np.float64] = None
        self.db: npt.NDArray[np.float64] = None

    def forward(self, x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Remember ``x`` and return ``np.dot(x, W) + b``."""
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self, dout: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Compute the parameter gradients and return the input gradient.

        Side effects:
            ``self.dW`` is set to ``np.dot(x.T, dout)`` and ``self.db`` to
            ``dout`` summed over axis 0.

        Returns:
            ``np.dot(dout, W.T)``, the gradient with respect to the input.
        """
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)
        return np.dot(dout, self.W.T)

In [11]:
class SoftmaxWithLoss:
    """Softmax activation fused with the cross-entropy loss.

    forward() turns scores into probabilities and returns the loss against
    the targets; backward() returns the gradient of that loss with respect
    to the scores.
    """

    def __init__(self):
        # Values cached by forward(); all None until it has been called.
        self.loss: np.float64 = None              # scalar loss
        self.y: npt.NDArray[np.float64] = None    # softmax output
        self.t: npt.NDArray[np.int32] = None      # targets as given by the caller

    def forward(self, x: npt.NDArray[np.float64], t: npt.NDArray[np.int32]) -> np.float64:
        """Compute softmax(x), cache it with ``t`` and return the loss.

        Args:
            x: Input scores.
            t: Targets; backward() subtracts them from the softmax output, so
                they must be broadcast-compatible with it (e.g. one-hot).

        Returns:
            The cross-entropy loss as computed by ``cross_entropy_error``.
        """
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout: np.float64 = 1) -> npt.NDArray[np.float64]:
        """Return the gradient of the loss with respect to the forward input.

        Args:
            dout: Upstream gradient; the result is scaled by it. The default
                of 1 is the usual case where this layer is the end of the
                network.

        Returns:
            ``dout * (y - t) / batch_size`` where ``batch_size`` is the
            length of axis 0 for batched targets and 1 for a single 1-D
            target vector (whose axis 0 is the class axis, not a batch axis).
        """
        batch_size = self.t.shape[0] if self.t.ndim > 1 else 1
        dx = dout * (self.y - self.t) / batch_size

        return dx

### training the neural network using backpropagation

In [12]:
import sys, os
# Add the parent directory to the module search path; presumably the local
# `dataset` package and/or `two_layer_net` module are resolved from there — TODO confirm.
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

In [13]:
# Load the dataset (normalised inputs, one-hot labels) and build the network.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Hyper-parameters.
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# History collected during training.
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Whole number of iterations per epoch. Floor division keeps this an int so
# that the `i % iter_per_epoch == 0` check below still fires once per epoch
# when train_size is not an exact multiple of batch_size (with true division
# the float remainder would almost never be exactly 0).
iter_per_epoch = max(train_size // batch_size, 1)

# NOTE(review): the mini-batch sampling is unseeded, so the accuracies differ
# from run to run — consider seeding NumPy in a config cell.
for i in range(iters_num):
    # Draw a random mini-batch (indices sampled with replacement).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Gradients of the loss w.r.t. every parameter.
    grad = network.gradient(x_batch, t_batch)

    # Plain SGD step, applied in place to the network's parameters.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Loss on the same mini-batch, measured after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch: record and report accuracy on the full train/test sets.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)

0.09843333333333333 0.1023
0.90365 0.9076
0.92095 0.9254
0.9352 0.9349
0.9448 0.9437
0.9517666666666666 0.9496
0.9554333333333334 0.9529
0.9604666666666667 0.9571
0.9653166666666667 0.9603
0.9669333333333333 0.9612
0.9695 0.9623
0.9715833333333334 0.9643
0.9733833333333334 0.9649
0.9741333333333333 0.965
0.9760666666666666 0.9679
0.9756833333333333 0.967
0.9781666666666666 0.9689
