In [1]:
import numpy as np
import matplotlib.pyplot as plt

# BackPropagation

## NN Layer

In [2]:
import abc

class LayerInterface(object, metaclass=abc.ABCMeta):
  @abc.abstractmethod
  def forward(self, x, y):
    raise NotImplementedError('users must define forward to use this base class')
  @abc.abstractmethod
  def backward(self, dout):
    raise NotImplementedError('users must define backward to use this base class')


### Mul layer

In [3]:
class MulLayer(LayerInterface):
  def __init__(self):
    self.x = None
    self.y = None
  
  def forward(self, x, y):
    self.x, self.y = x, y
    out = self.x * self.y
    return out
  
  def backward(self, dout):
    dx = dout * self.y
    dy = dout * self.x
    return dx, dy


In [4]:
apple_amount = 2
apple_price = 100
tax = 1.1

mul_apple_layer = MulLayer()
mul_tax_layer = MulLayer()

# Forward Propagation
tot_apple_price = mul_apple_layer.forward(apple_amount, apple_price)
final_price = mul_tax_layer.forward(tot_apple_price, tax)

print(final_price)

220.00000000000003


In [5]:
# Back Propagation
dfinal_price = 1
dtot_apple_price, dtax = mul_tax_layer.backward(dfinal_price)
dapple_amount, dapple_price = mul_apple_layer.backward(dtot_apple_price)

print(dapple_amount, dapple_price, dtax)

110.00000000000001 2.2 200


### Add layer

In [6]:
class AddLayer(LayerInterface):
  def __init__(self):
    pass
  
  def forward(self, x, y):
    out = x + y
    return out

  def backward(self, dout):
    dx = dout * 1
    dy = dout * 1
    return dx, dy


In [7]:
# Variables setting
apple_amount, apple_price = 2, 100
mandarin_amount, mandarin_price = 3, 150
tax = 1.1

apple_layer = MulLayer()
mandarin_layer = MulLayer()
fruit_layer = AddLayer()
tot_layer = MulLayer()

# Forward Propagation
tot_apple = apple_layer.forward(apple_amount, apple_price)
tot_mandarin = mandarin_layer.forward(mandarin_amount, mandarin_price)
tot_fruit = fruit_layer.forward(tot_apple, tot_mandarin)
final_price = tot_layer.forward(tot_fruit, tax)

print(tot_apple, tot_mandarin, tot_fruit, final_price)

# Backward Propagation
d_final_price = 1
d_tot_fruit, d_tax = tot_layer.backward(d_final_price)
d_tot_apple, d_tot_mandarin = fruit_layer.backward(d_tot_fruit)
d_apple_amount, d_apple_price = apple_layer.backward(d_tot_apple)
d_mandarin_amount, d_mandarin_price = mandarin_layer.backward(d_tot_mandarin)

print(d_apple_amount, d_apple_price, d_mandarin_amount, d_mandarin_price, d_tax)

200 450 650 715.0000000000001
110.00000000000001 2.2 165.0 3.3000000000000003 650


## Activation Function

### ReLU Layer

In [8]:
class ReLU(LayerInterface):
  def __init__(self):
    self.x = None
  
  def forward(self, x):
    self.mask = x < 0
    out = x.copy()
    out[self.mask] = 0
    return out
  
  def backward(self, dout):
    dx = dout.copy()
    dx[self.mask] = 0
    return dx

### Sigmoid Layer

In [9]:
class Sigmoid(LayerInterface):
  def __init__(self):
    self.out = None
  
  def forward(self, x):
    out = 1 / (1 + np.exp(-x))
    self.out = out
    return self.out
  
  def backward(self, dout):
    dx = dout * self.out * (1 - self.out)
    return dx

### Affine Layer

In [14]:
class Affine(LayerInterface):
  def __init__(self):
    self.X = None
    self.W = None
    self.b = None
    self.out = None
    self.dX = None
    self.dW = None
    self.db = None
  
  def forward(self, X, W, b):
    self.X = X
    self.W = W
    self.b = b
    self.out = np.dot(X, W) + b
    return self.out

  def backward(self, dout):
    self.dX = np.dot(dout, self.W)
    self.dW = np.dot(self.X, dout)
    self.db = dout
    return self.dX

### SoftmaxWithLoss

In [52]:
class SoftmaxWithLoss(LayerInterface):
  def __init__(self):
    self.loss = None
    self.y = None
    self.t = None
  
  def forward(self, a, t):
    self.t = t
    self.y = SoftmaxWithLoss.softmax(a)
    self.loss = SoftmaxWithLoss.cross_entropy_error(self.y, self.t)
    return self.loss

  def backward(self, dout=1):
    batch_size = self.t.shape[0]
    dx = (self.y - self.t) / batch_size
    return dx

  def softmax(a):
    exp_a = np.exp(a)
    if a.ndim == 1:
      sum_exp_a = np.sum(exp_a)
    else:
      sum_exp_a = np.sum(exp_a, axis=1)
    return exp_a / sum_exp_a.reshape(-1, 1)

  def cross_entropy_error(y, t): # t: one-hot-encoded
    delta = 0.00001
    if y.ndim == 1:
      return -np.sum(t * np.log(y + delta))
    return -np.sum(t * np.log(y + delta), axis=1) / y.shape[0]

In [53]:
t = np.array([
              [1,0,0],
              [0,1,0]
              ])
a = np.array([
              [1,2,3],
              [1,2,5],
              ])
dd = SoftmaxWithLoss()

In [54]:
dd.forward(a, t)

array([1.20374745, 1.5328347 ])

In [55]:
dd.backward()

array([[-0.45498471,  0.12236424,  0.33262048],
       [ 0.00857391, -0.47669369,  0.46811978]])