In [19]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris

In [20]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

In [21]:
def sigmoid_derivat(x):
  return sigmoid(x)*(1-sigmoid(x))

In [22]:
def softmax(x):
  s = np.exp(x).sum(axis=1)
  return np.divide(np.exp(x).T, s).T

In [23]:
def softmax_derivat(x):
  return softmax(x) - np.power(softmax(x), 2)

In [146]:
x = np.random.randint(5, size=(1, 4))

In [25]:
softmax_derivat(x)

array([[0.1875    , 0.1875    , 0.1875    , 0.1875    ],
       [0.03869096, 0.15403884, 0.03869096, 0.09758102],
       [0.23191763, 0.11638834, 0.23191763, 0.11638834]])

In [147]:
x

array([[4, 3, 3, 1]])

In [27]:
s = x.sum(axis=1)

In [28]:
s

array([12,  8, 14])

In [29]:
np.divide(x.T,s).T

array([[0.25      , 0.25      , 0.25      , 0.25      ],
       [0.125     , 0.5       , 0.125     , 0.25      ],
       [0.28571429, 0.21428571, 0.28571429, 0.21428571]])

In [148]:
softmax(x)

array([[0.56005279, 0.20603191, 0.20603191, 0.02788339]])

In [31]:
x1 = np.arange(9.0).reshape((3, 3))
x2 = 2 * np.ones(3)
x1 / x2

array([[0. , 0.5, 1. ],
       [1.5, 2. , 2.5],
       [3. , 3.5, 4. ]])

In [32]:
x2

array([2., 2., 2.])

In [33]:
x1

array([[0., 1., 2.],
       [3., 4., 5.],
       [6., 7., 8.]])

In [34]:
x = np.random.rand(10, 4)

In [35]:
softmax(x)

array([[0.16394645, 0.27515663, 0.36124313, 0.19965379],
       [0.21713627, 0.35308321, 0.20272183, 0.22705869],
       [0.21870406, 0.17373347, 0.2481324 , 0.35943006],
       [0.35316787, 0.23724318, 0.1545083 , 0.25508066],
       [0.21451538, 0.26064441, 0.19023516, 0.33460504],
       [0.3683389 , 0.17241837, 0.16384272, 0.2954    ],
       [0.2147685 , 0.28450391, 0.25461846, 0.24610912],
       [0.3031185 , 0.34807621, 0.18103934, 0.16776594],
       [0.28552439, 0.27095201, 0.22210382, 0.22141978],
       [0.22309962, 0.33008248, 0.20541486, 0.24140305]])

In [36]:
def ReLU(x):
  return np.maximum(0, x)+1e-8

In [37]:
def ReLU_derivat(x):
  return x>0

In [38]:
def MSE(x, y):
  return np.sum(pow((x-y), 2))/len(x)

In [39]:
def MSE_derivat(x, y):
  return x - y

$$\frac{\partial L}{\partial W_3} = \frac{\partial L}{\partial O}\cdot \frac{\partial O}{\partial \sigma} \cdot \frac{\partial \sigma}{\partial o_3}\cdot\frac{\partial o_3}{\partial W_3} = \frac{\partial L}{\partial O} \cdot \frac{\partial \sigma}{\partial o_3}\cdot\ h_2 = \Biggl(\frac{\partial L}{\partial O}\odot \sigma'(o_3)\Biggr)  \cdot h_2$$

In [167]:
class NN2:
  def __init__(self, dims, acts):
    self.weights = []
    self.hiddens = []
    self.activated = []
    self.dims = dims
    self.acts = acts
  

  def init_params(self):
    for i in range(len(self.dims)-1):
      inc = self.dims[i]
      outc = self.dims[i+1]
      w_i = np.random.normal(0, 2, (inc, outc))
      self.weights.append(w_i)


  def forward(self, X):
    out = X 
    for i in range(len(self.dims)-1):
      out = out @ self.weights[i]
      self.hiddens.append(out)
      out = self.acts[i](out)
      self.activated.append(out)
    self.hiddens.append(X)
    return out


  def backward(self, y, lr):
    d_0 = self.hiddens[-1].T @ np.multiply(MSE_derivat(self.activated[-1], y), softmax_derivat(self.hiddens[-2]))
    print(d_0)
    self.weights[0] -= lr*d_0
    # return d_0.shape
  def printparam(self, param):
    if param=='weights':
      for i in (self.weights):
        print(i)
    elif param=='hiddens':
      for i in (self.hiddens):
        print(i)
    else:
      for i in (self.activated):
        print(i)


In [168]:
data = load_iris()
X = data['data']
y = data['target']
yt = np.zeros((len(y), y.max()+1))
yt[np.arange(len(y)), y] = 1

In [169]:
nn = NN2([4, 3], [softmax])
nn.init_params()
nn.forward(X)
nn.backward(yt, 0.01)

[[-2.29828264  7.24280425 25.34001755]
 [-1.56460449  5.31560797 17.09991644]
 [-0.61822823  1.9269717   8.32809765]
 [-0.09218124  0.327498    1.67748113]]


In [170]:
ep = 10
for i in range(ep+1):
  nn.forward(X)
  nn.backward(yt, 0.01)
  # nn.printparam('weights')

[[-0.29234454 18.03290402 43.83842192]
 [-2.58958078 12.76873563 26.551889  ]
 [ 6.18898064  5.05579454 18.12024558]
 [ 3.18108607  0.85514703  3.76425277]]
[[ 21.25954896  24.28439076  -9.23574266]
 [  2.72903675  16.8879533   -2.98856059]
 [ 30.63274928   6.98795595 -11.04699222]
 [ 11.23470095   1.15300839  -4.22444551]]
[[-18.69161035  10.84266194  50.15001763]
 [-14.4982251    7.85432636  30.25967252]
 [ -1.00394637   2.94268582  21.64393826]
 [  1.3105166    0.49547452   4.9861702 ]]
[[ 1.65616274  5.1334965  -2.39963115]
 [-0.89052253  3.79397047 -1.02645181]
 [ 4.14399392  1.37765037 -2.11076119]
 [ 1.5566758   0.20836256 -0.70947046]]
[[ 8.28134157  3.65548741 -4.37298916]
 [ 2.45659822  2.72016454 -1.86446779]
 [ 8.62324628  0.97369842 -3.8562675 ]
 [ 2.92163451  0.1445508  -1.30179659]]
[[ 37.58657031   5.07989114 -15.85365405]
 [ 14.84144792   3.75454821  -6.66902543]
 [ 33.14099955   1.36387874 -14.38102954]
 [ 11.05729167   0.20715451  -5.06319742]]
[[-25.34477313  12.178

In [165]:
nn.forward(X)

array([[4.76790471e-07, 3.11821431e-07, 9.99999211e-01],
       [2.84784737e-06, 9.21911388e-07, 9.99996230e-01],
       [1.75897362e-06, 1.07112511e-06, 9.99997170e-01],
       [2.25552829e-06, 9.97981624e-07, 9.99996746e-01],
       [3.61519679e-07, 3.06210073e-07, 9.99999332e-01],
       [2.52687649e-07, 9.78518748e-08, 9.99999649e-01],
       [1.53295754e-06, 9.39048283e-07, 9.99997528e-01],
       [6.56825752e-07, 3.50035496e-07, 9.99998993e-01],
       [5.30753079e-06, 2.19940001e-06, 9.99992493e-01],
       [1.09984695e-06, 5.16525974e-07, 9.99998384e-01],
       [1.91271996e-07, 1.22772459e-07, 9.99999686e-01],
       [6.86083803e-07, 3.85861759e-07, 9.99998928e-01],
       [1.74872557e-06, 8.41676790e-07, 9.99997410e-01],
       [2.89249346e-06, 2.99242614e-06, 9.99994115e-01],
       [6.90642135e-08, 7.53113924e-08, 9.99999856e-01],
       [4.60925277e-08, 4.16879416e-08, 9.99999912e-01],
       [3.36603158e-07, 2.06188217e-07, 9.99999457e-01],
       [8.22532819e-07, 3.93615

In [162]:
yt[3]

array([1., 0., 0.])

In [151]:
softmax(np.expand_dims(X[0], 0))

array([[0.81032902, 0.16360261, 0.02003419, 0.00603418]])

In [None]:
nn.printparam('hiddens')

In [68]:
softmax(X@nn.weights[0])

array([[1.],
       [1.],
       [1.],
       [1.],
       [1.]])

In [70]:
dd = X@nn.weights[0]

In [77]:
np.exp(dd)

array([[0.07363323],
       [0.09514679],
       [0.5306785 ],
       [0.20309256],
       [0.15378363]])

In [72]:
ss = np.exp(dd).sum(axis=1)

In [76]:
ss

array([0.07363323, 0.09514679, 0.5306785 , 0.20309256, 0.15378363])

In [74]:
np.divide(np.exp(dd).T, ss).T

array([[1.],
       [1.],
       [1.],
       [1.],
       [1.]])

In [81]:
nn.printparam('a')

[[0.98692037]
 [0.9984375 ]
 [0.99468606]
 [0.96072178]
 [0.91100583]]


In [82]:
data = load_iris()

In [83]:
class NN():
  def __init__(self, dims, acts):
    self.dims = dims
    self.acts = acts
    self.weights = []
    self.hiddens = []
    self.activated = []


  def init_params(self):
    for i in range(len(self.dims)-1):
      inc = self.dims[i]
      outc = self.dims[i+1]
      w_i = np.random.normal(0, 2, (inc, outc))
      self.weights.append(w_i)

  def forward(self, X):
    out = X
    for i in range(len(self.dims)-1):
      # inc = self.dims[i]
      # outc = self.dims[i+1]
      # w_i = np.random.normal(0, 2, (inc, outc))
      # self.weights.append(w_i)
      out = out @ self.weights[i]
      self.hiddens.append(out)
      out = self.acts[i](out) 
      self.activated.append(out)
    self.hiddens.append(X)
    return out

  def printparam(self):
    # print("weights:")
    # for i in self.weights:
    #   print(i)
    print("hiddens")
    for i in self.hiddens:
      print(i)
    # print("activated")
    # for i in self.activated:
    #   print(i.shape)

  def backward(self, y, lr):
    d_0 = self.hiddens[-1].T @ np.multiply(
        (np.multiply(
            np.multiply(
                MSE_derivat(self.activated[-1], y), softmax_derivat(self.activated[2])) \
            @ self.weights[2].T, 
            ReLU_derivat(self.activated[1])) @ self.weights[1].T), ReLU_derivat(self.activated[0]))
    d_1 = self.hiddens[0].T @ np.multiply((np.multiply(MSE_derivat(self.activated[-1], y), sigmoid_derivat(self.activated[2])) @ self.weights[2].T), ReLU_derivat(self.activated[1]))

    d_2 = self.hiddens[1].T @ np.multiply(MSE_derivat(self.activated[-1], y), softmax_derivat(self.activated[2]))
    self.weights[0] -= lr*d_0
    self.weights[1] -= lr*d_1
    self.weights[2] -= lr*d_2


In [84]:
X = data['data']
y = data['target']
# X = np.random.rand(4, 4)
# y = [1, 2, 2, 0]
nn = NN([4, 8, 4, 3], [ReLU, ReLU, softmax])
nn.init_params()

In [85]:
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [86]:
y_exp = np.zeros((len(y), max(y)+1))
y_exp[np.arange(len(y)), y] = 1

In [None]:
y_exp

In [92]:
nn.printparam()

hiddens


In [101]:
y

array([[0, 6, 0, 6]])

In [None]:
epochs = 10
for i in range(epochs+1):
  nn.forward(X)
  nn.printparam()
  nn.backward(y_exp, 0.01)

In [None]:
nn.printparam()

In [90]:
nn.forward(X[2])

AxisError: ignored

In [43]:
y_exp[2]

array([1., 0., 0.])