In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
import numpy as np

In [3]:
if ".." not in sys.path:
    sys.path.append("..")

In [18]:
from raw.network import MultiClassNN
from raw.losses import cross_entropy_with_logits, d_cross_entropy_with_logits
from raw.activations import softmax, relu, d_relu

In [19]:
input_size = 10
hidden_layers = [5]
output_size = 2
model = MultiClassNN(input_size, hidden_layers, output_size)

In [20]:
model

LinearANN {
  Linear - weights: (10, 5), bias: (5,)
  Linear - weights: (5, 2), bias: (2,)
}

In [21]:
def one_hot(positive_class_inds, batch_size, output_size):
    y = np.zeros((batch_size, output_size), dtype=np.float)
    y[np.arange(len(y)), positive_class_inds] = 1
    return y

In [22]:
batch_size = 2
x = np.random.normal(size=(batch_size, input_size))
y = np.random.randint(0, 2, size=(batch_size,))
print(y)
y = one_hot(y, batch_size, output_size)

[0 1]


In [23]:
y, x

(array([[1., 0.],
        [0., 1.]]), array([[ 1.77979092e-01,  1.71495653e+00, -1.31959150e-03,
         -6.34326476e-01,  7.81478045e-02, -1.20284598e+00,
          7.45626552e-01, -1.41907464e-01, -1.68792807e+00,
          5.60188521e-01],
        [ 1.04216230e+00,  1.41963399e+00,  1.22515227e-01,
          5.69501348e-01, -4.28790233e-01, -2.67170334e-02,
         -1.17392385e+00,  1.03059318e-02,  4.73906365e-01,
         -4.40513629e-01]]))

In [24]:
out = model(x)
out.shape

(2, 2)

In [25]:
softmax(out)

array([[0.49383622, 0.50616378],
       [0.63188709, 0.36811291]])

In [26]:
loss_per_batch_per_neuron = cross_entropy_with_logits(out, y)

In [27]:
loss_per_batch_per_neuron

array([[0.70555135, 0.        ],
       [0.        , 0.99936558]])

In [28]:
batch_loss = np.mean(np.sum(loss_per_batch_per_neuron, -1))
print("Batch loss", batch_loss)

Batch loss 0.8524584655973513


In [29]:
loss_error = d_cross_entropy_with_logits(out, y)

In [30]:
loss_error

array([[-2.02496285, -0.        ],
       [-0.        , -2.71655784]])

Sense check. Remember if the loss error is negative then it means increasing the activation value will send the loss down and decreasing will increase it.

In [31]:
out, y

(array([[-0.06281067, -0.03815431],
        [ 0.48775348, -0.05256755]]), array([[1., 0.],
        [0., 1.]]))

In [36]:
d_relu(out)

array([[0., 0.],
       [1., 0.]])

In [35]:
relu(out)

array([[0.        , 0.        ],
       [0.48775348, 0.        ]])

In [39]:
model.backward(y)

In [41]:
for _ in range(100):
    out = model(x)
    print(softmax(out), y)
    neuron_loss = cross_entropy_with_logits(out, y)
    batch_loss = np.mean(np.sum(neuron_loss, -1))
    model.backward(y, lr=0.1)

[[0.99001028 0.00998972]
 [0.13584637 0.86415363]] [[1. 0.]
 [0. 1.]]
[[0.99016409 0.00983591]
 [0.13437567 0.86562433]] [[1. 0.]
 [0. 1.]]
[[0.9903138  0.0096862 ]
 [0.13293362 0.86706638]] [[1. 0.]
 [0. 1.]]
[[0.99045957 0.00954043]
 [0.13151946 0.86848054]] [[1. 0.]
 [0. 1.]]
[[0.99060154 0.00939846]
 [0.13013244 0.86986756]] [[1. 0.]
 [0. 1.]]
[[0.99073986 0.00926014]
 [0.12877185 0.87122815]] [[1. 0.]
 [0. 1.]]
[[0.99087465 0.00912535]
 [0.12743699 0.87256301]] [[1. 0.]
 [0. 1.]]
[[0.99100604 0.00899396]
 [0.12612719 0.87387281]] [[1. 0.]
 [0. 1.]]
[[0.99113415 0.00886585]
 [0.1248418  0.8751582 ]] [[1. 0.]
 [0. 1.]]
[[0.9912591 0.0087409]
 [0.1235802 0.8764198]] [[1. 0.]
 [0. 1.]]
[[0.99138099 0.00861901]
 [0.12234176 0.87765824]] [[1. 0.]
 [0. 1.]]
[[0.99149994 0.00850006]
 [0.12112591 0.87887409]] [[1. 0.]
 [0. 1.]]
[[0.99161604 0.00838396]
 [0.11993206 0.88006794]] [[1. 0.]
 [0. 1.]]
[[0.99172938 0.00827062]
 [0.11875967 0.88124033]] [[1. 0.]
 [0. 1.]]
[[0.99184007 0.00815993]