In [2]:
import math
import numpy as np
from src.datagen import DatasetGenerator

In [3]:
# define activation function
def sigma(z):
    """Logistic sigmoid activation: 1 / (1 + exp(-z)).

    Works on scalars and, element-wise, on numpy arrays.

    For very negative inputs exp(-z) overflows to inf; the quotient then
    evaluates to 0.0, which is the correct limit of the sigmoid, so the
    overflow is deliberately silenced. (The previous `np.e ** (-z)` form
    raised OverflowError for plain Python floats such as -1000.0.)
    """
    with np.errstate(over="ignore"):
        return 1 / (1 + np.exp(-z))  # sigmoid

def sigma_prime(z):
    """Derivative of the activation at z, computed as sigma(z) * (1 - sigma(z))."""
    activation = sigma(z)
    complement = 1 - activation
    return activation * complement

In [4]:
# define loss function
def loss(y_true, y_pred):
    """Squared error loss: half the squared difference between target and prediction."""
    residual = y_true - y_pred
    return residual ** 2 / 2

def loss_prime(y_true, y_pred):
    """Derivative of the squared error loss with respect to the prediction."""
    residual = y_true - y_pred
    return -residual

In [5]:
# define training routine
def train(training_set, batch_size=32, epochs=10, eta=0.01, validation_set=None):
    """Train the global network (`weights`, `biases`) in place by gradient descent.

    One parameter update is performed per sample.

    Args:
        training_set: iterable of samples, each a mapping with a 'data' entry
            (network input) and a 'label' entry (target output). Must be
            re-iterable (e.g. a list) for more than one epoch to have any
            effect; a one-shot iterator is exhausted after the first pass.
            Assumes inputs and labels are column vectors -- TODO confirm
            against the dataset generator.
        batch_size: currently unused; accepted for interface compatibility.
            Updates are applied after every single sample.
        epochs: number of full passes over `training_set`.
        eta: learning rate applied to every gradient step.
        validation_set: optional iterable of samples; when given, `validate`
            is run on it after each epoch.
    """
    for _ in range(epochs):
        for sample in training_set:
            # read sample
            x = sample['data']
            y_true = sample['label']

            # feed forward, remembering every weighted sum and activation
            z = [np.empty(0)]  # placeholder so z[k] lines up with a[k]
            a = [x]
            for w, b in zip(weights, biases):
                z.append(w @ a[-1] + b)  # weighted sum
                a.append(sigma(z[-1]))  # activation

            # backpropagate: compute the gradients of ALL layers first, so the
            # error is propagated through the weights that produced this
            # forward pass rather than through already-updated ones
            layer_gradients = []
            delta = loss_prime(y_true, a[-1])
            for i in range(1, len(weights) + 1):
                if i > 1:
                    # pull the error back through the next layer's weights
                    delta = weights[-i + 1].T @ delta
                delta = delta * sigma_prime(z[-i])
                layer_gradients.append((delta @ a[-i - 1].T, delta))

            # apply the updates, output layer first
            for i, (weight_gradient, bias_gradient) in enumerate(layer_gradients, start=1):
                weights[-i] -= eta * weight_gradient
                biases[-i] -= eta * bias_gradient

        if validation_set is not None:
            validate(validation_set)

In [6]:
# define validation routine
def feed_forward(x):
    """Propagate input x through every layer of the global network and return the output."""
    activation = x
    for layer_weights, layer_biases in zip(weights, biases):
        # weighted sum followed by the activation; the result feeds the next layer
        activation = sigma(layer_weights @ activation + layer_biases)
    return activation

def validate(validation_set, verbose=False, print_samples=10):
    """Evaluate the global network on a set of samples and print a summary.

    A sample counts as correct when the rounded prediction equals its label.

    Args:
        validation_set: iterable of samples, each a mapping with a 'data'
            entry (network input) and a 'label' entry (target output).
        verbose: when True, additionally print target, prediction and loss
            for the first `print_samples` samples.
        print_samples: maximum number of per-sample lines printed in verbose
            mode.

    Prints the accuracy and the average loss. For an empty validation set a
    short notice is printed instead (previously this raised
    ZeroDivisionError). Returns None.
    """
    total_loss = 0
    num_correct = 0
    num_samples = 0
    for sample in validation_set:
        x = sample['data']
        y_true = sample['label']
        y_pred = feed_forward(x)

        sample_loss = loss(y_true, y_pred)

        if verbose and num_samples < print_samples:
            print(f"[{num_samples}] y_true: {np.ravel(y_true)} "
                  f"y_pred: {np.ravel(y_pred)} loss: {np.ravel(sample_loss)}")

        num_samples += 1
        # `total + loss` (not `+=`) so a caller-owned loss array is never mutated
        total_loss = total_loss + sample_loss
        if np.array_equal(np.round(y_pred), y_true):
            num_correct += 1

    if num_samples == 0:
        # nothing to average over; avoid dividing by zero
        print("Validation set is empty; nothing to validate.")
        return

    accuracy = num_correct / num_samples
    average_loss = total_loss / num_samples
    print(f"Accuracy: {accuracy:<10} Average Loss: {average_loss}")

In [7]:
# build the training set and the (much smaller) test set from one generator;
# the lambda yields 1 when x*sin(x) - y*cos(y) > 0 and 0 otherwise
# (presumably used by DatasetGenerator to label each point -- confirm in src.datagen)
datagen = DatasetGenerator(
    lambda x, y: int(x * math.sin(x) - y * math.cos(y) > 0)
)
training_set = [*datagen.generate_samples(1_000_000)]
test_set = [*datagen.generate_samples(10_000)]

In [8]:
# define network: dims lists the layer sizes, input layer first
dims = [2, 4, 1]
weights = []
biases = []
# walk over consecutive (inputs, neurons) size pairs; weights and biases are
# drawn alternately, layer by layer, from the standard normal distribution
for num_weights, num_neurons in zip(dims[:-1], dims[1:]):
    weights.append(np.random.randn(num_neurons, num_weights))
    biases.append(np.random.randn(num_neurons, 1))

# show the initial parameters, one layer per paragraph
for w, b in zip(weights, biases):
    print(w, b, "", sep="\n")

[[-0.12577488 -0.04156878]
 [ 0.91719596 -1.66969848]
 [-1.32438206 -1.36592078]
 [-0.85760964  1.69802471]]
[[-0.69954978]
 [-2.46421343]
 [-1.58590568]
 [-0.22694209]]

[[-0.22736597  0.85507586  0.21094454  0.7561336 ]]
[[-1.1741493]]



In [9]:
# baseline: score the network on the test set with the randomly initialised
# weights from the cell above (no training call precedes this cell)
validate(test_set)

Accuracy: 0.2346     Average Loss: [[0.18624124]]


In [10]:
# create batch of samples: stack the first `batch_size` samples side by side,
# so each column of x / y_true belongs to one sample
batch_size = 32
# one hstack per array instead of growing the arrays inside a loop, which
# re-copied the whole batch on every iteration
x = np.hstack([training_set[i]['data'] for i in range(batch_size)])
y_true = np.hstack([training_set[i]['label'] for i in range(batch_size)])

print(x)
print(y_true)

[[ 0.85469287  1.43014563 -1.49381389 -0.93101493 -1.24957728 -1.04611971
   0.54875488 -0.02878888  1.3839304  -1.25173817  0.35877349  0.64832337
   0.59558908 -0.22474586  1.67492692  1.77137423 -2.13930779  0.9324683
  -0.12733466 -0.3935203   0.34739072 -0.38538803 -0.35621034  0.85913237
  -0.84827385  0.02156693  0.3709242   1.76775351 -0.70045741 -0.08714703
   1.36227411 -0.2177514 ]
 [-0.46254121  0.2085342   0.60672189 -0.3978825  -1.20655805  0.28911544
  -0.01976887  1.12009763  1.12707697 -0.51588889 -1.24559596 -0.85919072
   0.95353583 -0.99766473 -1.4451743   0.48023685  0.71216777  0.03959554
  -0.49976874  0.28581001  0.44038408  2.32082627 -0.13466385  0.01951393
   0.63961669  0.01509406 -0.20182126 -0.52377775  0.31442292 -0.17869961
   0.76729056  0.68287616]]
[[1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1 1 1 1 1 0]]


In [11]:
# feed the batch forward, keeping every layer's weighted sum (z) and
# activation (a); z[0] is a placeholder so z[k] and a[k] share an index
z = [None]
a = [x]
for w, b in zip(weights, biases):
    weighted_sum = w @ a[-1] + b
    z.append(weighted_sum)
    a.append(sigma(weighted_sum))

# print each layer's weighted sum followed by its activation
for z_i, a_i in zip(z, a):
    print(z_i, a_i, "", sep="\n")

None
[[ 0.85469287  1.43014563 -1.49381389 -0.93101493 -1.24957728 -1.04611971
   0.54875488 -0.02878888  1.3839304  -1.25173817  0.35877349  0.64832337
   0.59558908 -0.22474586  1.67492692  1.77137423 -2.13930779  0.9324683
  -0.12733466 -0.3935203   0.34739072 -0.38538803 -0.35621034  0.85913237
  -0.84827385  0.02156693  0.3709242   1.76775351 -0.70045741 -0.08714703
   1.36227411 -0.2177514 ]
 [-0.46254121  0.2085342   0.60672189 -0.3978825  -1.20655805  0.28911544
  -0.01976887  1.12009763  1.12707697 -0.51588889 -1.24559596 -0.85919072
   0.95353583 -0.99766473 -1.4451743   0.48023685  0.71216777  0.03959554
  -0.49976874  0.28581001  0.44038408  2.32082627 -0.13466385  0.01951393
   0.63961669  0.01509406 -0.20182126 -0.52377775  0.31442292 -0.17869961
   0.76729056  0.68287616]]

[[-0.78782139 -0.88809468 -0.5368862  -0.56591199 -0.49222919 -0.57999237
  -0.76774759 -0.74248995 -0.92046467 -0.52066768 -0.69289656 -0.74537706
  -0.81409724 -0.62981068 -0.85013937 -0.94230702 -0

In [12]:
# backpropagate the batch; the deltas are collected (output layer first)
# instead of being applied to the network
eta = 1

delta_weights = []
delta_biases = []

# right-multiplying by a column of ones sums the gradient over the batch
ones_column = np.ones((batch_size, 1))

gradient_i = loss_prime(y_true, a[-1])
for i in range(1, len(weights) + 1):
    # the output layer has no following layer, so an identity stands in for its weights
    w_i = np.identity(gradient_i.shape[0]) if i == 1 else weights[-i + 1].T
    sigma_prime_i = sigma_prime(z[-i])

    gradient_i = (w_i @ gradient_i) * sigma_prime_i
    weight_gradient_i = gradient_i @ a[-i - 1].T
    bias_gradient_i = gradient_i @ ones_column

    # shape report for checking that the matrix dimensions line up
    print(f"weights {i}:\t\t {w_i.shape}")
    print(f"sigma_prime {i}:\t {sigma_prime_i.shape}")
    print(f"gradient {i}:\t\t {gradient_i.shape}")
    print(f"activation {i - 1}:\t {a[-i - 1].T.shape}")
    print()

    delta_weights.append(eta * weight_gradient_i)
    delta_biases.append(eta * bias_gradient_i)

for dw, db in zip(delta_weights, delta_biases):
    print(dw, db, "", sep="\n")

weights 1:		 (1, 1)
sigma_prime 1:	 (1, 32)
gradient 1:		 (1, 32)
activation 0:	 (32, 4)

weights 2:		 (4, 1)
sigma_prime 2:	 (4, 32)
gradient 2:		 (4, 32)
activation 1:	 (32, 2)

[[-1.11698688 -0.64389237 -0.92099268 -1.29053779]]
[[-3.38154356]]

[[ 0.01252563 -0.0233166 ]
 [-0.19989891  0.16402863]
 [ 0.04334562  0.03092111]
 [-0.053431    0.04966222]]
[[ 0.16927823]
 [-0.3381324 ]
 [-0.10252591]
 [-0.40378182]]

