In [1]:
import math
import numpy as np
from src.dataset import DatasetGenerator

In [2]:
# define activation function
def sigma(z):
    """Logistic sigmoid activation, 1 / (1 + e^(-z)), applied element-wise.

    Evaluated in an overflow-free form: the exponential is only ever taken of
    a non-positive number, so very negative inputs yield 0.0 instead of
    triggering an overflow RuntimeWarning.

    Parameters
    ----------
    z : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    NumPy scalar for scalar input, otherwise an ndarray shaped like ``z``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # always in (0, 1], cannot overflow
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- the same function
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    return result[()]  # unwrap 0-d arrays to a scalar; no-op for n-d arrays

def sigma_prime(z):
    """Derivative of `sigma` at z, written in terms of the activation s as s * (1 - s)."""
    activation = sigma(z)
    complement = 1 - activation
    return activation * complement

In [3]:
# define loss function
def loss(y_true, y_pred):
    """Squared error loss: half of the squared difference between target and prediction.

    Works element-wise, so the result has the broadcast shape of the inputs.
    """
    residual = y_true - y_pred
    return residual ** 2 / 2

def loss_prime(y_true, y_pred):
    """Derivative of the squared error loss with respect to the prediction.

    Equals the negated difference ``y_true - y_pred``, element-wise.
    """
    residual = y_true - y_pred
    return -residual

In [4]:
# define training routine
def train(training_set, batch_size=32, epochs=10, eta=0.01, validation_set=None):
    """Run one pass of per-sample gradient descent over ``training_set``.

    For every sample the network is evaluated, the error is backpropagated
    through all layers, and only then are the module-level ``weights`` and
    ``biases`` updated in place. Computing every gradient before applying any
    update matters: the error of a layer must be propagated through the
    weights that actually produced the forward pass, not through weights that
    were already modified for the layer above.

    Parameters
    ----------
    training_set : iterable of mappings
        Each item provides ``sample['data']`` (network input) and
        ``sample['label']`` (target output).
        Assumes both are column vectors matching the network's input/output
        sizes -- TODO confirm against DatasetGenerator.
    batch_size : int
        Currently unused; every sample triggers its own update.
    epochs : int
        Currently unused; the training set is traversed exactly once.
    eta : float
        Learning rate that scales every gradient step.
    validation_set : optional
        Currently unused.

    Returns
    -------
    None. The global ``weights`` and ``biases`` lists are modified in place.
    """
    for sample in training_set:
        # read sample
        x = sample['data']
        y_true = sample['label']

        # feed forward; z[0] is a placeholder so z and a share layer indices
        z = [np.empty(0)]
        a = [x]
        for i, (w, b) in enumerate(zip(weights, biases)):
            z.append(w @ a[i] + b)  # weighted sum
            a.append(sigma(z[i + 1]))  # activation

        # backpropagate: collect the gradients of all layers (output layer
        # first) WITHOUT touching the parameters yet
        pending_updates = []
        gradient_i = loss_prime(y_true, a[-1])
        for i in range(1, len(weights) + 1):
            if i > 1:
                # pull the error back through the still-unmodified weights
                # of the layer that was processed in the previous iteration
                gradient_i = weights[-i + 1].T @ gradient_i
            gradient_i = gradient_i * sigma_prime(z[-i])
            weight_gradient_i = gradient_i @ a[-i - 1].T
            pending_updates.append((weight_gradient_i, gradient_i))

        # gradient step, applied only after every gradient is known
        for i, (weight_gradient_i, bias_gradient_i) in enumerate(pending_updates, start=1):
            weights[-i] -= eta * weight_gradient_i
            biases[-i] -= eta * bias_gradient_i

In [5]:
# define validation routine
def feed_forward(x):
    """Propagate input ``x`` through every layer of the global network.

    Each layer computes ``sigma(w @ activation + b)`` using the module-level
    ``weights`` and ``biases``; the activation of the last layer is returned.
    With no layers defined, ``x`` is returned unchanged.
    """
    activation = x
    for layer_weights, layer_bias in zip(weights, biases):
        weighted_sum = layer_weights @ activation + layer_bias
        activation = sigma(weighted_sum)  # becomes the input of the next layer
    return activation

def validate(validation_set, verbose=False, print_samples=10):
    """Evaluate the global network on ``validation_set`` and print the metrics.

    A sample counts as correct when the rounded prediction equals its label
    exactly. The average loss is the mean of the per-sample ``loss`` values.

    Parameters
    ----------
    validation_set : iterable of mappings
        Each item provides ``sample['data']`` and ``sample['label']``.
        Any iterable works (including generators); samples are counted while
        iterating.
    verbose : bool
        Currently unused.
    print_samples : int
        Currently unused.

    Returns
    -------
    None. Prints accuracy and average loss, or a notice when the set is empty
    (instead of failing with a division by zero).
    """
    total_loss = 0
    num_correct = 0
    num_samples = 0
    for sample in validation_set:
        x = sample['data']
        y_pred = feed_forward(x)
        y_true = sample['label']

        num_samples += 1
        total_loss += loss(y_true, y_pred)
        if np.array_equal(np.round(y_pred), y_true):
            num_correct += 1

    if num_samples == 0:
        # nothing to average over; report it rather than dividing by zero
        print("Validation set contains no samples; nothing to evaluate.")
        return

    accuracy = num_correct / num_samples
    average_loss = total_loss / num_samples
    print(f"Accuracy: {accuracy:<10} Average Loss: {average_loss}")

In [6]:
# Create the training set and the test set from one generator.
# The function handed to DatasetGenerator returns 1 when
# x*sin(x) - y*cos(y) is positive and 0 otherwise.
datagen = DatasetGenerator(
    lambda x, y: int(x * math.sin(x) - y * math.cos(y) > 0)
)
training_set = [*datagen.generate_samples(1_000_000)]
test_set = [*datagen.generate_samples(10_000)]

In [7]:
# Define the network: dims lists the layer sizes from input to output.
# Layer k gets a (dims[k + 1], dims[k]) weight matrix and a (dims[k + 1], 1)
# bias column, both drawn from a standard normal distribution.
dims = [2, 4, 1]
weights = []
biases = []
for fan_in, fan_out in zip(dims[:-1], dims[1:]):
    weights.append(np.random.randn(fan_out, fan_in))
    biases.append(np.random.randn(fan_out, 1))

# show the initial parameters, one layer per paragraph
for w, b in zip(weights, biases):
    print(w, b, "", sep="\n")

[[-0.61544505 -0.38541298]
 [-0.35821369  0.16793369]
 [-0.4509148   1.06172597]
 [-0.15527853 -0.38658605]]
[[ 0.35723153]
 [ 0.7538094 ]
 [-0.04083964]
 [ 0.37778557]]

[[ 1.13396981 -1.61204574  0.0072791  -1.19017144]]
[[-1.45410721]]



In [8]:
# Evaluate the network on the test set. No call to train() appears before this
# cell, so this presumably reports the randomly initialised network.
validate(test_set)

Accuracy: 0.2309     Average Loss: [[0.33275963]]


In [36]:
# Create a batch: stack the first `batch_size` training samples horizontally,
# once for the inputs and once for the labels.
batch_size = 32
x = np.hstack([training_set[i]['data'] for i in range(batch_size)])
y_true = np.hstack([training_set[i]['label'] for i in range(batch_size)])

print(x)
print(y_true)

[[ 0.94132473  0.9117128  -0.4748528   0.74725123 -0.25882162  0.46913891
   1.1521934  -1.19185979  1.22383366 -0.65380156  1.60000012  0.48180448
  -2.01483311 -1.11980221 -0.58649699  0.19211943  0.77018437 -0.65301071
  -1.76795152 -0.9865179  -0.53597313 -1.39069752  1.00929053  0.12180956
   0.93437111  1.61649554  2.12334997  1.29407906  0.85938419 -1.11560809
   1.55410494  0.58235799]
 [-0.03929301 -0.36040814  0.2494835  -0.45123034  1.50338661  1.00678884
   0.60362785 -2.17833247 -0.83910535  1.22588032  0.08397148 -0.51184758
   0.55234493  0.01815235 -0.91062511  0.86878257 -0.6000085   1.01608405
   0.06335135  0.79473105 -1.72252067  1.21072293  0.15316771 -0.55107546
   0.3884342   0.61158082 -0.23693186  0.65327687  0.24903584  0.11527263
  -2.23069625  0.75967236]]
[[1 1 0 1 0 0 1 0 1 0 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0]]


In [37]:
# Feed the batch forward, keeping every layer's weighted sum (z) and
# activation (a). z[0] is a placeholder so both lists share layer indices.
z, a = [None], [x]
for w, b in zip(weights, biases):
    z.append(w @ a[-1] + b)  # weighted sum of the latest activation
    a.append(sigma(z[-1]))  # activation of the sum just stored

for z_i, a_i in zip(z, a):
    print(z_i, a_i, "", sep="\n")

None
[[ 0.94132473  0.9117128  -0.4748528   0.74725123 -0.25882162  0.46913891
   1.1521934  -1.19185979  1.22383366 -0.65380156  1.60000012  0.48180448
  -2.01483311 -1.11980221 -0.58649699  0.19211943  0.77018437 -0.65301071
  -1.76795152 -0.9865179  -0.53597313 -1.39069752  1.00929053  0.12180956
   0.93437111  1.61649554  2.12334997  1.29407906  0.85938419 -1.11560809
   1.55410494  0.58235799]
 [-0.03929301 -0.36040814  0.2494835  -0.45123034  1.50338661  1.00678884
   0.60362785 -2.17833247 -0.83910535  1.22588032  0.08397148 -0.51184758
   0.55234493  0.01815235 -0.91062511  0.86878257 -0.6000085   1.01608405
   0.06335135  0.79473105 -1.72252067  1.21072293  0.15316771 -0.55107546
   0.3884342   0.61158082 -0.23693186  0.65327687  0.24903584  0.11527263
  -2.23069625  0.75967236]]

[[-0.20695808 -0.06497162  0.55332316  0.07124949 -0.0629027  -0.31952718
  -0.5845262   1.93031335 -0.07256874  0.28714028 -0.65984432  0.25798005
   1.3843697   1.03941211  1.06915494 -0.0958475   

In [38]:
# Backpropagate the batch. Only the parameter deltas are collected (output
# layer first); weights and biases themselves are not modified here.
eta = 1

delta_weights = []
delta_biases = []

gradient_i = loss_prime(y_true, a[-1])
for i in range(1, len(weights) + 1):
    # the output layer has no layer above it, so an identity stands in
    w_i = np.identity(gradient_i.shape[0]) if i == 1 else weights[-i + 1].T
    slope_i = sigma_prime(z[-i])
    inputs_t = a[-i - 1].T

    gradient_i = (w_i @ gradient_i) * slope_i
    weight_gradient_i = gradient_i @ inputs_t
    # multiplying by a column of ones sums the gradient over the batch axis
    bias_gradient_i = gradient_i @ np.ones((batch_size, 1))

    print(f"weights {i}:\t\t {w_i.shape}")
    print(f"sigma_prime {i}:\t {slope_i.shape}")
    print(f"gradient {i}:\t\t {gradient_i.shape}")
    print(f"activation {i - 1}:\t {inputs_t.shape}")
    print()

    delta_weights.append(eta * weight_gradient_i)
    delta_biases.append(eta * bias_gradient_i)

for dw, db in zip(delta_weights, delta_biases):
    print(dw, db, "", sep="\n")

weights 1:		 (1, 1)
sigma_prime 1:	 (1, 32)
gradient 1:		 (1, 32)
activation 0:	 (32, 4)

weights 2:		 (4, 1)
sigma_prime 2:	 (4, 32)
gradient 2:		 (4, 32)
activation 1:	 (32, 2)

[[-0.78060734 -0.91641311 -0.60917308 -0.85178748]]
[[-1.43671133]]

[[-1.66090281e-01  6.37159851e-02]
 [ 2.67562003e-01 -1.07819876e-01]
 [-8.16738708e-04  1.17011870e-04]
 [ 1.51471501e-01 -4.76918803e-02]]
[[-0.36001047]
 [ 0.51210971]
 [-0.00210154]
 [ 0.4036404 ]]

