In [21]:
import math
import numpy as np
from src.datagen import DatasetGenerator

In [22]:
# define activation function
def sigma(z):
    return 1 / (1 + np.e**(-z)) # sigmoid

def sigma_prime(z):
    y = sigma(z)
    return y * (1 - y)

In [17]:
# define loss function
def loss(y_true, y_pred):
    return (y_true - y_pred)**2 / 2 # squared error loss

def loss_prime(y_true, y_pred):
    return -(y_true - y_pred) # squared error loss

In [18]:
# define training routine
def train(training_set, batch_size=32, epochs=10, eta=0.01, validation_set=None):
    for sample in training_set:
        # read sample
        x = sample['data']
        y_true = sample['label']

        # feed forward
        z = [np.empty(0)]
        a = [x]
        for i, (w, b) in enumerate(zip(weights, biases)):
            z.append(w @ a[i] + b)  # weighted sum
            a.append(sigma(z[i + 1]))  # activation

        # backpropagate
        gradient_i = loss_prime(y_true, a[-1])
        for i in range(1, len(weights) + 1):
            if i == 1:
                w_i = np.identity(gradient_i.shape[0])
            else:
                w_i = weights[-i + 1].T

            gradient_i = (w_i @ gradient_i) * sigma_prime(z[-i])
            weight_gradient_i = gradient_i @ a[-i - 1].T
            weights[-i] -= eta * weight_gradient_i
            biases[-i] -= eta * gradient_i

In [19]:
# define validation routine
def feed_forward(x):
    for w, b in zip(weights, biases):
        z = w @ x + b  # weighted sum
        y = sigma(z)  # activation
        x = y  # output of this layer is input of the next
    return x

def validate(validation_set, verbose=False, print_samples=10):
    average_loss = 0
    accuracy = 0
    num_samples = 0
    for sample in validation_set:
        x = sample['data']
        y_pred = feed_forward(x)
        y_true = sample['label']

        sample_loss = loss(y_true, y_pred)

        num_samples += 1
        average_loss += sample_loss
        if np.array_equal(np.round(y_pred), y_true):
            accuracy += 1

    accuracy /= num_samples
    average_loss /= num_samples
    print(f"Accuracy: {accuracy:<10} Average Loss: {average_loss}")

In [23]:
# create training set and test set
datagen = DatasetGenerator(lambda x, y: int(x * math.sin(x) - y * math.cos(y) > 0))
training_set = list(datagen.generate_samples(1000000))
test_set = list(datagen.generate_samples(10000))

In [24]:
# define network
dims = [2, 4, 1]
weights = []
biases = []
for i in range(len(dims) - 1):
    num_neurons = dims[i + 1]
    num_weights = dims[i]
    weights.append(np.random.randn(num_neurons, num_weights))
    biases.append(np.random.randn(num_neurons, 1))

for w, b in zip(weights, biases):
    print(w)
    print(b)
    print()

[[-0.25598582  0.07432802]
 [-1.24015528  0.22935381]
 [-0.87693144 -0.04319777]
 [ 0.64505426  0.08628289]]
[[1.47220724]
 [0.83315479]
 [0.26152609]
 [0.48671254]]

[[ 0.62086154  1.73202972 -0.34463041 -1.91601346]]
[[-0.01202509]]



In [25]:
validate(test_set)

Accuracy: 0.5055     Average Loss: [[0.12363905]]


In [26]:
# create batch of samples
batch_size = 32
x = training_set[0]['data']
y_true = training_set[0]['label']
for i in range(1, batch_size):
    x = np.hstack((x, training_set[i]['data']))
    y_true = np.hstack((y_true, training_set[i]['label']))

print(x)
print(y_true)

[[-0.08286058 -0.16561444  0.81325145 -0.38318923 -0.93427263  0.67678388
  -0.25942844 -1.20406114 -1.40323003 -0.282881   -0.7878895   0.19245011
  -0.13678735  1.27363488  1.13478781 -0.01599958  0.31650258 -2.26735391
   1.01470395 -0.40771864  0.16582471  1.68481218  0.11313136 -1.16435185
   0.58753389 -0.77435293  0.70944193 -1.13715183 -0.51459813 -0.44463346
  -0.17175865 -0.02177528]
 [ 1.23209888 -1.34995484 -0.44609543  0.7712027  -0.35683213 -0.13805914
   0.85229779 -0.01283679 -1.38929679 -0.32422991 -0.04629461 -0.20362991
  -0.19902883  0.74259767 -0.21765863 -0.81597707  0.47038889 -0.30139455
   0.32267048  0.52131731  1.45329427  1.06753956 -1.18077247  0.25586608
   0.1613435   0.62978426 -0.78952235  0.27100745 -0.63661874 -1.26228822
  -0.76584267 -1.71169478]]
[[0 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 1 1 1 1 1 0]]


In [27]:
# feed forward sample
z = [None]
a = [x]
for i, (w, b) in enumerate(zip(weights, biases)):
    z.append(w @ a[i] + b)  # weighted sum
    a.append(sigma(z[i + 1]))  # activation

for z_i, a_i in zip(z, a):
    print(z_i)
    print(a_i)
    print()

None
[[-0.08286058 -0.16561444  0.81325145 -0.38318923 -0.93427263  0.67678388
  -0.25942844 -1.20406114 -1.40323003 -0.282881   -0.7878895   0.19245011
  -0.13678735  1.27363488  1.13478781 -0.01599958  0.31650258 -2.26735391
   1.01470395 -0.40771864  0.16582471  1.68481218  0.11313136 -1.16435185
   0.58753389 -0.77435293  0.70944193 -1.13715183 -0.51459813 -0.44463346
  -0.17175865 -0.02177528]
 [ 1.23209888 -1.34995484 -0.44609543  0.7712027  -0.35683213 -0.13805914
   0.85229779 -0.01283679 -1.38929679 -0.32422991 -0.04629461 -0.20362991
  -0.19902883  0.74259767 -0.21765863 -0.81597707  0.47038889 -0.30139455
   0.32267048  0.52131731  1.45329427  1.06753956 -1.18077247  0.25586608
   0.1613435   0.62978426 -0.78952235  0.27100745 -0.63661874 -1.26228822
  -0.76584267 -1.71169478]]

[[ 1.58499785  1.41426272  1.23086902  1.62762022  1.68484516  1.28869851
   1.60196685  1.77947569  1.72815055  1.5205214   1.6704548   1.40780734
   1.49242945  1.20137059  1.16553952  1.41565295  

In [28]:
# backpropagate sample
eta = 1

delta_weights = []
delta_biases = []

gradient_i = loss_prime(y_true, a[-1])
for i in range(1, len(weights) + 1):
    if i == 1:
        w_i = np.identity(gradient_i.shape[0])
    else:
        w_i = weights[-i + 1].T

    gradient_i = (w_i @ gradient_i) * sigma_prime(z[-i])
    weight_gradient_i = gradient_i @ a[-i - 1].T
    bias_gradient_i = gradient_i @ np.ones((batch_size, 1))

    print(f"weights {i}:\t\t {w_i.shape}")
    print(f"sigma_prime {i}:\t {sigma_prime(z[-i]).shape}")
    print(f"gradient {i}:\t\t {gradient_i.shape}")
    print(f"activation {i - 1}:\t {a[-i - 1].T.shape}")
    print()

    delta_weights.append(eta * weight_gradient_i)
    delta_biases.append(eta * bias_gradient_i)

for dw, db in zip(delta_weights, delta_biases):
    print(dw)
    print(db)
    print()

weights 1:		 (1, 1)
sigma_prime 1:	 (1, 32)
gradient 1:		 (1, 32)
activation 0:	 (32, 4)

weights 2:		 (4, 1)
sigma_prime 2:	 (4, 32)
gradient 2:		 (4, 32)
activation 1:	 (32, 2)

[[-1.13333085 -0.71853021 -0.68165152 -0.94451327]]
[[-1.44475018]]

[[-0.08519533  0.09579633]
 [-0.36412144  0.35181175]
 [ 0.05576336 -0.09358951]
 [ 0.15232484 -0.53345559]]
[[-0.15086828]
 [-0.52770717]
 [ 0.10240861]
 [ 0.55502445]]

