In [1]:
# Imports
import numpy as np

In [2]:
# Import the tools
from sys import path
path.append('/Users/reid/dev/PythonCode/tm/tools')
path.append('D:/PythonCode/tm/tools')
from tools import gen_inputs_outputs, logsig, lin, deriv_logsig, deriv_lin, e2

In [3]:
# Define the size of the network
# Order by layers (input, layer1, layer2, ...)
# !!! No output layer yet, wait to gen inputs
layer_sizes = [4, 7]

In [4]:
# Gen inputs, outputs and size of the last layer
inputs, outputs, S = gen_inputs_outputs(layer_sizes[0])

In [5]:
# Append the output layer size
layer_sizes = np.array(layer_sizes + [S], dtype=int)

In [6]:
# Network shape
layer_sizes

array([4, 7, 3])

In [7]:
#####
# layer_sizes = np.array([1,2,1])

In [8]:
# Inputs
inputs

array([[0, 0, 0, 0],
       [0, 0, 0, 1],
       [0, 0, 1, 0],
       [0, 0, 1, 1],
       [0, 1, 0, 0],
       [0, 1, 0, 1],
       [0, 1, 1, 0],
       [0, 1, 1, 1],
       [1, 0, 0, 0],
       [1, 0, 0, 1],
       [1, 0, 1, 0],
       [1, 0, 1, 1],
       [1, 1, 0, 0],
       [1, 1, 0, 1],
       [1, 1, 1, 0],
       [1, 1, 1, 1]])

In [9]:
# Outputs
outputs

array([[0, 0, 0],
       [0, 0, 1],
       [0, 1, 0],
       [0, 1, 1],
       [0, 0, 1],
       [0, 1, 0],
       [0, 1, 1],
       [1, 0, 0],
       [0, 1, 0],
       [0, 1, 1],
       [1, 0, 0],
       [1, 0, 1],
       [0, 1, 1],
       [1, 0, 0],
       [1, 0, 1],
       [1, 1, 0]])

In [10]:
# Intialize the weights and biases randomly
# Also create the list to store the outputs of each layer
# Store the outptut for each layer
# Finally create the list of the sensitivities
weights_list = []
biases_list = []
n_list = []
a_list = []
s_list = []

In [11]:
# Define a function to reset all the parameters
# This is done to test multiple different network types
def reset_params():
    weights_list.clear()
    biases_list.clear()
    n_list.clear()
    a_list.clear()
    s_list.clear()
    for i in range(1, len(layer_sizes)):
        weights_list.append(np.random.rand(layer_sizes[i], layer_sizes[i-1]))
        biases_list.append(np.random.rand(layer_sizes[i], 1))
        n_list.append(np.empty((layer_sizes[i], 1), dtype=float))
        a_list.append(np.empty((layer_sizes[i], 1), dtype=float))
        s_list.append(np.empty((layer_sizes[i], 1), dtype=float))

In [12]:
#####
'''weights_list = [
    np.array([[-0.27],[-0.41]]),
    np.array([[0.09, -0.17]])
]
biases_list = [
    np.array([[-0.48], [-0.13]]),
    np.array([[0.48]])
]
n_list = [
    np.array([[0], [0]]),
    np.array([[0]])
]
a_list = [
    np.array([[0], [0]]),
    np.array([[0]])
]'''

'weights_list = [\n    np.array([[-0.27],[-0.41]]),\n    np.array([[0.09, -0.17]])\n]\nbiases_list = [\n    np.array([[-0.48], [-0.13]]),\n    np.array([[0.48]])\n]\nn_list = [\n    np.array([[0], [0]]),\n    np.array([[0]])\n]\na_list = [\n    np.array([[0], [0]]),\n    np.array([[0]])\n]'

In [13]:
# Define the transfer functions
transfer_functions = [logsig, lin]

In [14]:
# Define the vectors with the derivatives of the transfer functions
# These need to be converted to dialation matrices
# but this is done after the partial derivatives are calculated
# so that the numerical values can be multiplied by the identity matrix
# makes my life easier
deriv_transfer_functions = [deriv_logsig, deriv_lin]

In [15]:
# Define a function to calculate the output of the network
# while also saving the data
# Assuming that the input is already a Rx1 matrix
def run_network(X):
    # Get the input for the first layer so that
    # no need to use X and can calculate
    # recursively
    n_list[0] = np.matmul(weights_list[0], X) + biases_list[0]
    a_list[0] = transfer_functions[0](n_list[0])

    # Calculate the rest of the outptut
    for i in range(1, len(weights_list)):
        n_list[i] = np.matmul(weights_list[i], a_list[i-1]) + biases_list[i]
        a_list[i] = transfer_functions[i](n_list[i])

In [16]:
# Calculate the senstivites for the last layer
# Write a function to fill the diagonal of the matrix
def fill_F_dot(layer_num):
    a = np.identity(len(n_list[layer_num]), float)
    np.fill_diagonal(a, deriv_transfer_functions[layer_num](n_list[layer_num]).flatten())
    return a

def train_network(learning_rate=.1, epochs=1):
    for i in range(epochs):
        # Iterate through the inputs and outputs
        for i in range(len(inputs)):
            # Run the network
            run_network(inputs[i].reshape(layer_sizes[0], 1))

            # Get the dialation matrix for the last layer
            F_dot = fill_F_dot(-1)

            # Calculate the sensitivites for the last layer
            s_list[-1] = -2 * np.matmul(F_dot, outputs[i].reshape(layer_sizes[-1], 1)-a_list[-1])

            # Update the weights and biases for the last layer
            weights_list[-1] += -learning_rate*np.matmul(s_list[-1], a_list[-2].T)
            biases_list[-1] += -learning_rate*s_list[-1]

            # Iterate through the remaining layers backwards
            for i in range(len(n_list)-2, 0, -1):
                s_list[i] = np.matmul(np.matmul(fill_F_dot(i), weights_list[i+1].T), s_list[i+1])
                weights_list[i] += -learning_rate*np.matmul(s_list[i], a_list[i-1].T)
                biases_list[i] += -learning_rate*s_list[i]

In [17]:
# Function to evaluate the performance
# of the network
def performance():
    results = np.empty((len(inputs), layer_sizes[-1]), dtype=float)
    for i in range(len(inputs)):
        run_network(inputs[i].reshape(layer_sizes[0], 1))
        results[i] = outputs[i] - a_list[-1].flatten()
    return e2(results)

In [46]:
# Print performance
def print_performance(performance_dict):
    for a in sorted(performance_dict.items(), key=lambda x: x[0]):
        print(f'Error: {a[0]} | Layers: {a[1][0]}, Learning rate: {a[1][1]}, Epoch: {a[1][2]}')

In [47]:
# Trains netowrk using the parameters for each bit size
def get_performance_per_bit_size(layer_sizes_list, performance_dict):
    for layer_sizes in layer_sizes_list:
        # Get the inputs and outputs for the specifiq network size
        inputs, outputs, S = gen_inputs_outputs(layer_sizes[0])
        layer_sizes.append(S)
    
        # Iterate through the different learning rates
        # and epochs
        for rate in learning_rates:
            for epochs in epochs_list:
                # Reset weights and biases
                reset_params()
                # Train the network
                train_network(learning_rate=rate, epochs=epochs)
                # Update the performance dict
                performance_dict.update({performance(): (layer_sizes, rate, epochs)})

In [56]:
# Test netowrk performance over multiple different parameters
# Multiple different layer sizes
# Start with 1 hidden layer
layer_sizes_list_2bits = [[4, 2*(2**i)]for i in range(10)]
layer_sizes_list_4bits = [[8, 2*(2**i)]for i in range(10)]
layer_sizes_list_8bits = [[16, 2*(2**i)]for i in range(10)]

# Set different learning rates
learning_rates = [1/(16*(2**i)) for i in range(10)]

# Set the diffrent epochs
epochs_list = [i for i in range(1, 21)]

print(layer_sizes_list_2bits)

[[4, 2], [4, 4], [4, 8], [4, 16], [4, 32], [4, 64], [4, 128], [4, 256], [4, 512], [4, 1024]]


In [57]:
# Get the performance of the network
performance_dict_2_bits = {}
performance_dict_4_bits = {}
performance_dict_8_bits = {}

print('-----2 bits performance-----')
get_performance_per_bit_size(layer_sizes_list_2bits, performance_dict_2_bits)
print_performance(performance_dict_2_bits)
print()

#print('-----4 bits performance-----')
#get_performance_per_bit_size(layer_sizes_list_4bits, performance_dict_4_bits)
#print_performance(performance_dict_4_bits)
#print()
#
#print('-----8 bits performance-----')
#get_performance_per_bit_size(layer_sizes_list_4bits, performance_dict_4_bits)
#print_performance(performance_dict_4_bits)
#print()

-----2 bits performance-----
Error: 10.737772437045507 | Layers: [4, 16, 3], Learning rate: 0.00390625, Epoch: 11
Error: 10.842252282587234 | Layers: [4, 64, 3], Learning rate: 0.001953125, Epoch: 18
Error: 10.843081290925142 | Layers: [4, 64, 3], Learning rate: 0.00390625, Epoch: 17
Error: 10.857842022749338 | Layers: [4, 16, 3], Learning rate: 0.0078125, Epoch: 6
Error: 10.86147637292644 | Layers: [4, 512, 3], Learning rate: 0.0078125, Epoch: 10
Error: 10.892907108155864 | Layers: [4, 1024, 3], Learning rate: 0.0078125, Epoch: 10
Error: 10.901364102082294 | Layers: [4, 512, 3], Learning rate: 0.00390625, Epoch: 7
Error: 10.901422054075223 | Layers: [4, 64, 3], Learning rate: 0.00390625, Epoch: 11
Error: 10.917564351414207 | Layers: [4, 4, 3], Learning rate: 0.00390625, Epoch: 9
Error: 10.923060296124847 | Layers: [4, 512, 3], Learning rate: 0.00390625, Epoch: 18
Error: 10.926833438523241 | Layers: [4, 2, 3], Learning rate: 0.001953125, Epoch: 18
Error: 10.933937250560271 | Layers: [4

In [13]:
# Test the model with keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD

In [14]:
model = Sequential()
model.add(Dense(layer_sizes[1], input_shape=(layer_sizes[0],), activation='sigmoid'))
model.add(Dense(layer_sizes[2], activation='linear'))

In [15]:
sdg = SGD(.9)
model.compile(loss="categorical_crossentropy", optimizer=sdg,
	metrics=["accuracy"])
model.fit(inputs, outputs)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 196ms/step - accuracy: 0.5000 - loss: 7.0425


<keras.src.callbacks.history.History at 0x1672a4dd0>

In [16]:
model.predict(inputs)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


array([[ 0.01186371, -8.282264  , -1.4656483 ],
       [ 0.0887537 , -9.469966  , -1.5549538 ],
       [ 0.15937938, -8.323184  , -1.3819622 ],
       [ 0.22767666, -9.4916    , -1.4718292 ],
       [-0.01628557, -8.611647  , -1.6199257 ],
       [ 0.04203045, -9.72632   , -1.6907108 ],
       [ 0.14307892, -8.662352  , -1.5352875 ],
       [ 0.20412186, -9.765932  , -1.603532  ]], dtype=float32)