ToDos:

DONE: Add bias unit to backpropogation/updates (as w[0], or as a distinct variable?)
- Allow for variable cost functions
- Relabel delta, error variables to be more accurate to mathematical models
- Add check for determining when training is complete?
- Add optional internal regularization of inputs? (Or is this better kept external?)
- Why can't it work with a single training example? Where is error coming from? I still don't get it :(

In [13]:
import numpy as np

### NETWORK CLASSES

class nNetwork(object):
    def __init__(self, numX, numY, hlayers, activation, cost=(lambda Y, o : Y - o)):
        # Define activation function. 
        # Expects 'activation' to be a class with forward() and backward() static methods
        self.activation = activation
        
        # Expects a function that can take labels, outputs, and return error
        self.cost = cost
        
        # Initialize network layers as a list of nlayer objects
        # Expects 'hlayers' to be a list of integers; each being the size of each layer
        self.layer = [ nLayer(i, activation) for i in hlayers ]
        self.layer.insert(0, nLayer(numX, activation))
        self.layer.append(nLayer(numY, activation))
        
        # Initialize WEIGHT MATRICIES and BIASES for each layer
        # WEIGHT MATRICIES are the theta multipliers applied to the
        # PREVIOUS layer's output to determine the CURRENT layer's activations
        # Each node is represented by a row; each input is a column
        for i in range(1, len(self.layer)):
            prevnodes = self.layer[i-1].len
            curnodes = self.layer[i].len
            # Add an additional set of weights to serve as biases for each node in this layer
            self.layer[i].w = np.random.randn(prevnodes + 1, curnodes)
    
    def forward(self, X):
        # Input layer activation
        a = self.layer[0].input_activation(X)
        
        # Hidden and output layer activation
        for l in self.layer[1:]:
            a = l.activation(a)
        
        # Clean bias unit from output layer
        self.layer[-1].a = np.delete(self.layer[-1].a, 0, axis=1) 
        return a
    
    def backward(self, Y, rate):
        prime = self.activation.backward
        cost = self.cost
        
        # Calculate delta for all layers
        for l in self.layer[::-1]:
            if l == self.layer[-1]:
                # For output layer:
                # Calculate error (labels - predictions)
                # Calculate delta
                l.error = cost(Y, l.a)
                l.delta = l.error*prime(l.a)
                pre_d = l.delta
                pre_w = l.w
                
            elif l == self.layer[0]:
                # For input layer:
                # Do nothing
                pass
            
            else:
                # For hidden layers:
                # Calculate error (apply succeeding layer's delta to adjoined weights)
                # Calculate delta
                l.error = pre_d @ pre_w.T[:, 1:]    # Don't backpropogate from the Bias node!
                l.delta = l.error*prime(l.a[:, 1:]) # Don't backpropogate from the Bias node!
                pre_d = l.delta
                pre_w = l.w
        
        # Apply delta for all layers
        for l in self.layer:
            if l == self.layer[0]:
                # For input layer:
                # Do nothing
                pre_a = l.a
            else:
                # For hidden & output layers:
                # Set weights to weights + deltas * inputs * learning rate
                l.w += (pre_a.T @ l.delta) * rate
                pre_a = l.a          
            
            
    def train (self, X, Y, epochs, rate=1):
        print(f'Training for {epochs} epochs at α={rate}:')
        for i in range(0,epochs):
            a = self.forward(X)
            self.backward(Y, rate)
            print(f'Epoch {i+1} Loss: {np.mean(np.square(Y - a))}')
    
    def describe(self):
        # Descriptive tool for displaying network's size, shape, etc.
        out = ''
        
        print(f'Input Nodes (X): {self.layer[0].len}')
        print(f'Output Nodes (Y): {self.layer[-1].len}')
        print(f'Hidden Layers: {len(self.layer[1:-1])} Total')
        print(f'   Sizes: {[l.len for l in self.layer[1:-1]]}')
        print('\nNetwork Shape:')
        for index, item in enumerate(self.layer):
            if index == 0:
                out += str(item.len) + 'X '
            elif index == len(self.layer)-1:
                out += '| ' + str(item.len) + 'Y'
            else:
                out += '| ' + str(item.len) + ' '
        print(out)
        

class nLayer(object):
    def __init__(self, nodecount, activation):
        self.len = nodecount         # Number of nodes/perceptrons. Used for reports. 
        self.w = None                # Weight matrix (declared in nNetwork object)
        self.z = None                # (Dot product of inputs x weight matrix) + bias'
        self.a = None                # 1D array of unit outputs
        self.error = None            # Layer error
        self.delta = None            # 1D array of deltas, for backprop
        self.g = activation.forward  # Layer activation function
        
    def activation(self, i):
        self.z = i @ self.w
        self.a = self.g(self.z)
        self.a = np.hstack((np.ones((np.size(self.a,0),1)), self.a))
        return self.a
    
    def input_activation(self, i):
        self.z = i
        self.a = self.z
        self.a = np.hstack((np.ones((np.size(self.a,0),1)), self.a))
        return self.a
    
    def describe(self):
        print(f'Layer Nodes: {self.len}')
        print(f'\nw (input weights): {len(self.w)} (inputs) x {len(self.w.T)} (nodes)\n{self.w}')
        print(f'\nz (weighted sums): {len(self.z)}\n{self.z}')
        print(f'\na (outputs): {len(self.a)}\n{self.a}')
        print(f'\nerrors: {len(self.error)}\n{self.error}')
        print(f'\ndeltas: {len(self.delta)}\n{self.delta}')
        print(f'\nactivation function: {self.g}')

        
        
### ACTIVATION FUNCTIONS (CLASSES)       

class sigmoid(object):
    @staticmethod
    def forward(x):
        return 1/(1+np.exp(-x))
    
    
    # TECHINICALLY, sigmoid' is sigmoid(x) * (1 - sigmoid(x))
    # However, we are applying this to the nNetwork.layer.a values, 
    # which have already had the sigmoid function applied to them.
    # Therefore, we will use x * (1 - x) for backpropogation.
    @staticmethod
    def backward(x):
        return x * (1 - x)

    
    
    
### COST FUNCTIONS

def crossentropy(Y, o):
    # Cost funtion for use in classification problems
    if Y == 1:
        return -(np.log(o))
    else:
        return -(np.log(1 - o))
    
    
    
### SUPPORT FUNCTIONS

def scale(X, Y, Xmax=None, Ymax=None):
    if not Xmax:
        Xmax = np.amax(X, axis=0)
    if not Ymax:
        Ymax = np.amax(Y, axis=0)
    
    X_b = X/Xmax
    Y_b = Y/Ymax
    
    return X_b, Y_b

In [14]:
X1 = np.array(([2, 9], [1, 5]), dtype=float)
y1 = np.array(([92], [86]), dtype=float)

# scale units
X1, y1 = scale(X1, y1, Ymax=100)

test1 = nNetwork(2,1,[4,6,4],sigmoid)
test1.train(X1, y1, 1000)

Training for 1000 epochs at α=1:
Epoch 1 Loss: 0.20075869227062476
Epoch 2 Loss: 0.1410189076591983
Epoch 3 Loss: 0.08489711804201441
Epoch 4 Loss: 0.047677578493861034
Epoch 5 Loss: 0.028898445123512917
Epoch 6 Loss: 0.01995162910869915
Epoch 7 Loss: 0.015357925867988464
Epoch 8 Loss: 0.01276446251977115
Epoch 9 Loss: 0.011178417581281238
Epoch 10 Loss: 0.01014603984427185
Epoch 11 Loss: 0.009440710213855261
Epoch 12 Loss: 0.00894010645884216
Epoch 13 Loss: 0.008573798269424455
Epoch 14 Loss: 0.008299013723254325
Epoch 15 Loss: 0.008088602946803136
Epoch 16 Loss: 0.007924681513685724
Epoch 17 Loss: 0.0077950927769651
Epoch 18 Loss: 0.007691348610309258
Epoch 19 Loss: 0.007607383398331416
Epoch 20 Loss: 0.007538774314962442
Epoch 21 Loss: 0.007482238818556664
Epoch 22 Loss: 0.007435302294772414
Epoch 23 Loss: 0.007396073119859858
Epoch 24 Loss: 0.007363087268079259
Epoch 25 Loss: 0.007335198966827321
Epoch 26 Loss: 0.00731150246566696
Epoch 27 Loss: 0.007291275217502467
Epoch 28 Loss: 

Epoch 429 Loss: 0.007046838725496881
Epoch 430 Loss: 0.007046649555013772
Epoch 431 Loss: 0.0070464607393140475
Epoch 432 Loss: 0.007046272277623594
Epoch 433 Loss: 0.007046084169169589
Epoch 434 Loss: 0.007045896413180502
Epoch 435 Loss: 0.007045709008886106
Epoch 436 Loss: 0.007045521955517447
Epoch 437 Loss: 0.007045335252306887
Epoch 438 Loss: 0.007045148898488077
Epoch 439 Loss: 0.00704496289329597
Epoch 440 Loss: 0.007044777235966807
Epoch 441 Loss: 0.007044591925738142
Epoch 442 Loss: 0.0070444069618488155
Epoch 443 Loss: 0.007044222343538989
Epoch 444 Loss: 0.0070440380700501045
Epoch 445 Loss: 0.007043854140624919
Epoch 446 Loss: 0.007043670554507494
Epoch 447 Loss: 0.0070434873109431854
Epoch 448 Loss: 0.007043304409178668
Epoch 449 Loss: 0.007043121848461911
Epoch 450 Loss: 0.0070429396280421944
Epoch 451 Loss: 0.007042757747170112
Epoch 452 Loss: 0.007042576205097547
Epoch 453 Loss: 0.0070423950010777125
Epoch 454 Loss: 0.007042214134365117
Epoch 455 Loss: 0.007042033604215

Epoch 877 Loss: 0.006988184397360406
Epoch 878 Loss: 0.006988095083517903
Epoch 879 Loss: 0.006988005896466715
Epoch 880 Loss: 0.006987916835912108
Epoch 881 Loss: 0.006987827901560024
Epoch 882 Loss: 0.0069877390931171015
Epoch 883 Loss: 0.006987650410290655
Epoch 884 Loss: 0.006987561852788676
Epoch 885 Loss: 0.006987473420319854
Epoch 886 Loss: 0.006987385112593536
Epoch 887 Loss: 0.006987296929319756
Epoch 888 Loss: 0.006987208870209229
Epoch 889 Loss: 0.0069871209349733265
Epoch 890 Loss: 0.006987033123324114
Epoch 891 Loss: 0.006986945434974307
Epoch 892 Loss: 0.006986857869637305
Epoch 893 Loss: 0.006986770427027165
Epoch 894 Loss: 0.0069866831068586144
Epoch 895 Loss: 0.006986595908847044
Epoch 896 Loss: 0.006986508832708502
Epoch 897 Loss: 0.006986421878159709
Epoch 898 Loss: 0.006986335044918028
Epoch 899 Loss: 0.006986248332701495
Epoch 900 Loss: 0.0069861617412287965
Epoch 901 Loss: 0.006986075270219268
Epoch 902 Loss: 0.006985988919392911
Epoch 903 Loss: 0.0069859026884703

In [11]:
bee = None
if bee:
    print('hi')