In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

## Simple perceptron w/sklearn API + Iris classification

In [2]:
iris = load_iris()
X = iris.data[:, (2,3)] # only petal length and width
y = (iris.target == 0).astype(int)  # make binary "is it a setosa?"

X_train, X_test, y_train, y_test = train_test_split(X, y)

In [3]:
perceptron = Perceptron()
perceptron.fit(X_train, y_train)

In [4]:
print(perceptron.score(X_test, y_test)) # given petal length of 2 and width of 0.5, is it a setosa?

1.0


## Simple perceptron from scratch w/Iris classification

A single neuron/node with:
- 2 inputs (petal length and width) and 2 input weights
- 1 bias input (always inputs 1) and 1 bias weight
- 1 binary output (1 == "is setosa", 0 == "is not setosa")

Learning process:
- For each input instance in training set:
    - multiply each input feature (petal length and width) by its weight, and multiply the bias by its weight
    - Sum the results
    - If greater than or equal to 0, output 1, else output 0
    - Calculate error by minusing the output from the target output. 
    - For each weight:
        - Add the ((error * learning rate) * input) to the weight
        - e.g. if the weight was 0.5, the input was -0.7, the output was 1 and the target output was 0, that's an error of -1, 0.05 * -1 * -0.7 = 0.035, so new weight is 0.535. If you got the same input again you've amplified the negative input which will result in a sum of weights * inputs closer to being less than 0, resulting in an output of 0, the target.
        - e.g. if the weight was 0.5, the input was 0.7, the output 1 and the target output was 0, that's error of -1, 0.05 * -1 * 0.7 = -0.035, so the new weight is 0.465. If you got the same input again, you've supressed the positive input which will result in a sum of weights * inputs closer to being less than 0, resulting in an output of 0, the target.
       


In [5]:
class MyPerceptron:
    def __init__(self):   
        self.input_weights = [0.5, 0.5]
        self.bias_weight = 0.5
        self.learning_rate = 0.05
    
    def fit(self, X, y):
        for i in range(0, X.shape[0]):
            output = self.predict(X[i, :])
            error = y[i] - output
            for j in range(0, len(self.input_weights)):
                self.input_weights[j] = self.input_weights[j] + ((self.learning_rate * error) * X[i, j])
            self.bias_weight = self.bias_weight + ((self.learning_rate * error) * 1)
            
    def predict(self, x):
        product = 0
        for input_val, weight in zip(x, self.input_weights):
            product += input_val * weight
        product += 1 * self.bias_weight
        return int(product >= 0) # simple heaviside step function
    
    def score(self, X, y):
        error = y.shape[0]
        for i in range(0, X.shape[0]):
            output = self.predict(X[i, :])
            error -= abs(y[i] - output)
        return f"{error}/{y.shape[0]}"

In [6]:
model = MyPerceptron()
print(model.input_weights)
print(model.score(X_test,y_test))
model.fit(X_train, y_train)
print(model.input_weights)
print(model.score(X_test,y_test))

[0.5, 0.5]
14/38
[-0.21500000000000002, 0.27]
36/38


## Simple perceptron continous output using backpropagation

In [7]:
import math

def sigmoid(z):
    return 1 / (1 + math.exp(-z))

def half_mse(a, y):
    return 0.5 * ((a - y)**2)   # half MSE is nicer derivative than MSE


class BackPropagatingPerceptron:
    def __init__(self):
        self.a_out = []   # output after sigmoid - is a matrix because storing output from multiple input instances
        #self.z_out = []   # doesn't need storing for backprop
        self.w_out = [0.5, 0.5]
        self.b_out = 0.5
        
        self.l_rate = 0.5
    
    def forward_pass(self, X, y):
        cost_out = []
        for i in range(0, X.shape[0]):
            z_out = 0
            for j in range(0, len(self.w_out)):
                z_out += self.w_out[j] * X[i, j]
            z_out += self.b_out
            squished = sigmoid(z_out)
            self.a_out.append(squished) # matrix cus for hidden layers it'd be multiple outputs per input instance
            cost = half_mse(squished, y[i])
            cost_out.append(cost)
        return sum(cost_out)
    
    def backwards_pass(self, X, y):
        w_delta_out = [[],[]] # 1 gradient per weight per input instance
        b_delta_out = [] # 1 gradient per input instance
        for i in range(0, X.shape[0]): # i = instance
            for j in range(0, len(self.w_out)): # j = weight
                print(-(y[i] - self.a_out[i]))
                w_delta_out[j].append((-(y[i] - self.a_out[i])) * (self.a_out[i]*(1-self.a_out[i])) * X[i, j])  
            b_delta_out.append((-(y[i] - self.a_out[i])) * (self.a_out[i]*(1-self.a_out[i])) * 1)
        
        for i in range(0, len(w_delta_out)):
            self.w_out[i] = self.w_out[i] - (self.l_rate * (sum(w_delta_out[i]) / len(w_delta_out[i])))
        self.b_out = self.b_out - (self.l_rate * (sum(b_delta_out) / len(b_delta_out)))
        
        # why are my gradient exploding after a while? read le book

In [8]:
diabetes = load_diabetes()
X = diabetes.data[:, (0,3)]
y = diabetes.target

scaler = MinMaxScaler()  # because output is sigmoid
y = scaler.fit_transform(y.reshape(-1, 1))
y = y.reshape(-1)

X_train, X_test, y_train, y_test = train_test_split(X, y)

scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
model = BackPropagatingPerceptron()
for epoch in range(0, 1):
    if epoch % 10 == 0:
        print(model.forward_pass(X_train, y_train))
    model.forward_pass(X_train, y_train)
    model.backwards_pass(X_train, y_train)

15.767620345732574
0.4389289184383177
0.4389289184383177
0.31473651020368476
0.31473651020368476
-0.05900281355170456
-0.05900281355170456
-0.23198053141744535
-0.23198053141744535
0.39404613787438714
0.39404613787438714
-0.3516356935067194
-0.3516356935067194
0.595888832679675
0.595888832679675
-0.04795932628223265
-0.04795932628223265
-0.09915727006984709
-0.09915727006984709
0.40812430868190974
0.40812430868190974
0.09002763743350373
0.09002763743350373
0.21678814889669518
0.21678814889669518
0.11075988596690822
0.11075988596690822
0.15424587031388737
0.15424587031388737
0.025209863186482995
0.025209863186482995
0.4752454480784013
0.4752454480784013
0.21569176441407872
0.21569176441407872
0.025427772686548833
0.025427772686548833
0.11600190931851928
0.11600190931851928
0.552213328626229
0.552213328626229
0.39908609236227865
0.39908609236227865
0.18827392670268084
0.18827392670268084
0.10958918430553699
0.10958918430553699
0.3689402715505416
0.3689402715505416
0.6213345562373987
0.62

In [101]:
import random

class MultiLayerPerceptron:
    def __init__(self):
        self.layer_out = Layer(n_nodes = 1)
    
    def forward_pass(self, X, y):
        output = self.layer_out.forward_pass(X)
        cost_out = []
        for i in range(X.shape[0]):
            cost = half_mse(output[0][i], y[i])
            cost_out.append(cost)
        return sum(cost_out)
    
    def backwards_pass(self, X, y):
        cost_delta = []
        for i in range(X.shape[0]):
            cost_delta.append(-(y[i] - self.layer_out.nodes[0].outputs[i]))
        z_delta_next = [[cost_delta]] # [prev_node_1[curr_node1_deltas[instances], curr_node2_delta[instance]], prev_node_2...]
            
        self.layer_out.backwards_pass(X, z_delta_next)
            

class Layer:
    def __init__(self, n_nodes):
        self.nodes = []
        for i in range(n_nodes):
            self.nodes.append(Node(2))
    
    def forward_pass(self, X):
        outputs = []
        for i in range(len(self.nodes)):
            outputs.append(self.nodes[i].forward_pass(X))
        return outputs
    
    def backwards_pass(self, X, z_delta_next):
        for i in range(len(self.nodes)):
            self.nodes[i].backwards_pass(X, z_delta_next[i])
        
class Node:
    def __init__(self, n_inputs):
        self.outputs = []
        self.weights = []
        self.bias = 0.5 #random.random()
        self.l_rate = 0.5
         
        for i in range(n_inputs):
            self.weights.append(0.5)
    
    def forward_pass(self, X):
        self.outputs = []
        out = []
        for i in range(X.shape[0]):
            z_sum = 0
            for j in range(len(self.weights)):
                z_sum += self.weights[j] * X[i, j]
            z_sum += self.bias
            self.outputs.append(sigmoid(z_sum))
        return self.outputs
    
    def backwards_pass(self, X, z_delta_next):
        w_delta_matrix = [[] for i in range(len(self.weights))]
        b_delta_array = []
        for i in range(0, X.shape[0]):
            z_delta = self.outputs[i]*(1-self.outputs[i])
            for j in range(len(self.weights)):
                part_w_delta = z_delta * X[i, j]
                full_w_delta = 0
                for k in range(len(z_delta_next)):
                    full_w_delta += z_delta_next[k][i] * part_w_delta
                w_delta_matrix[j].append(full_w_delta)
            b_delta_array.append(self.outputs[i]*(1-self.outputs[i]))
        
        for i in range(len(w_delta_matrix)):
            self.weights[i] = self.weights[i] - (self.l_rate * (sum(w_delta_matrix[i]) / len(w_delta_matrix[i])))
        self.bias = self.bias - (self.l_rate * (sum(b_delta_array) / len(b_delta_array)))
        
        
# return z_delta * the weights

# once I sort the input into backwards_pass() and output of backward_pass()... it should work with any number of
# layers and nodes?

# the thing that's making this confusing is having to keep track of the instances in the main backprop code...
# could that loop be moved elsewhere somehow?

# I need to add to a z_delta_next for each node in prevous layer during backprop
# For each node in previous layer, each node should add its z_delta multiplied by the weight for the prev layer node

In [103]:
model = MultiLayerPerceptron()

for epoch in range(0, 300):
    print(model.forward_pass(X_train, y_train))
    model.backwards_pass(X_train, y_train)
    print(model.layer_out.nodes[0].weights)

15.767620345732574
[0.4935362573447928, 0.500291033619962]
14.36593479983957
[0.486373223513595, 0.49973699664759574]
13.060944043929076
[0.4786688852921059, 0.4984386952645876]
11.873124706457787
[0.4706009599599929, 0.4965271309159595]
10.822733726401399
[0.46236167783518695, 0.494160817688519]
9.92852274863209
[0.45415082881161406, 0.49152034791928123]
9.206232230907798
[0.44616733103269224, 0.48880020870215235]
8.667049257624866
[0.4385999380280136, 0.4861983902981928]
8.316284084179221
[0.4316180355335624, 0.4839048869504642]
8.152537232400759
[0.425363663530186, 0.4820905861333835]
8.16756603640367
[0.41994582546726467, 0.48089810256031723]
8.346921587301667
[0.41543779175176576, 0.4804357728869881]
8.671255263470098
[0.4118775594675138, 0.48077537806912807]
9.118049880269641
[0.4092710676243011, 0.4819534189961815]
9.663464142833107
[0.4075973593371657, 0.4839751732978589]
10.284004287882345
[0.40681472315090333, 0.48682045509770455]
10.957828771966268
[0.4068669243324884, 0.490

31.005777648515668
[1.598109033910608, 2.29862750285932]
31.010929123209948
[1.6021185638092625, 2.3046620980451658]
31.01606128077564
[1.6061204549747021, 2.3106848311485253]
31.021174380286062
[1.6101147417002628, 2.316695747364887]
31.026268673518565
[1.6141014581946256, 2.3226948918507806]
31.03134440516549
[1.6180806385799988, 2.328682309720392]
31.036401813038992
[1.6220523168903411, 2.334658046042237]
31.041441128270332
[1.6260165270696245, 2.340622145835889]
31.04646257550345
[1.6299733029701398, 2.3465746540687697]
31.05146637308348
[1.633922678350842, 2.3525156156529943]
31.056452733239627
[1.6378646868757387, 2.358445075442276]
31.06142186226293
[1.6417993621123173, 2.3643630782288922]
31.066373960679524
[1.6457267375300155, 2.3702696687407054]
31.071309223418552
[1.649646846498732, 2.3761648916382465]
31.076227839975523
[1.6535597222873777, 2.3820487915118562]
31.08112999457131
[1.6574653980624687, 2.387921412878885]
31.08601586630639
[1.661363906886759, 2.393782800180952]


In [140]:
class MLP:
    def __init__(self):
        self.layer_h1 = Layer(n_nodes = 3, n_inputs = 2)
        self.layer_o = Layer(n_nodes = 1, n_inputs = 1)
        
        self.costs = []
    
    def forward_pass(self, x, y):
        output = self.layer_h1.forward_pass(x)
        output = self.layer_o.forward_pass(output)
        cost = half_mse(output, y)
        self.costs.append(cost)
    
    def get_cost(self):
        total_cost = sum(self.costs)
        self.costs = []
        return total_cost
    
    def backwards_pass(self, x, y):
        out_output_grads = [[-(y - self.layer_o.prev_outputs[0])]]
        h1_output_grads = self.layer_o.backwards_pass(self.layer_h1.prev_outputs, out_output_grads)
        _ = self.layer_h1.backwards_pass(x, h1_output_grads)
        

class Layer:
    def __init__(self, n_nodes, n_inputs):
        self.nodes = []
        self.prev_outputs = []
        for i in range(n_nodes):
            self.nodes.append(Node(n_inputs))
    
    def forward_pass(self, inputs):
        self.prev_outputs = []
        for node in self.nodes:
            self.prev_outputs.append(node.forward_pass(inputs))
        return self.prev_outputs
    
    def backwards_pass(self, inputs, output_grad_matrix):
        input_grad_matrix = [[] for i in range(len(inputs))]
        for i in range(len(self.nodes)):
            input_grads = self.nodes[i].backwards_pass(inputs, output_grad_matrix[i])
            for j in range(len(input_grads)):
                input_grad_matrix[j].append(input_grads[j]) # this could be a transpose operation
        return input_grad_matrix
              
    
class Node:
    def __init__(self, n_inputs):
        self.prev_output = None
        self.l_rate = 0.0005
        self.bias = 0.5
        self.weights = []
        for _ in range(n_inputs):
            self.weights.append(0.5)
    
    def forward_pass(self, inputs):
            z_sum = 0
            for i in range(len(self.weights)):
                z_sum += self.weights[i] * inputs[i]
            z_sum += self.bias
            output = sigmoid(z_sum)
            self.prev_output = output
            return output
        
    def backwards_pass(self, inputs, output_grads):
        input_grads = []
        z_grad = self.prev_output*(1-self.prev_output)
        for i in range(len(self.weights)):
            part_input_grad = z_grad * self.weights[i]
            full_input_grad = 0
            part_weight_grad = z_grad * inputs[i]
            full_weight_grad = 0
            for j in range(len(output_grads)):
                full_input_grad += output_grads[j] * part_input_grad
                full_weight_grad += output_grads[j] * part_weight_grad
            input_grads.append(full_input_grad)   
            self.weights[i] = self.weights[i] - (self.l_rate * full_weight_grad)
        bias_grad = self.prev_output*(1-self.prev_output) #?
        self.bias = self.bias - (self.l_rate * bias_grad)
        return input_grads     

In [141]:
model = MLP()
for i in range(30):
    for j in range(331):
        model.forward_pass(X_train[j, :], y_train[j])
        model.backwards_pass(X_train[j, :], y_train[j])
    print(model.get_cost())

[22.30618334]
[21.44562776]
[20.59394883]
[19.75388269]
[18.92823876]
[18.11987278]
[17.33165687]
[16.56644694]
[15.82704802]
[15.11617853]
[14.43643388]
[13.79025085]
[13.17987324]
[12.6073201]
[12.07435734]
[11.58247357]
[11.13286087]
[10.72640114]
[10.36365824]
[10.04487618]
[9.76998325]
[9.5386018]
[9.35006328]
[9.20342783]
[9.09750777]
[9.0308941]
[9.0019852]
[9.00901684]
[9.05009269]
[9.12321458]


In [104]:
X_train.shape

(331, 2)