In [1]:
import numpy as np
import random
from sklearn.datasets import load_iris
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

## Simple perceptron w/sklearn API + Iris classification

In [2]:
iris = load_iris()
X = iris.data[:, (2,3)] # only petal length and width
y = (iris.target == 0).astype(int)  # make binary "is it a setosa?"

X_train, X_test, y_train, y_test = train_test_split(X, y)

In [3]:
perceptron = Perceptron()
perceptron.fit(X_train, y_train)

In [4]:
print(perceptron.score(X_test, y_test)) # given petal length of 2 and width of 0.5, is it a setosa?

1.0


## Simple perceptron from scratch w/Iris classification

A single neuron/node with:
- 2 inputs (petal length and width) and 2 input weights
- 1 bias input (always inputs 1) and 1 bias weight
- 1 binary output (1 == "is setosa", 0 == "is not setosa")

Learning process:
- For each input instance in training set:
    - multiply each input feature (petal length and width) by its weight, and multiply the bias by its weight
    - Sum the results
    - If greater than or equal to 0, output 1, else output 0
    - Calculate error by minusing the output from the target output. 
    - For each weight:
        - Add the ((error * learning rate) * input) to the weight
        - e.g. if the weight was 0.5, the input was -0.7, the output was 1 and the target output was 0, that's an error of -1, 0.05 * -1 * -0.7 = 0.035, so new weight is 0.535. If you got the same input again you've amplified the negative input which will result in a sum of weights * inputs closer to being less than 0, resulting in an output of 0, the target.
        - e.g. if the weight was 0.5, the input was 0.7, the output 1 and the target output was 0, that's error of -1, 0.05 * -1 * 0.7 = -0.035, so the new weight is 0.465. If you got the same input again, you've supressed the positive input which will result in a sum of weights * inputs closer to being less than 0, resulting in an output of 0, the target.
       


In [5]:
class MyPerceptron:
    def __init__(self):   
        self.input_weights = [0.5, 0.5]
        self.bias_weight = 0.5
        self.learning_rate = 0.05
    
    def fit(self, X, y):
        for i in range(0, X.shape[0]):
            output = self.predict(X[i, :])
            error = y[i] - output
            for j in range(0, len(self.input_weights)):
                self.input_weights[j] = self.input_weights[j] + ((self.learning_rate * error) * X[i, j])
            self.bias_weight = self.bias_weight + ((self.learning_rate * error) * 1)
            
    def predict(self, x):
        product = 0
        for input_val, weight in zip(x, self.input_weights):
            product += input_val * weight
        product += 1 * self.bias_weight
        return int(product >= 0) # simple heaviside step function
    
    def score(self, X, y):
        error = y.shape[0]
        for i in range(0, X.shape[0]):
            output = self.predict(X[i, :])
            error -= abs(y[i] - output)
        return f"{error}/{y.shape[0]}"

In [6]:
model = MyPerceptron()
print(model.input_weights)
print(model.score(X_test,y_test))
model.fit(X_train, y_train)
print(model.input_weights)
print(model.score(X_test,y_test))

[0.5, 0.5]
11/38
[-0.20999999999999996, 0.19499999999999998]
37/38


## Simple perceptron continuous output using backpropagation

In [7]:
import math

def sigmoid(z):
    return 1 / (1 + math.exp(-z))

def half_mse(a, y):
    return 0.5 * ((a - y)**2)   # half MSE is nicer derivative than MSE


class BackPropagatingPerceptron:
    def __init__(self):
        self.a_out = []   # output after sigmoid - is a matrix because storing output from multiple input instances
        #self.z_out = []   # doesn't need storing for backprop
        self.w_out = [0.5, 0.5]
        self.b_out = 0.5
        
        self.l_rate = 0.5
    
    def forward_pass(self, X, y):
        cost_out = []
        for i in range(0, X.shape[0]):
            z_out = 0
            for j in range(0, len(self.w_out)):
                z_out += self.w_out[j] * X[i, j]
            z_out += self.b_out
            squished = sigmoid(z_out)
            self.a_out.append(squished) # matrix cus for hidden layers it'd be multiple outputs per input instance
            cost = half_mse(squished, y[i])
            cost_out.append(cost)
        return sum(cost_out)
    
    def backwards_pass(self, X, y):
        w_delta_out = [[],[]] # 1 gradient per weight per input instance
        b_delta_out = [] # 1 gradient per input instance
        for i in range(0, X.shape[0]): # i = instance
            for j in range(0, len(self.w_out)): # j = weight
                print(-(y[i] - self.a_out[i]))
                w_delta_out[j].append((-(y[i] - self.a_out[i])) * (self.a_out[i]*(1-self.a_out[i])) * X[i, j])  
            b_delta_out.append((-(y[i] - self.a_out[i])) * (self.a_out[i]*(1-self.a_out[i])) * 1)
        
        for i in range(0, len(w_delta_out)):
            self.w_out[i] = self.w_out[i] - (self.l_rate * (sum(w_delta_out[i]) / len(w_delta_out[i])))
        self.b_out = self.b_out - (self.l_rate * (sum(b_delta_out) / len(b_delta_out)))
        
        # why are my gradient exploding after a while? read le book

In [8]:
diabetes = load_diabetes()
X = diabetes.data[:, (0,3)]
y = diabetes.target
X = X.astype("float32")
y = y.astype("float32")

scaler = MinMaxScaler()  # because output is sigmoid
y = scaler.fit_transform(y.reshape(-1, 1))
y = y.reshape(-1)

X_train, X_test, y_train, y_test = train_test_split(X, y)

scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


np.savetxt("X_train.csv", X_train, delimiter = ',', fmt = "%f")
np.savetxt("X_test.csv", X_test, delimiter = ',', fmt = "%f")
np.savetxt("y_train.csv", y_train, delimiter = ',', fmt = "%f")
np.savetxt("y_test.csv", y_test, delimiter = ',', fmt = "%f")

In [9]:
# model = BackPropagatingPerceptron()
# for epoch in range(0, 1):
#     if epoch % 10 == 0:
#         print(model.forward_pass(X_train, y_train))
#     model.forward_pass(X_train, y_train)
#     model.backwards_pass(X_train, y_train)

## Backpropagating Single Node / Perceptron  w/ batch gradient descent

In [10]:

# class MultiLayerPerceptron:
#     def __init__(self):
#         self.layer_out = Layer(n_nodes = 1)
    
#     def forward_pass(self, X, y):
#         output = self.layer_out.forward_pass(X)
#         cost_out = []
#         for i in range(X.shape[0]):
#             cost = half_mse(output[0][i], y[i])
#             cost_out.append(cost)
#         return sum(cost_out)
    
#     def backwards_pass(self, X, y):
#         cost_delta = []
#         for i in range(X.shape[0]):
#             cost_delta.append(-(y[i] - self.layer_out.nodes[0].outputs[i]))
#         z_delta_next = [[cost_delta]] # [prev_node_1[curr_node1_deltas[instances], curr_node2_delta[instance]], prev_node_2...]
            
#         self.layer_out.backwards_pass(X, z_delta_next)
            

# class Layer:
#     def __init__(self, n_nodes):
#         self.nodes = []
#         for i in range(n_nodes):
#             self.nodes.append(Node(2))
    
#     def forward_pass(self, X):
#         outputs = []
#         for i in range(len(self.nodes)):
#             outputs.append(self.nodes[i].forward_pass(X))
#         return outputs
    
#     def backwards_pass(self, X, z_delta_next):
#         for i in range(len(self.nodes)):
#             self.nodes[i].backwards_pass(X, z_delta_next[i])
        
# class Node:
#     def __init__(self, n_inputs):
#         self.outputs = []
#         self.weights = []
#         self.bias = 0.5 #random.random()
#         self.l_rate = 0.5
         
#         for i in range(n_inputs):
#             self.weights.append(0.5)
    
#     def forward_pass(self, X):
#         self.outputs = []
#         out = []
#         for i in range(X.shape[0]):
#             z_sum = 0
#             for j in range(len(self.weights)):
#                 z_sum += self.weights[j] * X[i, j]
#             z_sum += self.bias
#             self.outputs.append(sigmoid(z_sum))
#         return self.outputs
    
#     def backwards_pass(self, X, z_delta_next):
#         w_delta_matrix = [[] for i in range(len(self.weights))]
#         b_delta_array = []
#         for i in range(0, X.shape[0]):
#             z_delta = self.outputs[i]*(1-self.outputs[i])
#             for j in range(len(self.weights)):
#                 part_w_delta = z_delta * X[i, j]
#                 full_w_delta = 0
#                 for k in range(len(z_delta_next)):
#                     full_w_delta += z_delta_next[k][i] * part_w_delta
#                 w_delta_matrix[j].append(full_w_delta)
#             b_delta_array.append(self.outputs[i]*(1-self.outputs[i]))
        
#         for i in range(len(w_delta_matrix)):
#             self.weights[i] = self.weights[i] - (self.l_rate * (sum(w_delta_matrix[i]) / len(w_delta_matrix[i])))
#         self.bias = self.bias - (self.l_rate * (sum(b_delta_array) / len(b_delta_array)))
        
        
# # return z_delta * the weights

# # once I sort the input into backwards_pass() and output of backward_pass()... it should work with any number of
# # layers and nodes?

# # the thing that's making this confusing is having to keep track of the instances in the main backprop code...
# # could that loop be moved elsewhere somehow?

# # I need to add to a z_delta_next for each node in prevous layer during backprop
# # For each node in previous layer, each node should add its z_delta multiplied by the weight for the prev layer node

In [11]:
# model = MultiLayerPerceptron()

# for epoch in range(0, 30):
#     print(model.forward_pass(X_train, y_train))
#     model.backwards_pass(X_train, y_train)
#     print(model.layer_out.nodes[0].weights)

## Backpropagating 2-3-3-1 MultiLayerPerceptron without batch gradient descent

In [12]:
# class MLP:
#     def __init__(self):
#         self.layer_h1 = Layer(n_nodes = 3, n_inputs = 2)
#         self.layer_h2 = Layer(n_nodes = 3, n_inputs = 3)
#         self.layer_o = Layer(n_nodes = 1, n_inputs = 3)
        
#         self.costs = []
    
#     def forward_pass(self, x, y):
#         output = self.layer_h1.forward_pass(x)
#         output = self.layer_h2.forward_pass(output)
#         output = self.layer_o.forward_pass(output)
#         cost = half_mse(output[0], y)
#         self.costs.append(cost)
    
#     def get_cost(self):
#         total_cost = sum(self.costs)
#         self.costs = []
#         return total_cost
    
#     def backwards_pass(self, x, y):
#         out_output_grads = [[-(y - self.layer_o.prev_outputs[0])]]
#         h2_output_grads = self.layer_o.backwards_pass(self.layer_h2.prev_outputs, out_output_grads)
#         h1_output_grads = self.layer_h2.backwards_pass(self.layer_h1.prev_outputs, h2_output_grads)
#         _ = self.layer_h1.backwards_pass(x, h1_output_grads)
        
        

# class Layer:
#     def __init__(self, n_nodes, n_inputs):
#         self.nodes = []
#         self.prev_outputs = []
#         for i in range(n_nodes):
#             self.nodes.append(Node(n_inputs))
    
#     def forward_pass(self, inputs):
#         self.prev_outputs = []
#         for node in self.nodes:
#             self.prev_outputs.append(node.forward_pass(inputs))
#         return self.prev_outputs
    
#     def backwards_pass(self, inputs, output_grad_matrix):
#         input_grad_matrix = [[] for i in range(len(inputs))]
#         for i in range(len(self.nodes)):
#             input_grads = self.nodes[i].backwards_pass(inputs, output_grad_matrix[i])
#             for j in range(len(input_grads)):
#                 input_grad_matrix[j].append(input_grads[j]) # this could be a transpose operation
#         return input_grad_matrix
              
    
# class Node:
#     def __init__(self, n_inputs):
#         self.prev_output = None
#         self.l_rate = 0.001
#         self.bias = random.random()
#         self.weights = []
#         for _ in range(n_inputs):
#             self.weights.append(random.random())
    
#     def forward_pass(self, inputs):
#             z_sum = 0
#             for i in range(len(self.weights)):
#                 z_sum += self.weights[i] * inputs[i]
#             z_sum += self.bias
#             output = sigmoid(z_sum)
#             self.prev_output = output
#             return output
        
#     def backwards_pass(self, inputs, output_grads):
#         input_grads = []
#         z_grad = self.prev_output*(1-self.prev_output)
#         for i in range(len(self.weights)):
#             part_input_grad = z_grad * self.weights[i]
#             full_input_grad = 0
#             part_weight_grad = z_grad * inputs[i]
#             full_weight_grad = 0
#             for j in range(len(output_grads)):
#                 full_input_grad += output_grads[j] * part_input_grad
#                 full_weight_grad += output_grads[j] * part_weight_grad
#             input_grads.append(full_input_grad)   
#             self.weights[i] = self.weights[i] - (self.l_rate * full_weight_grad)
#         bias_grad = self.prev_output*(1-self.prev_output)
#         self.bias = self.bias - (self.l_rate * bias_grad)
#         return input_grads     

In [13]:
# model = MLP()
# for epoch in range(30):
#     for j in range(331):
#         model.forward_pass(X_train[j, :], y_train[j])
#         model.backwards_pass(X_train[j, :], y_train[j])
#     print(model.get_cost())

## Backpropagating 2-3-3-1 MultiLayerPerceptron with batch gradient descent

In [31]:
class MLP:
    def __init__(self):
        self.layer_h1 = Layer(n_nodes = 3, n_inputs = 2)
        self.layer_h2 = Layer(n_nodes = 3, n_inputs = 3)
        self.layer_o = Layer(n_nodes = 1, n_inputs = 3)
        
        self.costs = []
    
    def forward_pass(self, x, y):
        output = self.layer_h1.forward_pass(x)
        output = self.layer_h2.forward_pass(output)
        output = self.layer_o.forward_pass(output)
        cost = half_mse(output[0], y)
        self.costs.append(cost)
    
    def get_cost(self):
        total_cost = sum(self.costs)
        self.costs = []
        return total_cost
    
    def backwards_pass(self, x, y):
        out_output_grads = [[-(y - self.layer_o.prev_outputs[0])]]
        h2_output_grads = self.layer_o.backwards_pass(self.layer_h2.prev_outputs, out_output_grads)
        h1_output_grads = self.layer_h2.backwards_pass(self.layer_h1.prev_outputs, h2_output_grads)
        _ = self.layer_h1.backwards_pass(x, h1_output_grads)
        
        

class Layer:
    def __init__(self, n_nodes, n_inputs):
        self.nodes = []
        self.prev_outputs = []
        for i in range(n_nodes):
            self.nodes.append(Node(n_inputs))
    
    def forward_pass(self, inputs):
        self.prev_outputs = []
        for node in self.nodes:
            self.prev_outputs.append(node.forward_pass(inputs))
        return self.prev_outputs
    
    def backwards_pass(self, inputs, output_grad_matrix):
        input_grad_matrix = [[] for i in range(len(inputs))]
        for i in range(len(self.nodes)):
            input_grads = self.nodes[i].backwards_pass(inputs, output_grad_matrix[i])
            for j in range(len(input_grads)):
                input_grad_matrix[j].append(input_grads[j]) # this could be a transpose operation
        return input_grad_matrix
              
    
class Node:
    def __init__(self, n_inputs):
        self.prev_output = None
        self.l_rate = 0.01
        self.bias = random.random()
        self.weights = []
        for _ in range(n_inputs):
            self.weights.append(random.random())
        self.weight_grads = [[] for i in range(len(self.weights))]
        self.bias_grads = []
    
    def forward_pass(self, inputs):
            z_sum = 0
            for i in range(len(self.weights)):
                z_sum += self.weights[i] * inputs[i]
            z_sum += self.bias
            output = sigmoid(z_sum)
            self.prev_output = output
            return output
        
    def backwards_pass(self, inputs, output_grads):
        input_grads = []
        z_grad = self.prev_output*(1-self.prev_output)
        for i in range(len(self.weights)):
            part_input_grad = z_grad * self.weights[i]
            full_input_grad = 0
            part_weight_grad = z_grad * inputs[i]
            full_weight_grad = 0
            for j in range(len(output_grads)):
                full_input_grad += output_grads[j] * part_input_grad
                full_weight_grad += output_grads[j] * part_weight_grad
            input_grads.append(full_input_grad)   
            self.weight_grads[i].append(full_weight_grad)
        bias_grad = self.prev_output*(1-self.prev_output)
        self.bias_grads.append(bias_grad)
        if (len(self.bias_grads) == 20):
            self.take_step()
        return input_grads
    
    def take_step(self):
        for i in range(len(self.weights)):
            avg_weight_grad = sum(self.weight_grads[i]) / len(self.weight_grads[i])
            self.weights[i] = self.weights[i] - (self.l_rate * avg_weight_grad)
            self.weight_grads[i].clear()
        avg_bias_grad = sum(self.bias_grads) / len(self.bias_grads)
        self.bias = self.bias - (self.l_rate * avg_bias_grad)
        self.bias_grads.clear()
        

In [33]:
model = MLP()
for epoch in range(300):
    for j in range(331):
        model.forward_pass(X_train[j, :], y_train[j])
        model.backwards_pass(X_train[j, :], y_train[j])
    print(model.get_cost())

51.74992123239852
51.147294679428825
50.51508037419874
49.85218749470351
49.15795522258378
48.430389659402685
47.66986915230756
46.873589431563474
46.04371654857136
45.17623574708654
44.273390869672525
43.33341407937204
42.35790060903024
41.34630557100449
40.300013595892366
39.220362536607794
38.107866057238645
36.96797545295023
35.79877364443249
34.60782490422818
33.3939084590001
32.16530629131521
30.924549193034103
29.67579463270759
28.426311821609275
27.178518640681368
25.94159419151065
24.717012874867535
23.51658342628302
22.338923460338712
21.19345968452831
20.084431597119934
19.016984018829536
17.995435411206856
17.023520594227065
16.105269493794356
15.242000380014643
14.440794017682865
13.697537205332784
13.018009623887608
12.39928242628506
11.84353673860063
11.351775467806847
10.919053550260873
10.54912972077728
10.235543207505012
9.980504591352581
9.778211975286208
9.630704380157288
9.530562662489135
9.475604276808465
9.465747209954838
9.494731298204947
9.56196512962523
9.6625