In [4]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

## Simple perceptron w/sklearn API + Iris classification

In [5]:
iris = load_iris()
X = iris.data[:, (2,3)] # only petal length and width
y = (iris.target == 0).astype(int)  # make binary "is it a setosa?"

X_train, X_test, y_train, y_test = train_test_split(X, y)

In [6]:
perceptron = Perceptron()
perceptron.fit(X_train, y_train)

In [7]:
print(perceptron.score(X_test, y_test)) # given petal length of 2 and width of 0.5, is it a setosa?

1.0


## Simple perceptron from scratch w/Iris classification

A single neuron/node with:
- 2 inputs (petal length and width) and 2 input weights
- 1 bias input (always inputs 1) and 1 bias weight
- 1 binary output (1 == "is setosa", 0 == "is not setosa")

Learning process:
- For each input instance in training set:
    - multiply each input feature (petal length and width) by its weight, and multiply the bias by its weight
    - Sum the results
    - If greater than or equal to 0, output 1, else output 0
    - Calculate error by minusing the output from the target output. 
    - For each weight:
        - Add the ((error * learning rate) * input) to the weight
        - e.g. if the weight was 0.5, the input was -0.7, the output was 1 and the target output was 0, that's an error of -1, 0.05 * -1 * -0.7 = 0.035, so new weight is 0.535. If you got the same input again you've amplified the negative input which will result in a sum of weights * inputs closer to being less than 0, resulting in an output of 0, the target.
        - e.g. if the weight was 0.5, the input was 0.7, the output 1 and the target output was 0, that's error of -1, 0.05 * -1 * 0.7 = -0.035, so the new weight is 0.465. If you got the same input again, you've supressed the positive input which will result in a sum of weights * inputs closer to being less than 0, resulting in an output of 0, the target.
       


In [8]:
class MyPerceptron:
    def __init__(self):   
        self.input_weights = [0.5, 0.5]
        self.bias_weight = 0.5
        self.learning_rate = 0.05
    
    def fit(self, X, y):
        for i in range(0, X.shape[0]):
            output = self.predict(X[i, :])
            error = y[i] - output
            for j in range(0, len(self.input_weights)):
                self.input_weights[j] = self.input_weights[j] + ((self.learning_rate * error) * X[i, j])
            self.bias_weight = self.bias_weight + ((self.learning_rate * error) * 1)
            
    def predict(self, x):
        product = 0
        for input_val, weight in zip(x, self.input_weights):
            product += input_val * weight
        product += 1 * self.bias_weight
        return int(product >= 0) # simple heaviside step function
    
    def score(self, X, y):
        error = y.shape[0]
        for i in range(0, X.shape[0]):
            output = self.predict(X[i, :])
            error -= abs(y[i] - output)
        return f"{error}/{y.shape[0]}"

In [9]:
model = MyPerceptron()
print(model.input_weights)
print(model.score(X_test,y_test))
model.fit(X_train, y_train)
print(model.input_weights)
print(model.score(X_test,y_test))

[0.5, 0.5]
17/38
[-0.255, 0.155]
38/38


## Simple perceptron continous output using backpropagation

In [10]:
import math

def sigmoid(z):
    return 1 / (1 + math.exp(-z))

def half_mse(a, y):
    return 0.5 * ((a - y)**2)   # half MSE is nicer derivative than MSE


class BackPropagatingPerceptron:
    def __init__(self):
        self.a_out = []   # output after sigmoid - is a matrix because storing output from multiple input instances
        #self.z_out = []   # doesn't need storing for backprop
        self.w_out = [0.5, 0.5]
        self.b_out = 0.5
        
        self.l_rate = 0.5
    
    def forward_pass(self, X, y):
        cost_out = []
        for i in range(0, X.shape[0]):
            z_out = 0
            for j in range(0, len(self.w_out)):
                z_out += self.w_out[j] * X[i, j]
            z_out += self.b_out
            squished = sigmoid(z_out)
            self.a_out.append(squished) # matrix cus for hidden layers it'd be multiple outputs per input instance
            cost = half_mse(squished, y[i])
            cost_out.append(cost)
        return sum(cost_out)
    
    def backwards_pass(self, X, y):
        w_delta_out = [[],[]] # 1 gradient per weight per input instance
        b_delta_out = [] # 1 gradient per input instance
        for i in range(0, X.shape[0]): # i = instance
            for j in range(0, len(self.w_out)): # j = weight
                w_delta_out[j].append((-(y[i] - self.a_out[i])) * (self.a_out[i]*(1-self.a_out[i])) * X[i, j])  
            b_delta_out.append((-(y[i] - self.a_out[i])) * (self.a_out[i]*(1-self.a_out[i])) * 1)
        
        for i in range(0, len(w_delta_out)):
            self.w_out[i] = self.w_out[i] - (self.l_rate * (sum(w_delta_out[i]) / len(w_delta_out[i])))
        self.b_out = self.b_out - (self.l_rate * (sum(b_delta_out) / len(b_delta_out)))
        
        # why are my gradient exploding after a while? read le book

In [89]:
diabetes = load_diabetes()
X = diabetes.data[:, (0,3)]
y = diabetes.target

scaler = MinMaxScaler()  # because output is sigmoid
y = scaler.fit_transform(y.reshape(-1, 1))
y = y.reshape(-1)

X_train, X_test, y_train, y_test = train_test_split(X, y)

scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [90]:
model = BackPropagatingPerceptron()
for epoch in range(0, 100):
    if epoch % 10 == 0:
        print(model.forward_pass(X_train, y_train))
    model.forward_pass(X_train, y_train)
    model.backwards_pass(X_train, y_train)

16.90715901211041
13.525386711465353
10.715617589400173
8.675543677253598
7.546614088050927
7.378838815622685
8.113994834891521
9.596474423929628
11.609067606293223
13.919600286573923


In [91]:
import random

class MultiLayerPerceptron:
    def __init__(self):
        self.layer_out = Layer(n_nodes = 1)
    
    def forward_pass(self, X, y):
        output = self.layer_out.forward_pass(X)
        cost_out = []
        for i in range(X.shape[0]):
            cost = half_mse(output[0][i], y[i])
            cost_out.append(cost)
        return sum(cost_out)
    
    def backwards_pass(self, X, y):
        cost_delta = []
        for i in range(X.shape[0]):
            cost_delta.append(-(y[i] - self.layer_out.nodes[0].outputs[i]))
        self.layer_out.backwards_pass(X, cost_delta)
            

class Layer:
    def __init__(self, n_nodes):
        self.nodes = []
        for i in range(n_nodes):
            self.nodes.append(Node(2))
    
    def forward_pass(self, X):
        outputs = []
        for i in range(len(self.nodes)):
            outputs.append(self.nodes[i].forward_pass(X))
        return outputs
    
    def backwards_pass(self, X, z_delta_next):
        for i in range(len(self.nodes)):
            self.nodes[i].backwards_pass(X, z_delta_next)
        
class Node:
    def __init__(self, n_inputs):
        self.outputs = []
        self.weights = []
        self.bias = random.random()
        self.l_rate = 0.5
         
        for i in range(n_inputs):
            self.weights.append(random.random())
    
    def forward_pass(self, X):
        self.outputs = []
        out = []
        for i in range(X.shape[0]):
            z_sum = 0
            for j in range(len(self.weights)):
                z_sum += self.weights[j] * X[i, j]
            z_sum += self.bias
            self.outputs.append(sigmoid(z_sum))
        return self.outputs
    
    def backwards_pass(self, X, z_delta_next):
        w_delta_matrix = [[] for i in range(len(self.weights))]
        b_delta_array = []
        for i in range(0, X.shape[0]):
            z_delta = -self.outputs[i]*(1-self.outputs[i])  # this shouldn't need to be minus
            for j in range(len(self.weights)):
                part_w_delta = z_delta * X[i, j]
                full_w_delta = 0
                for k in range(len(z_delta_next)):
                    full_w_delta += z_delta_next[k] * part_w_delta
                w_delta_matrix[j].append(full_w_delta)
            b_delta_array.append(self.outputs[i]*(1-self.outputs[i]))
        
        for i in range(len(w_delta_matrix)):
            self.weights[i] = self.weights[i] - (self.l_rate * (sum(w_delta_matrix[i]) / len(w_delta_matrix[i])))
        self.bias = self.bias - (self.l_rate * (sum(b_delta_array) / len(b_delta_array)))
        
        
# I need to add to a z_delta_next for each node in prevous layer during backprop
# For each node in previous layer, each node should add its z_delta multiplied by the weight for the prev layer node

In [92]:
model = MultiLayerPerceptron()
for epoch in range(0, 10):
    print(model.forward_pass(X_train, y_train))
    model.backwards_pass(X_train, y_train)
    print(model.layer_out.nodes[0].weights)

15.115746688753259
[0.2501603263332603, -0.0639675404650411]
13.455524804625488
[0.12947866173950096, -0.06590486991490438]
12.110015721892596
[0.11292146718459868, -0.061227647750332674]
10.960118074058416
[0.12444912049821534, -0.06375386787595298]
10.149534511060091
[0.14981684315136506, -0.06892872677527083]
9.668342756358461
[0.17650448319510134, -0.07267731646310649]
9.481605585339882
[0.18263046955815346, -0.07318717612002035]
9.550069743316394
[0.1455816268403915, -0.07046018277778628]
9.864806571527508
[0.07397810211881396, -0.05747237113641535]
10.386405023539027
[0.01981436498912873, -0.024643716944335536]
