In [68]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

## Simple perceptron w/sklearn API + Iris classification

In [53]:
iris = load_iris()
X = iris.data[:, (2,3)] # only petal length and width
y = (iris.target == 0).astype(int)  # make binary "is it a setosa?"

X_train, X_test, y_train, y_test = train_test_split(X, y)

In [92]:
perceptron = Perceptron()
perceptron.fit(X_train, y_train)

In [93]:
print(perceptron.score(X_test, y_test)) # given petal length of 2 and width of 0.5, is it a setosa?

1.0


## Simple perceptron from scratch w/Iris classification

A single neuron/node with:
- 2 inputs (petal length and width) and 2 input weights
- 1 bias input (always inputs 1) and 1 bias weight
- 1 binary output (1 == "is setosa", 0 == "is not setosa")

Learning process:
- For each input instance in training set:
    - multiply each input feature (petal length and width) by its weight, and multiply the bias by its weight
    - Sum the results
    - If greater than or equal to 0, output 1, else output 0
    - Calculate error by minusing the output from the target output. 
    - For each weight:
        - Add the ((error * learning rate) * input) to the weight
        - e.g. if the weight was 0.5, the input was -0.7, the output was 1 and the target output was 0, that's an error of -1, 0.05 * -1 * -0.7 = 0.035, so new weight is 0.535. If you got the same input again you've amplified the negative input which will result in a sum of weights * inputs closer to being less than 0, resulting in an output of 0, the target.
        - e.g. if the weight was 0.5, the input was 0.7, the output 1 and the target output was 0, that's error of -1, 0.05 * -1 * 0.7 = -0.035, so the new weight is 0.465. If you got the same input again, you've supressed the positive input which will result in a sum of weights * inputs closer to being less than 0, resulting in an output of 0, the target.
       


In [80]:
class MyPerceptron:
    def __init__(self):   
        self.input_weights = [0.5, 0.5]
        self.bias_weight = 0.5
        self.learning_rate = 0.05
    
    def fit(self, X, y):
        for i in range(0, X.shape[0]):
            output = self.predict(X[i, :])
            error = y[i] - output
            for j in range(0, len(self.input_weights)):
                self.input_weights[j] = self.input_weights[j] + ((self.learning_rate * error) * X[i, j])
            self.bias_weight = self.bias_weight + ((self.learning_rate * error) * 1)
            
    def predict(self, x):
        product = 0
        for input_val, weight in zip(x, self.input_weights):
            product += input_val * weight
        product += 1 * self.bias_weight
        return int(product >= 0) # simple heaviside step function
    
    def score(self, X, y):
        error = y.shape[0]
        for i in range(0, X.shape[0]):
            output = self.predict(X[i, :])
            error -= abs(y[i] - output)
        return f"{error}/{y.shape[0]}"

In [97]:
model = MyPerceptron()
print(model.input_weights)
print(model.score(X_test,y_test))
model.fit(X_train, y_train)
print(model.input_weights)
print(model.score(X_test,y_test))

[0.5, 0.5]
20/38
[-0.20999999999999996, 0.19]
38/38


## Simple perceptron continous output using backpropagation

In [82]:
import math

def sigmoid(z):
    return 1 / (1 + math.exp(-z))

def half_mse(a, y):
    return 0.5 * ((a - y)**2)   # half MSE is nicer derivative than MSE


class BackPropagatingPerceptron:
    def __init__(self):
        self.a_out = []   # output after sigmoid - is a matrix because storing output from multiple input instances
        #self.z_out = []   # doesn't need storing for backprop
        self.w_out = [0.5, 0.5]
        self.b_out = 0.5
        
        self.l_rate = 0.5
    
    def forward_pass(self, X, y):
        cost_out = []
        for i in range(0, X.shape[0]):
            z_out = 0
            for j in range(0, len(self.w_out)):
                z_out += self.w_out[j] * X[i, j]
            z_out += self.b_out
            squished = sigmoid(z_out)
            self.a_out.append(squished) # matrix cus for hidden layers it'd be multiple outputs per input instance
            cost = half_mse(squished, y[i])
            cost_out.append(cost)
        return sum(cost_out)
    
    def backwards_pass(self, X, y):
        w_delta_out = [[],[]] # 1 gradient per weight per input instance
        b_delta_out = [] # 1 gradient per input instance
        for i in range(0, X.shape[0]): # i = instance
            for j in range(0, len(self.w_out)): # j = weight
                w_delta_out[j].append((-(y[i] - self.a_out[i])) * (self.a_out[i]*(1-self.a_out[i])) * X[i, j])  
            b_delta_out.append((-(y[i] - self.a_out[i])) * (self.a_out[i]*(1-self.a_out[i])) * 1)
        
        for i in range(0, len(w_delta_out)):
            self.w_out[i] = self.w_out[i] - (self.l_rate * (sum(w_delta_out[i]) / len(w_delta_out[i])))
        self.b_out = self.b_out - (self.l_rate * (sum(b_delta_out) / len(b_delta_out)))
        
        # why are my gradient exploding after a while? read le book

In [98]:
diabetes = load_diabetes()
X = diabetes.data[:, (1,2)]
y = diabetes.target

scaler = MinMaxScaler()  # because output is sigmoid
y = scaler.fit_transform(y.reshape(-1, 1))
y = y.reshape(-1)

X_train, X_test, y_train, y_test = train_test_split(X, y)

scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [99]:
model = BackPropagatingPerceptron()
for epoch in range(0, 100):
    if epoch % 10 == 0:
        print(model.forward_pass(X_train, y_train))
    model.forward_pass(X_train, y_train)
    model.backwards_pass(X_train, y_train)

16.48293656346961
12.67279912219307
9.643782396149197
7.6396136368677645
6.727906741038882
6.801999280996364
7.642497150062083
8.995626272330364
10.632513636494656
12.377404998217253


In [107]:
import random

class MultiLayerPerceptron:
    def __init__(self):
        self.layer_out = Layer(n_nodes = 1)
    
    def forward_pass(self, X, y):
        cost_out = []
        for i in range(X.shape[0]):
            output = self.layer_out.forward_pass(X[i, :])
            cost = half_mse(output, y[i])
            cost_out.append(cost)
        return sum(cost_out)
            

class Layer:
    def __init__(self, n_nodes):
        self.nodes = []
        for i in range(n_nodes):
            self.nodes.append(Node(2))
    
    def forward_pass(self, x):
        outputs = []
        for node in self.nodes:
            outputs.append(node.forward_pass(x))
        return outputs
        
class Node:
    def __init__(self, n_inputs):
        self.outputs = []
        self.weights = []
#         self.w_deltas = []
        self.bias = random.random()
#         self.b_delta = 0
        self.l_rate = 0.5
         
        for i in range(n_inputs):
            self.weights.append(random.random())
    
    def forward_pass(self, x):  # forward individually
        z_sum = 0
        for i in range(len(self.weights)):
            z_sum += self.weights[i] * x[i]
        z_sum += self.bias
        out = sigmoid(z_sum)
        self.outputs.append(out) 
        return out
    
    def backwards_pass(self, X, delta_outs):   # backwards all at once?
        w_delta_matrix = [[] for i in range(len(self.weights))]
        b_delta_array = []
        for i in range(0, X.shape[0]):
            for j in range(len(self.weights)):
                weight_component = (self.outputs[i]*(1-self.outputs[i])) * X[i, j]
                delta_sum = 0
                for k in range(delta_outs):
                    delta_sum += delta_outs[k] * weight_component
                w_delta_matrix[j].append(delta_sum)
            b_delta_matrix.append(self.outputs[i]*(1-self.outputs[i]))
        
        for i in range(len(w_delta_matrix)):
            self.weights[i] = self.weights[i] - (self.l_rate * (sum(w_delta_matrix[i] / len(w_delta_matrix[i]))))
        self.bias = self.bias - (self.l_rate * (sum(b_delta_array / len(b_delta_array))))

In [115]:
model = MultiLayerPerceptron()
for epoch in range(0, 1):
    print(model.forward_pass(X_train, y_train))
    print(model.layer_out.nodes[0].outputs)

[14.70187075]
[0.2679330427008332, 0.6409584001807436, 0.5666026782316699, 0.7383959697265327, 0.6109369062038883, 0.8290164509681348, 0.6819559255561027, 0.5823872604369998, 0.5912731072117151, 0.6976378300059309, 0.4323818454049091, 0.3373139825954124, 0.9059719237823172, 0.4982825727040725, 0.7722213591998067, 0.6617607673787347, 0.9557105858400535, 0.8183744950468672, 0.7166082178130568, 0.7833802392973794, 0.7239916409701871, 0.606572461014682, 0.4686559502060826, 0.4640954814948664, 0.6859171265709004, 0.6282114814545476, 0.21137489157160558, 0.7754285345228712, 0.6367304804893156, 0.2825525064006844, 0.6976378300059309, 0.3880803467293869, 0.8072242078223222, 0.4323818454049091, 0.8113980975549333, 0.7367076973342339, 0.6679834544822926, 0.6598057844039285, 0.25034760508521037, 0.7419202430089896, 0.4640954814948664, 0.23028396034365137, 0.5575818054721459, 0.7383959697265327, 0.7276383890673288, 0.8223600917506466, 0.5868372535701334, 0.24017252505573242, 0.5599493993000064, 0.