In [126]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

## Simple perceptron w/sklearn API + Iris classification

In [7]:
iris = load_iris()
X = iris.data[:, (2,3)] # only petal length and width
y = (iris.target == 0).astype(int)  # make binary "is it a setosa?"

X_train, X_test, y_train, y_test = train_test_split(X, y)

In [92]:
perceptron = Perceptron()
perceptron.fit(X_train, y_train)

In [93]:
print(perceptron.score(X_test, y_test)) # given petal length of 2 and width of 0.5, is it a setosa?

1.0


## Simple perceptron from scratch w/Iris classification

A single neuron/node with:
- 2 inputs (petal length and width) and 2 input weights
- 1 bias input (always inputs 1) and 1 bias weight
- 1 binary output (1 == "is setosa", 0 == "is not setosa")

Learning process:
- For each input instance in training set:
    - multiply each input feature (petal length and width) by its weight, and multiply the bias by its weight
    - Sum the results
    - If greater than or equal to 0, output 1, else output 0
    - Calculate error by minusing the output from the target output. 
    - For each weight:
        - Add the ((error * learning rate) * input) to the weight
        - e.g. if the weight was 0.5, the input was -0.7, the output was 1 and the target output was 0, that's an error of -1, 0.05 * -1 * -0.7 = 0.035, so new weight is 0.535. If you got the same input again you've amplified the negative input which will result in a sum of weights * inputs closer to being less than 0, resulting in an output of 0, the target.
        - e.g. if the weight was 0.5, the input was 0.7, the output 1 and the target output was 0, that's error of -1, 0.05 * -1 * 0.7 = -0.035, so the new weight is 0.465. If you got the same input again, you've supressed the positive input which will result in a sum of weights * inputs closer to being less than 0, resulting in an output of 0, the target.
       


In [80]:
class MyPerceptron:
    def __init__(self):   
        self.input_weights = [0.5, 0.5]
        self.bias_weight = 0.5
        self.learning_rate = 0.05
    
    def fit(self, X, y):
        for i in range(0, X.shape[0]):
            output = self.predict(X[i, :])
            error = y[i] - output
            for j in range(0, len(self.input_weights)):
                self.input_weights[j] = self.input_weights[j] + ((self.learning_rate * error) * X[i, j])
            self.bias_weight = self.bias_weight + ((self.learning_rate * error) * 1)
            
    def predict(self, x):
        product = 0
        for input_val, weight in zip(x, self.input_weights):
            product += input_val * weight
        product += 1 * self.bias_weight
        return int(product >= 0) # simple heaviside step function
    
    def score(self, X, y):
        error = y.shape[0]
        for i in range(0, X.shape[0]):
            output = self.predict(X[i, :])
            error -= abs(y[i] - output)
        return f"{error}/{y.shape[0]}"

In [97]:
model = MyPerceptron()
print(model.input_weights)
print(model.score(X_test,y_test))
model.fit(X_train, y_train)
print(model.input_weights)
print(model.score(X_test,y_test))

[0.5, 0.5]
20/38
[-0.20999999999999996, 0.19]
38/38


## Simple perceptron continous output using backpropagation

In [183]:
import math

def sigmoid(z):
    return 1 / (1 + math.exp(-z))

def half_mse(a, y):
    return 0.5 * ((a - y)**2)   # half MSE is nicer derivative than MSE


class BackPropagatingPerceptron:
    def __init__(self):
        self.a_out = []   # output after sigmoid - is a matrix because storing output from multiple input instances
        #self.z_out = []   # doesn't need storing for backprop
        self.w_out = [0.5, 0.5]
        self.b_out = 0.5
        
        self.l_rate = 0.5 
    
    def forward_pass(self, X, y):
        cost_out = []
        for i in range(0, X.shape[0]):
            z_out = 0
            for j in range(0, len(self.w_out)):
                z_out += self.w_out[j] * X[i, j]
            z_out += self.b_out
            squished = sigmoid(z_out)
            self.a_out.append([squished]) # matrix cus for hidden layers it'd be multiple outputs per input instance
            cost = half_mse(squished, y[i])
            cost_out.append(cost)
        return sum(cost_out)
    
    def backwards_pass(self, X, y):
        w_out_delta = [[],[]] # 1 gradient per weight per input instance
        b_out_delta = [] # 1 gradient per input instance
        for i in range(0, X.shape[0]): # i = instance
            for j in range(0, len(self.w_out)): # j = weight
                w_out_delta[j].append((-(y[i] - self.a_out[j][0])) * (self.a_out[j][0]*(1-self.a_out[j][0])) * X[i, j])  
            b_out_delta.append((-(y[i] - self.a_out[j][0])) * (self.a_out[j][0]*(1-self.a_out[j][0])) * 1)
        
        for i in range(0, len(w_out_delta)):
            self.w_out[i] = self.w_out[i] - (self.l_rate * (sum(w_out_delta[i]) / len(w_out_delta[i])))
        self.b_out = self.b_out - self.l_rate * (sum(b_out_delta) / len(b_out_delta))
        
        # why are my gradient exploding after a while? read le book

In [184]:
diabetes = load_diabetes()
X = diabetes.data[:, (2,3)]
y = diabetes.target

scaler = MinMaxScaler()
y = scaler.fit_transform(y.reshape(-1, 1))
y.flatten()

X_train, X_test, y_train, y_test = train_test_split(X, y)

In [185]:
model = BackPropagatingPerceptron()
for epoch in range(0, 100):
    print(model.forward_pass(X_train, y_train))
    model.backwards_pass(X_train, y_train)
    print(model.w_out)

[17.59114367]
[array([0.50076254]), array([0.50065772])]
[17.12512794]
[array([0.50152508]), array([0.50131543])]
[16.66923085]
[array([0.50228762]), array([0.50197315])]
[16.2238171]
[array([0.50305016]), array([0.50263087])]
[15.78924058]
[array([0.5038127]), array([0.50328858])]
[15.36584354]
[array([0.50457524]), array([0.5039463])]
[14.95395575]
[array([0.50533778]), array([0.50460402])]
[14.55389374]
[array([0.50610032]), array([0.50526173])]
[14.16596003]
[array([0.50686286]), array([0.50591945])]
[13.79044239]
[array([0.5076254]), array([0.50657717])]
[13.4276132]
[array([0.50838794]), array([0.50723488])]
[13.07772873]
[array([0.50915048]), array([0.5078926])]
[12.74102858]
[array([0.50991302]), array([0.50855032])]
[12.41773511]
[array([0.51067556]), array([0.50920803])]
[12.10805289]
[array([0.5114381]), array([0.50986575])]
[11.81216828]
[array([0.51220064]), array([0.51052347])]
[11.53024895]
[array([0.51296318]), array([0.51118118])]
[11.26244359]
[array([0.51372572]), ar