In [1]:
import numpy as np

In [2]:
X = np.array([[0, 0, 0],
              [0, 1, 1],
              [1, 0, 1],
              [1, 1, 1],
              [0.5, 0.5, 0.5],
              [0.8, 0.2, 0.3]])

Y = np.array([[0],
              [0],
              [0],
              [1],
              [0],
              [1]])
print(X) 
print("")
print(Y) 

[[0.  0.  0. ]
 [0.  1.  1. ]
 [1.  0.  1. ]
 [1.  1.  1. ]
 [0.5 0.5 0.5]
 [0.8 0.2 0.3]]

[[0]
 [0]
 [0]
 [1]
 [0]
 [1]]


###### X is a NxM matrix and Y is a Nx1 matrix

#### Implementation of Basic Perceptron

###### We add the column of X0 where all X0s are 1s to X so that the matrix multiplication in the next steps becomes easier. This makes X a NxM+1 matrix

In [3]:
##Adding X0 column of ones
X = np.c_[np.ones((X.shape[0], 1)), X]
X ## NxM+1 matrix

array([[1. , 0. , 0. , 0. ],
       [1. , 0. , 1. , 1. ],
       [1. , 1. , 0. , 1. ],
       [1. , 1. , 1. , 1. ],
       [1. , 0.5, 0.5, 0.5],
       [1. , 0.8, 0.2, 0.3]])

###### We set our initial learning rate. We can play around with this rate, to control the amount of gradient descent has effect on params. Larger rate makes the descent rapid but can cause cost to sway other way, where as smaller rate can cause the gradient descent steps to have very less effect.
###### We set the number of iterations as well. Can play around till the cost is minimized after a point.
###### Updated m = M+1 and n = N

In [16]:
learning_rate = 0.2
n_iterations = 1000
m = X.shape[1] ##No. of features(M+1)
n = X.shape[0] ##No. of training samples(N)

###### Setting the initial params values. Can set all to 0 as well.

In [17]:
##Initialize random weights
weights = np.random.rand(m, 1)
weights.shape ## mx1 matrix

(4, 1)

###### Defining the perceptron function

In [18]:
def perceptron_function(z):
  return np.where(z >= 0, 1, 0)

###### Parametes update rule similar to gradient descent of Logistic Regression

In [19]:
for _ in range(n_iterations):
  ## Batch Grad ascent
  weights = weights + learning_rate * (X.T @ (Y - (perceptron_function(X @ weights)))) ## theta = theta + alpha*((y - predicition) * X)
weights

array([[-0.66254957],
       [ 1.06566588],
       [ 0.67897172],
       [-0.44695126]])

###### The predictions given by multiplying the X and params

In [20]:
predictions = X @ weights
predictions

array([[-0.66254957],
       [-0.43052911],
       [-0.04383495],
       [ 0.63513677],
       [-0.0137064 ],
       [ 0.1916921 ]])

In [21]:
predictions = (predictions >= 0.5).astype(int)
predictions

array([[0],
       [0],
       [0],
       [1],
       [0],
       [0]])