# Artificial Neural Networks - second lecture: The Batch Perceptron Convergence Algorithm 

Let's start by importing the necessay libraries to the environment


In [1]:
import numpy as np
from numpy import random as rnd
import matplotlib.pyplot as plt
from Helper import datasets_Noisy_AND_gate

%matplotlib inline


We consider the neuron's model illustrated by the figure below:

<img src="Perceptron_1.png" style="width:500px;height:300px;">

We consider in the following the vectors of inputs and weights in the following form:

$W=[b,w_1,w2]^T$

$X=[1, x_1, x_2]^T$

$v = W^T.X$

$y = signum(v)$ has the values $\{-1, +1\} $

In this program we will code the Batch version of the Perceptron convergence algorithm. we will write a code that compute:

0- function **generate( )** to generate training and testing datasets,

0- function **intialize( )** to initialize weights and bias,

1- function **linear_combier( )** to compute the induced local field $v$,

2- function **signum( )** to compute the output $y$,

3- function **cost( )** to comput the cost function $J( W )$,

4- function **gradient( )** to compute the gradient of the cost function $\Delta J(W)$,

5- function **update( )** to update the vector W,

6- function **model( )** to build and train the model

7- function **predict( )** to predict the class of given patterns

PSD = 


In [2]:
# function generate
def generate():
    # returns:
    # X_train a matrix (m x N)
    # X_test a matrix (m x N1)
    # Y_train a vector (1 x N)
    # Y_test a vector (1 x N1)
    X_train, X_test, Y_train, Y_test = datasets_Noisy_AND_gate(N = 1000, PSD = 0.04, test = True, valid = False)
    return X_train, X_test, Y_train, Y_test

In [3]:
X_train, X_test, Y_train, Y_test = generate()
print('X_train.shape',X_train.shape)
print('Y_train.shap',Y_train.shape)
print('X_test.shape', X_test.shape)
print(X_train[:,0:10])
print(Y_train[:,0:10])


X_train.shape (2, 800)
Y_train.shap (1, 800)
X_test.shape (2, 199)
[[-0.02667382  0.20804942  0.05689384 -0.16051661  0.8682343   0.89481828
  -0.00531702  0.18866897  0.87121983  0.02775562]
 [-0.0038127   1.27382569  0.96083174  0.16784462  0.15743851  1.35267969
   0.75465359  0.91383112 -0.18320986  0.04223402]]
[[-1. -1. -1. -1. -1.  1. -1. -1. -1. -1.]]


In [69]:
# function intialization
def initialize( m):
    # m is the number of inputs - dimension of the input vector 
    # m+1 the dimension of vector W including the bias
    # this function should return a column vector (m+1 x 1)
    # your code starts here 1 line
    #W = np.zeros((m+1, 1))
    W = np.ones((m+1, 1))*0.1
    
    return W

In [70]:
m = 2
W = initialize(m)
print(W)

[[0.1]
 [0.1]
 [0.1]]


The correct answer is:

$W = [0.0, 0.0, 0.0]^T$

In [71]:
# function linear_combiener
def linear_combiner(W, X):
    # inputs W (m+1 x 1), X(m+1 x N)
    # The output should be a vector (1 x N)
    
    # your code starts here 1 line
    V = np.dot(W.T, X)
    
    return V

In [72]:
W = np.ones((3,1))
X = np.ones((3,5))
V = linear_combiner(W, X)
print('V =', V)

V = [[3. 3. 3. 3. 3.]]


Theright answer is :

$V = [[3.0, 3.0, 3.0, 3.0, 3.0]]$

In [73]:
# function signum
def signum(x):
    # the input is a vector (1 x N)
    # the output should be a vector (1 x N)
    
    # your code starts here 1 line
    y = np.sign(x)
    
    return y

In [74]:
#X = np.random.randn(2,5)
X = [[0.2, -0.6, 1.66, -1e3]]
Y = signum(X)
print('Y =', Y)

Y = [[ 1. -1.  1. -1.]]


The correct answer is :

$Y = [[1., -1.,  1., -1.]]$

In [75]:
# function cost
def cost(V, D):
    # input V and D are vectors of dimensions (1 x N)
    # the output is a scalar
    
    # your code starts here 3 lines
    Y = np.sign(V)
    error = Y-D
    misclass = (error != 0)
    
    x = -np.sum(V * D * misclass)
    
    
    return x

In [76]:
V = np.array([[0.5, -0.6, 0.3, -0.12]])
D = np.array([[1, 1, -1, -1]])
print(cost(V,D))

0.8999999999999999


The good answer is :

$J(W) = , -0.9$

In [77]:
# function gradient
def gradient(X, V, D):
    # inputs are :
    # D desired output a vector (1 x N)
    # V the induced local field vector (1x N)
    # X matrix of training dataset (m+1 x N)
    # the output should be a vector (m+1 x 1)
    dim = (np.sign(V)-D)!=0
    idx = dim[0]
    Xp = X[:, idx]
    if Xp.shape != (0,0):
        on = np.ones((Xp.shape[0], 1))
        Dp = np.multiply(on, D[dim])
        grad = -np.sum(Xp*Dp,1, keepdims = True)
        return grad
    return 0

In [78]:
#rnd.seed(0)
X = np.random.rand(3,5)
V = np.array([[0.5, -0.6, 0.3, -0.1, 0.8]])
D = np.array([[1, 1, -1, 1, -1]])
gradient(X,V,D)

array([[0.02029548],
       [0.07488641],
       [0.62113876]])

In [79]:
#rnd.seed(0)
from numpy.matlib import repmat
X = np.random.rand(3,5)
V = np.array([[0.5, -0.6, 0.3, -0.1, 0.8]])
D = np.array([[1, 1, -1, 1, -1]])
DD = repmat(D, X.shape[0],1)
g = gradient(X,V,DD)

ValueError: operands could not be broadcast together with shapes (3,4) (3,12) 

In [None]:
#function update
def update(W, gradient, learning_rate = 0.2):
    # inputs are:
    # W weight vector (m+1 x1)
    # gradient a vector (m+1 x 1)
    # learning rate is a scalar
    # this function returns a vector (m+1 x1)
    # your code starts here 1 line
    Delta_W = - learning_rate * gradient
    W = W + Delta_W
    
    return W

In [None]:
W = np.ones((m+1,1))
grad = np.ones(W.shape)
grad[1, 0] *= -1
update(W,grad)

The correct answer is $W= [[0.8], [1.2], [0.8]]$

In [None]:
# function model
def model(X, D, learning_rate = 0.1, epoch = 50):
    # inputs:
    # X matix of inputs (m x N)
    # D desired output (1 x N)
    # learning_rate is a positive scalar
    # epoch is an positive integer, the number of times the perceptron convergence algorithm has been lunched 
    # output:
    # the trained weights
    # the cost vector (1 x N)
    m, N = X.shape
    X = np.concatenate((np.ones((1,N)), X), axis = 0)
    J = np.zeros((1, epoch))
    W = initialize(m)
    for k in range(epoch):
        # your code starts here 5-6 lines
        V = linear_combiner(W, X)
        Y = signum(V)
        j = cost(V, D)
        J[0, k] = j
        grad = gradient(X,V,D)
        W = update(W, grad, learning_rate)
    
    print("---------finish---------")
    return W, J
        
    
    

In [None]:
m =2
X_train, X_test, Y_train, Y_test = generate()
W, J = model(X_train , Y_train, learning_rate = 0.1, epoch =20)
plt.plot(J[0])

In [None]:
# function predict
def predict(X):
    V = linear_combiner(W, X)
    Y = signum(V)
    return Y
    

In [None]:
N_test = X_test.shape[1]
X = np.concatenate((np.ones((1,N_test)), X_test), axis = 0)
P = predict(X)
print('--------Done---------')

Now let's try the perceptron when noisy inputs are presented. Try several values of the noise PSD and check the number of errors commtted :

Finally, you can calculate the number of errors committed by the perceptron:

In [80]:
Errors = np.sum(np.abs(P -Y_test))/2
print("Number of errors {} ".format(Errors))

NameError: name 'P' is not defined