# Problem 03.05

## Problem:

The Perceptron code on the website is a batch update algorithm, where the whole of the dataset is fed in to find the errors, and then the weights are updated afterwards, as is discussed in Section 3.3.5. Convert the code to run as sequential updates and then compare the results of using the two versions.

## Solution:

Let's first import `numpy`, together with `time`, which is used later to time the two versions:

In [1]:
import numpy as np
import time

This is the original code as found on the website:

In [2]:
class pcn_batch:
    """A basic Perceptron trained with batch weight updates.

    On every iteration the whole dataset is run forward with the current
    weights and one weight update, summed over all samples, is applied.
    """

    def __init__(self, inputs, targets):
        """Set up the network size from the data and initialise the weights.

        inputs  -- array-like with one row per sample; if it is not at
                   least 2-D the network is given a single input node.
        targets -- array-like with one row per sample; if it is not at
                   least 2-D the network is given a single output node.
        """
        # Set up network size
        if np.ndim(inputs) > 1:
            self.nIn = np.shape(inputs)[1]
        else:
            self.nIn = 1

        if np.ndim(targets) > 1:
            self.nOut = np.shape(targets)[1]
        else:
            self.nOut = 1

        self.nData = np.shape(inputs)[0]

        # Initialise network: one extra weight row for the bias node,
        # values drawn uniformly from [-0.05, 0.05).
        self.weights = np.random.rand(self.nIn + 1, self.nOut) * 0.1 - 0.05

    def pcntrain(self, inputs, targets, eta, nIterations):
        """Train the weights with nIterations batch updates.

        inputs      -- samples without the bias column, one row per sample.
        targets     -- 2-D targets, one row per sample.
        eta         -- learning rate.
        nIterations -- number of passes over the dataset.
        """
        # Add the inputs that match the bias node.  The row count comes from
        # the data actually passed in, so the training set does not have to
        # be the same size as the one given to the constructor.
        inputs = np.concatenate((inputs, -np.ones((np.shape(inputs)[0], 1))), axis=1)

        for _ in range(nIterations):
            # Forward pass over every sample with the current weights ...
            self.activations = self.pcnfwd(inputs)
            # ... then a single update summed over the whole dataset.
            self.weights -= eta * np.dot(np.transpose(inputs), self.activations - targets)

    def pcnfwd(self, inputs):
        """Run the network forward.

        inputs must already contain the bias column.  Returns 1 where the
        weighted sum is strictly positive and 0 elsewhere.
        """
        # Compute activations
        activations = np.dot(inputs, self.weights)

        # Threshold the activations
        return np.where(activations > 0, 1, 0)

    def confmat(self, inputs, targets):
        """Print the confusion matrix and the fraction of correct outputs.

        inputs are given without the bias column; targets must be 2-D.
        Rows of the printed matrix index the predicted class and columns
        the target class.
        """
        # Add the inputs that match the bias node (sized from the data
        # passed in, not from the constructor's dataset).
        inputs = np.concatenate((inputs, -np.ones((np.shape(inputs)[0], 1))), axis=1)

        outputs = np.dot(inputs, self.weights)

        nClasses = np.shape(targets)[1]

        if nClasses == 1:
            # A single output node encodes two classes (0 and 1).
            nClasses = 2
            outputs = np.where(outputs > 0, 1, 0)
        else:
            # 1-of-N encoding
            outputs = np.argmax(outputs, 1)
            targets = np.argmax(targets, 1)

        cm = np.zeros((nClasses, nClasses))
        for i in range(nClasses):
            for j in range(nClasses):
                cm[i, j] = np.sum(np.where(outputs == i, 1, 0) * np.where(targets == j, 1, 0))

        print(cm)
        print(np.trace(cm) / np.sum(cm))

Now, let's rewrite the code to run as a sequential algorithm:

In [3]:
class pcn_sequential:
    """A basic Perceptron trained with sequential (per-sample) updates.

    Samples are presented one at a time; the output for each sample is
    computed with the weights as they stand at that moment and the weights
    are corrected before the next sample is presented.
    """

    def __init__(self, inputs, targets):
        """Set up the network size from the data and initialise the weights.

        inputs  -- array-like with one row per sample; if it is not at
                   least 2-D the network is given a single input node.
        targets -- array-like with one row per sample; if it is not at
                   least 2-D the network is given a single output node.
        """
        # Set up network size
        if np.ndim(inputs) > 1:
            self.nIn = np.shape(inputs)[1]
        else:
            self.nIn = 1

        if np.ndim(targets) > 1:
            self.nOut = np.shape(targets)[1]
        else:
            self.nOut = 1

        self.nData = np.shape(inputs)[0]

        # Initialise network: one extra weight row for the bias node,
        # values drawn uniformly from [-0.05, 0.05).
        self.weights = np.random.rand(self.nIn + 1, self.nOut) * 0.1 - 0.05

    def pcntrain(self, inputs, targets, eta, nIterations):
        """Train the weights with sequential updates.

        inputs      -- samples without the bias column, one row per sample.
        targets     -- targets, one row per sample.
        eta         -- learning rate.
        nIterations -- number of passes over the dataset.

        After training, self.activations holds the output produced for each
        sample at the moment it was visited during the last pass.
        """
        nSamples = np.shape(inputs)[0]

        # Add the inputs that match the bias node.  np.asarray makes row
        # indexing return 1-D vectors even when an np.matrix was passed in.
        inputs = np.asarray(np.concatenate((inputs, -np.ones((nSamples, 1))), axis=1))
        targets = np.asarray(targets).reshape(nSamples, -1)

        self.activations = np.zeros((nSamples, self.nOut))

        for _ in range(nIterations):
            for i in range(nSamples):
                sample = inputs[i]
                # Recompute the output with the *current* weights: this is
                # what distinguishes sequential from batch learning.
                output = np.where(np.dot(sample, self.weights) > 0, 1, 0)
                self.activations[i] = output
                # Correct the weights straight away, before the next sample.
                self.weights -= eta * np.outer(sample, output - targets[i])

    def pcnfwd(self, inputs):
        """Run the network forward, one sample at a time.

        inputs must already contain the bias column.  Returns a float array
        holding 1 where the weighted sum is strictly positive and 0
        elsewhere.
        """
        inputs = np.asarray(inputs)
        activations = np.zeros((np.shape(inputs)[0], self.nOut))

        for i, sample in enumerate(inputs):
            # Weighted sum for this sample, thresholded at zero.
            activations[i] = np.where(np.dot(sample, self.weights) > 0, 1, 0)

        return activations

    def confmat(self, inputs, targets):
        """Print the confusion matrix and the fraction of correct outputs.

        inputs are given without the bias column; targets must be 2-D.
        Rows of the printed matrix index the predicted class and columns
        the target class.
        """
        # Add the inputs that match the bias node (sized from the data
        # passed in, not from the constructor's dataset).
        inputs = np.concatenate((inputs, -np.ones((np.shape(inputs)[0], 1))), axis=1)

        outputs = np.dot(inputs, self.weights)

        nClasses = np.shape(targets)[1]

        if nClasses == 1:
            # A single output node encodes two classes (0 and 1).
            nClasses = 2
            outputs = np.where(outputs > 0, 1, 0)
        else:
            # 1-of-N encoding
            outputs = np.argmax(outputs, 1)
            targets = np.argmax(targets, 1)

        cm = np.zeros((nClasses, nClasses))
        for i in range(nClasses):
            for j in range(nClasses):
                cm[i, j] = np.sum(np.where(outputs == i, 1, 0) * np.where(targets == j, 1, 0))

        print(cm)
        print(np.trace(cm) / np.sum(cm))

Create test data:

In [4]:
# Logical-OR truth table.  Plain 2-D arrays are used instead of np.matrix,
# which NumPy no longer recommends; labels is kept as a single row so that
# labels.T is the (4, 1) column of targets.
inputData = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
labels = np.array([[0, 1, 1, 1]])

Set parameters for each Perceptron type:

In [5]:
# Learning rate and number of training passes, shared by both perceptrons.
eta, iterations = 0.25, 100

Run batch version:

In [6]:
pBatch = pcn_batch(inputData, labels.T)

# perf_counter is the clock intended for measuring short durations.
tBatchStart = time.perf_counter()
pBatch.pcntrain(inputData, labels.T, eta, iterations)
tBatchEnd = time.perf_counter()

# pcnfwd expects the bias column to be present already.  The augmented copy
# gets its own name so that inputData itself stays the raw two-column data
# for the cells that follow.
batchInputs = np.concatenate((inputData, -np.ones((np.shape(inputData)[0], 1))), axis=1)
pBatchPred = pBatch.pcnfwd(batchInputs)

print(tBatchEnd - tBatchStart)

0.006588459014892578


Run sequential version:

In [7]:
pSeq = pcn_sequential(inputData, labels.T)

# perf_counter is the clock intended for measuring short durations.
tSeqStart = time.perf_counter()
pSeq.pcntrain(inputData, labels.T, eta, iterations)
tSeqEnd = time.perf_counter()

# pcnfwd expects the bias column to be present already.  The augmented copy
# gets its own name so that inputData is not modified and re-running this
# cell does not keep appending columns to it.
seqInputs = np.concatenate((inputData, -np.ones((np.shape(inputData)[0], 1))), axis=1)
pSeqPred = pSeq.pcnfwd(seqInputs)

print(tSeqEnd - tSeqStart)

0.015163183212280273


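The timings show that the batch version is faster (roughly 2.3 times on this tiny dataset): it relies on vectorised NumPy operations, whereas the sequential version works through the samples in explicit Python `for` loops. Looking at the outputs: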

In [8]:
# Show each model's thresholded outputs under its own heading.
for heading, predictions in (
    ("Batch predictions:", pBatchPred),
    ("Sequential predictions:", pSeqPred),
):
    print(heading)
    print(predictions)

Batch predictions:
[[0]
 [1]
 [1]
 [1]]
Sequential predictions:
[[ 0.]
 [ 1.]
 [ 1.]
 [ 1.]]


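Both the batch and sequential versions produce the same outputs on this linearly separable (logical OR) dataset, although the learned weights themselves need not be identical. The batch version, however, is noticeably faster than the sequential one: about 0.007 s versus 0.015 s in the runs above.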