In [None]:
"""
The MIT License (MIT)
Copyright (c) 2021 NVIDIA
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""


This code example implements the perceptron learning algorithm and demonstrates how to use it to learn the not AND (NAND) function. More context for this code example can be found in video 2.2 "Programming Example: Perceptron" in the video series "Learning Deep Learning: From Perceptron to Large Language Models" by Magnus Ekman (Video ISBN-13: 9780138177614).


The first code snippet shows the initialization code where we first import a library for randomization and then initialize variables for the training examples and perceptron weights.


In [None]:
import random

def show_learning(w):
    """Print the first three weights of w on one line, two decimals each.

    w must be indexable at positions 0, 1 and 2; each value is rendered
    with the '%5.2f' format and labelled w0, w1, w2.
    """
    # Format all three values up front, then emit a single print call.
    formatted = ['%5.2f' % w[k] for k in range(3)]
    print('w0 =', formatted[0], ', w1 =', formatted[1],
          ', w2 =', formatted[2])

# Define variables needed to control the training process.
random.seed(7)  # Fixed seed so the shuffled order (and printed trace) is repeatable
LEARNING_RATE = 0.1

# Define training examples for NAND. Each input is (bias, x1, x2): the bias
# input is always 1.0, and -1.0 / 1.0 encode false / true.
x_train = [(1.0, -1.0, -1.0),
           (1.0, -1.0, 1.0),
           (1.0, 1.0, -1.0),
           (1.0, 1.0, 1.0)]  # Inputs
y_train = [1.0, 1.0, 1.0, -1.0]  # Output (ground truth)
assert len(x_train) == len(y_train), 'each input needs exactly one target'

# One index per training example; this list is shuffled to randomize the
# order in which examples are visited. It is derived from the data so it
# cannot drift out of sync if examples are added or removed.
index_list = list(range(len(x_train)))

# Define perceptron weights (w[0] multiplies the bias input).
w = [0.2, -0.6, 0.25]  # Initialize to some "random" numbers

# Print initial weights.
show_learning(w)


Note how each input example consists of three values, but the first value is always 1.0 because it is the bias term. The next code snippet defines the function that computes the perceptron output.


In [None]:
def compute_output(w, x):
    """Return the perceptron output (-1 or 1) for input x and weights w.

    The first element of x must be 1 (the bias input), and for a neuron
    with n inputs both w and x must hold n+1 values. Only the first
    len(w) entries of x are read.
    """
    weighted_sum = 0.0
    for position, weight in enumerate(w):
        weighted_sum += x[position] * weight  # Accumulate weighted inputs
    # Sign function: strictly negative sums map to -1, everything else to 1.
    return -1 if weighted_sum < 0 else 1



The final code snippet contains the perceptron training loop. It is a nested loop in which the inner loop runs through all four training examples in random order. For each example, it computes the output and adjusts and prints the weights if the output is wrong. The outer loop tests whether the perceptron provided correct output for all four examples and, if so, terminates the program.


In [None]:
# Perceptron training loop: repeat full passes over the training set until
# one pass classifies every example correctly.
all_correct = False
while not all_correct:
    random.shuffle(index_list)  # Visit the examples in a new random order
    mistakes = 0
    for i in index_list:
        x, y = x_train[i], y_train[i]
        p_out = compute_output(w, x)  # Perceptron function
        if p_out == y:
            continue  # Correct prediction: weights stay as they are
        # Wrong prediction: move every weight toward the desired output.
        step = y * LEARNING_RATE
        for j in range(len(w)):
            w[j] += step * x[j]
        mistakes += 1
        show_learning(w)  # Show updated weights
    all_correct = (mistakes == 0)


Print the output of all four input examples using the learned weights.


In [None]:
# Compare the ground truth with the trained perceptron's prediction for every
# training example. The label index follows the example index; the original
# copy-pasted lines labelled every prediction 'y0'.
for i, (x, y) in enumerate(zip(x_train, y_train)):
    print('y%d_truth =' % i, '%5.2f' % y,
          ', y%d =' % i, '%5.2f' % compute_output(w, x))
