# Perceptron Learning in 2D

This notebook visualizes Rosenblatt's perceptron learning algorithm on simple 2-dimensional training data.

First we define `PerceptronVisualizingWidget`, which visualizes the state of the network at each step of training.

In [None]:
import torch, numpy
from copy import deepcopy
from baukit import show, Widget, PlotWidget, Range, Numberbox

# Fixed seed so that every run of the notebook sees the same 100 points.
prng = numpy.random.RandomState(1)
data = torch.Tensor(prng.randn(100, 2))
# A point is labeled +1 when x0 - 0.4 < -0.7 * x1 and -1 otherwise, so the
# two classes are split by a straight line (linearly separable).
labels = (data[:, 0] - 0.4 < -0.7 * data[:, 1]).float() * 2 - 1


class PerceptronVisualizingWidget(Widget):
    """Scrubbable three-panel replay of a perceptron's training history.

    Each call to `add` stores a snapshot of the network together with the
    training example that was just presented. The widget lays out a number
    box and a range slider (both constructed from the plot's `index`
    property) above a `PlotWidget` that redraws the selected snapshot
    through `visualize_net`.

    Args:
        data: indexable collection of 2-d points (read as `d[0]`, `d[1]`)
            shown in the scatter plot. Defaults to an empty list.
        labels: one label per point; points with label `> 0` and `<= 0`
            are scattered as two separate groups. Defaults to an empty list.
    """

    def __init__(self, data=None, labels=None):
        super().__init__()
        # None sentinels instead of `[]` defaults, so that instances never
        # share one mutable default list.
        self.data = [] if data is None else data
        self.labels = [] if labels is None else labels
        # One (network snapshot, datum, ok, label) tuple per recorded step.
        self.history = []
        self.plot = PlotWidget(self.visualize_net, nrows=1, ncols=3, figsize=(11, 4))
        # Keep a direct handle on the slider so `add` can extend its range
        # without depending on the slider's position inside `self.content`.
        self._scrubber = Range(min=0, max=0, value=self.plot.prop('index'))
        numbox = Numberbox(value=self.plot.prop('index'))
        self.content = [
            [
                [show.style(alignContent='center'), 'Iteration'],
                numbox,
                show.style(flex=20), self._scrubber
            ],
            self.plot
        ]

    def _repr_html_(self):
        """Return the HTML rendering of the widget layout."""
        return show.html(self.content)

    def add(self, net, x=None, ok=False, y=None):
        """Record one training step.

        Args:
            net: the network in its current state; a deep copy moved to the
                CPU is stored, so later weight updates do not alter history.
            x: the training example presented at this step (optional).
            ok: whether the network classified `x` correctly.
            y: the target label of `x` (optional).
        """
        with torch.no_grad():
            self.history.append((deepcopy(net).cpu(), x, ok, y))
        # Let the slider reach the newly recorded step.
        self._scrubber.max = len(self.history) - 1
        if len(self.history) == 1:
            # Assign the index once the first snapshot exists.
            self.plot.index = 0

    def visualize_net(self, fig, index=0):
        """Draw the snapshot stored at `history[index]` onto `fig`.

        `fig` must already hold three axes. They show, left to right:
        the network output over the square [-3, 3] x [-3, 3]; the training
        data over a faint copy of that output, with the current example
        ringed in green (classified correctly) or red (misclassified); and
        the weight vector as a purple arrow, followed by a red arrow for the
        update `label * datum` when the example was misclassified.

        The snapshot must expose `net.summation.weight`, and only column 0
        of its output is plotted. If `index` is past the end of the history
        the axes are simply left cleared.
        """
        fig.subplots_adjust(0.02, 0.02, 0.98, 0.98)
        ax1, ax2, ax3 = fig.axes
        for ax in (ax1, ax2, ax3):
            ax.clear()
        if index >= len(self.history):
            return
        net, datum, ok, label = self.history[index]

        # 200 x 200 grid of (x, y) coordinates; y runs from +3 at the top row
        # to -3 at the bottom so that the image is drawn the right way up.
        grid = torch.stack([
            torch.linspace(-3, 3, 200)[None, :].expand(200, 200),
            torch.linspace(3, -3, 200)[:, None].expand(200, 200),
        ])
        with torch.no_grad():
            score = net(grid.permute(1, 2, 0).reshape(-1, 2))
        heat = score[:, 0].reshape(200, 200).detach().cpu()

        ax1.set_title('network output')
        ax1.imshow(heat, cmap='hot', extent=[-3, 3, -3, 3])
        ax2.imshow(heat, cmap='hot', extent=[-3, 3, -3, 3], alpha=0.2)

        ax2.set_title('training data')
        ax2.set_ylim(-3, 3)
        ax2.set_xlim(-3, 3)
        ax2.set_aspect(1.0)
        # Scatter positives first, then negatives, as two separate groups.
        pairs = list(zip(self.data, self.labels))
        positives = [d for d, l in pairs if l > 0]
        negatives = [d for d, l in pairs if l <= 0]
        ax2.scatter([float(d[0]) for d in positives],
                    [float(d[1]) for d in positives])
        ax2.scatter([float(d[0]) for d in negatives],
                    [float(d[1]) for d in negatives])
        if datum is not None:
            # Ring of radius 0.1 (data units) around the current example.
            # It is traced as a parametric outline because pyplot is not
            # imported in this notebook, so no patch class is in scope.
            theta = torch.linspace(0, 2 * numpy.pi, 100)
            ring_color = '#FF0000' if not ok else '#00FF00'
            ax2.plot((float(datum[0]) + 0.1 * torch.cos(theta)).numpy(),
                     (float(datum[1]) + 0.1 * torch.sin(theta)).numpy(),
                     color=ring_color, linewidth=3)

        ax3.set_title('model weights')
        w = net.summation.weight.cpu().detach()
        w0, w1 = float(w[0, 0]), float(w[0, 1])
        # Show at least [-5, 5]; grow with a 5% margin for larger weights.
        lim = max(5, float(w.abs().max()) * 1.05)
        ax3.set_ylim(-lim, lim)
        ax3.set_xlim(-lim, lim)
        ax3.set_aspect(1.0)
        ax3.arrow(0, 0, w0, w1, width=0.02, head_width=0.2, color='purple',
                  length_includes_head=True)
        if not ok and datum is not None and label is not None:
            # The update applied after a mistake, drawn from the tip of the
            # current weight vector.
            d = label * datum
            ax3.arrow(w0, w1, float(d[0]), float(d[1]), width=0.02,
                      head_width=0.2, color='r', length_includes_head=True)
        


## Perceptron Algorithm Learning on Separable Data

The following code is a demo of Rosenblatt's perceptron algorithm learning to classify linearly separable data.

In [None]:
from torch.nn import Sequential, Linear, Module
from collections import OrderedDict


class Sign(Module):
    """Hard-threshold activation: returns the elementwise sign of its input."""

    def forward(self, x):
        return torch.sign(x)


pw = PerceptronVisualizingWidget(data, labels)
prng = numpy.random.RandomState(1)

# A single-neuron perceptron with two inputs: a weighted sum followed by
# a sign threshold.
net = Sequential(OrderedDict([
    ('summation', Linear(2, 1)),
    ('activation', Sign()),
]))

# Initialize with large weights so that training is gradual.
with torch.no_grad():
    for p in net.parameters():
        p[...] = 2 * torch.Tensor(prng.randn(p.numel())).view(p.shape)

# The perceptron algorithm: present one randomly chosen example per step.
for it in range(500):
    i = prng.randint(len(data))
    y = labels[i]
    x = data[i]
    pred = net(x[None])
    # Plain bool (not a 0-d tensor) so the recorded history holds simple flags.
    ok = bool(pred.item() == y)
    # Record the state before any update, so the widget can show the
    # correction that is about to be applied.
    pw.add(net, x, ok, y)
    if not ok:
        # Only update weights if the data point is wrong.
        with torch.no_grad():
            net.summation.weight += y * x
            # Question for students: why isn't the following line in Rosenblatt's book?
            net.summation.bias += y

show(pw)

In [None]:
import time

# Play back every recorded iteration, pausing 0.1 s between frames.
# The frame count follows the recorded history instead of a fixed 500.
for i in range(len(pw.history)):
    pw.plot.index = i
    time.sleep(0.1)

## Perceptron Algorithm Failing on non-Separable Data

The following code is a demo of the behavior of Rosenblatt's perceptron algorithm when applied to data that is not linearly separable.

In [None]:
# Alternative non-separable labeling (points inside the unit circle):
#labels2 = torch.Tensor(numpy.stack([(d.norm() < 1.0) for d in data])) * 2 - 1
# XOR-style labeling: +1 when both coordinates have the same sign, else -1.
labels2 = (data[:, 0].sign() == data[:, 1].sign()).float() * 2 - 1
pw2 = PerceptronVisualizingWidget(data, labels2)
prng = numpy.random.RandomState(1)

# A single-neuron perceptron with two inputs: a weighted sum followed by
# a sign threshold.
net2 = Sequential(OrderedDict([
    ('summation', Linear(2, 1)),
    ('activation', Sign()),
]))

# Initialize with large weights so that training is gradual.
with torch.no_grad():
    for p in net2.parameters():
        p[...] = 2 * torch.Tensor(prng.randn(p.numel())).view(p.shape)

# The perceptron algorithm: present one randomly chosen example per step.
for it in range(500):
    i = prng.randint(len(data))
    y = labels2[i]
    x = data[i]
    # Predict with net2, the network trained in this cell (not the earlier
    # `net`), so `ok` reflects the model whose weights are being updated.
    pred = net2(x[None])
    # Plain bool (not a 0-d tensor) so the recorded history holds simple flags.
    ok = bool(pred.item() == y)
    # Record the state before any update, so the widget can show the
    # correction that is about to be applied.
    pw2.add(net2, x, ok, y)
    if not ok:
        # Only update weights if the data point is wrong.
        with torch.no_grad():
            net2.summation.weight += y * x
            # Question for students: why isn't the following line in Rosenblatt's book?
            net2.summation.bias += y
show(pw2)

In [None]:
import time

# Play back every recorded iteration, pausing 0.1 s between frames.
# The frame count follows the recorded history instead of a fixed 500.
for i in range(len(pw2.history)):
    pw2.plot.index = i
    time.sleep(0.1)