In [None]:
%matplotlib inline

# Perceptron in PyTorch

The goal of this Jupyter Notebook page is to code up the perceptron in PyTorch

There are MANY ways we can achieve this; the following is just one solution. More on the web if you care, e.g.,

  * https://medium.com/@tomgrek/building-your-first-neural-net-from-scratch-with-pytorch-56b0e9c84d54
  * https://pytorch.org/docs/stable/nn.html
  
If you already know PyTorch, maybe your kung fu is greater than mine... I tried to keep this readable also (vs linear algebra-ized)

In [None]:
import torch # this is our pytorch lib
from torch.autograd import Variable # need this for variables and autograd is for automatically working with gradients (its really nice)
import torch.nn as nn # this is the neural net part of torch
import torch.nn.functional as F # will use this below for some nn function stuff

Let's define a simple neuron class

In [None]:
class Perceptron(nn.Module): # I named the Python class Perceptron
    """A single linear neuron: output = weights . input + bias.

    Args:
        in_features: number of inputs fed to the neuron (default 1).
        out_features: number of outputs produced (default 1).
        bias: whether to learn an additive bias term (default True).

    With the defaults this is the 1-input / 1-output neuron used below; the
    arguments exist so the same class can be reused for wider inputs.
    """

    def __init__(self, in_features=1, out_features=1, bias=True):
        # run nn.Module's own initialisation first so that layers assigned
        # below are registered as parameters of this module
        super().__init__()
        # nn.Linear(num of inputs, num of outputs, bias = True or False)
        # applies a linear transformation to the incoming data (aka our dot product!)
        # and holds internal Tensors for its weight and bias.
        # https://pytorch.org/docs/stable/generated/torch.nn.Linear.html
        self.fc1 = nn.Linear(in_features, out_features, bias)

    def forward(self, x):
        """Forward neural net pass: send input ``x`` through the linear layer ``fc1``."""
        return self.fc1(x)

Wow, that was simple!!!!

Few quick remarks

  * Keep all code in *PyTorch talk* (we will talk about this more later), meaning all functions are *PyTorch supported*, and PyTorch will calc the gradient (so we can train our net!) for "free"! (well, it's not magic, it gets it from autograd)
    * i.e., you do NOT need to define the backward pass with hand calc derivatives 
  * Neat, right!!! ...
    * scary for your Prof, who wants to make sure you understand every calc and do all this outside PyTorch... 

If you want to dig deeper

  * Read https://pytorch.org/docs/stable/nn.html
    * CLASS torch.nn.Linear(in_features, out_features, bias=True)
  * Read https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html 
    * Custom gradients
      * https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html
      * https://jhui.github.io/2018/02/09/PyTorch-Variables-functionals-and-Autograd/

Let's look at what we got

In [None]:
# create an instance of our class and print it to inspect the module
net = Perceptron()
print(net)

OK, this shows the layers inside our module and how each one is configured (not the parameter values themselves)

Next, let's see our parameters

In [None]:
# net.parameters() is an iterator, so wrap it in list() to print every parameter
print(list(net.parameters()))

FYI, nice helper site: https://pytorch.org/docs/stable/torch.html

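Next, let's create a variable, because we want to pass data into this net.

A Variable wraps a Tensor. It supports nearly all the APIs defined by a Tensor. (Since PyTorch 0.4, Variable and Tensor have been merged, so a plain Tensor works the same way and the wrapper is optional.)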

In [None]:
# one standard-normal sample shaped (1, 1, 1); the input itself needs no gradient
sample = torch.randn(1, 1, 1)
input = Variable(sample, requires_grad=False)
print(input)

Setting requires_grad means it’s an optimizable variable (e.g., w.r.t. autograd). We don't need that here on our input. 

Next, let's put the input into our neuron and see what we get

In [None]:
# calling the module runs its forward() method, i.e. the linear layer fc1, on the input
out = net(input)
print(out)

Well, that makes sense if you think about it. (0.7544 * -0.1329 ) + -0.3152 = -0.4154

Of course, that was for the random numbers I got. You would have to use the values your own Jupyter run produced to verify the output.

Next up, define a loss function and an optimizer using stochastic gradient descent (SGD)

In [None]:
import torch.optim as optim

# our own custom loss: the squared error between prediction and label
# (element-wise; for one sample at a time this is that sample's squared error)
# you can also just call their built in ones
def criterion(out, label):
    """Return the element-wise squared difference ``(label - out)**2``."""
    residual = label - out
    return residual ** 2

# stochastic gradient descent (with momentum) over the neuron's parameters
sgd_settings = {"lr": 0.01, "momentum": 0.5}
optimizer = optim.SGD(net.parameters(), **sgd_settings)

print(optimizer)

Then, define a training dataset.

In [None]:
# training pairs (x, y) that all lie on the line y = 3x, for x = 1..6
data = [(x, 3 * x) for x in range(1, 7)]
print(data)

Now, let's do some training

In [None]:
NEpochs = 20
for epoch in range(NEpochs):
    # one SGD step per (input, target) pair in the training list
    for x_value, y_value in data:

        # wrap each number in a 1-element float tensor; neither needs
        # requires_grad, since gradients are only wanted for the net's parameters
        X = torch.tensor([x_value], dtype=torch.float32)
        Y = torch.tensor([y_value], dtype=torch.float32)
        # sets gradients of all model parameters to zero
        optimizer.zero_grad()
        # eval data
        outputs = net(X)
        # calc our error
        loss = criterion(outputs, Y)
        # run backward pass
        loss.backward()
        # take a step in optimization
        optimizer.step()

    # loss of the LAST sample seen in this epoch; .item() turns the
    # one-element tensor into a plain Python number for printing
    print("Epoch {} - loss: {}".format(epoch, loss.item()))

What solution did we get?

In [None]:
# show the weight and bias after training
print(list(net.parameters()))

Let's predict a value

In [None]:
# ask the trained neuron for its output at x = 1
query = Variable(torch.Tensor([[[1]]]))
prediction = net(query)
print(prediction)

### Example 2: PyTorch Perceptron on a more complicated (ok, slightly...) data set 

Let's build a two-class data set and use PyTorch to find a solution

In [None]:
import torch
from torch.autograd import Variable
from torch.autograd import Function
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch.optim as optim
import matplotlib
import matplotlib.pyplot as plt

# our neural net class
class Perceptron(nn.Module):
    """A single linear neuron: output = weights . input + bias.

    Args:
        in_features: number of inputs fed to the neuron (default 2, one per
            coordinate of the 2-D samples used in this example).
        out_features: number of outputs produced (default 1).
        bias: whether to learn an additive bias term (default True).
    """

    def __init__(self, in_features=2, out_features=1, bias=True):
        # run nn.Module's own initialisation first so fc1 gets registered
        super().__init__()
        self.fc1 = nn.Linear(in_features, out_features, bias)

    def forward(self, x):
        """Forward pass: send input ``x`` through the linear layer ``fc1``."""
        return self.fc1(x)
    
# build the neuron, then show its layout and its freshly initialised parameters
net = Perceptron()
# net.cuda()  # uncomment for GPU acceleration
print("net")
print(net)
print("net parameters")
initial_parameters = list(net.parameters())
print(initial_parameters)

# criterion (loss) function: squared error between prediction and label
def criterion(out,label):
    """Return the element-wise squared difference ``(label - out)**2``."""
    residual = label - out
    return residual ** 2

# SGD with momentum, updating the neuron's weights and bias
trainable = net.parameters()
optimizer = optim.SGD(trainable, lr=0.01, momentum=0.5)

# make some synthetic data: two Gaussian blobs (std 0.1) in 2-D
torch.manual_seed(0)  # fixed seed so the same samples are drawn on every run
N = 100
Nhalf = N // 2
data = torch.randn(N,2)
# class 1: first half of the rows, centred at (0.2, 0.2)
data[:Nhalf] = data[:Nhalf] * 0.1 + 0.2
# class 2: remaining rows, centred at (-0.2, 0.8); the 2-element offset
# broadcasts across rows (column 0 gets -0.2, column 1 gets 0.8)
data[Nhalf:] = data[Nhalf:] * 0.1 + torch.tensor([-0.2, 0.8])
    
# show what that data looks like
dataForPlot = np.asarray( data )
fig = plt.figure()
# slice ends are exclusive, so 0:Nhalf covers every class-1 sample
# (0:Nhalf-1 would silently drop the last one)
plt.plot( dataForPlot[0:Nhalf,0], dataForPlot[0:Nhalf,1], 'rx', label='class 1' )
plt.plot( dataForPlot[Nhalf:N,0], dataForPlot[Nhalf:N,1], 'mx', label='class 2' )
plt.axis('equal')
plt.legend()
plt.show()

# labels: +1 for the first Nhalf samples (class 1), -1 for the next Nhalf (class 2)
L = torch.ones(N)
L[Nhalf:2 * Nhalf] = -1

# train: 500 passes over the data, one SGD step per sample
for epoch in range(500):
    for i in range(N):
        X = data[i,:]  # the i-th 2-D sample
        Y = L[i]       # its label (+1 or -1)
        # clear gradients left over from the previous step
        optimizer.zero_grad()
        outputs = net(X)
        loss = criterion(outputs, Y)
        # backward pass, then parameter update
        loss.backward()
        optimizer.step()
# report the last sample's loss from the final epoch; .item() turns the
# one-element tensor into a plain Python number
print( "Epoch {} - loss: {}".format(epoch, loss.item()))
print(list(net.parameters()))

Intercepts are?

In [None]:
# the net exposes two parameters here: params[0] is used as the weights, params[1] as the bias
params = list(net.parameters())
print(len(params))

weight_param = params[0]
print(weight_param.size())
print(weight_param)
w = weight_param.data.cpu().numpy()[0]  # first (only) row of weights as a NumPy array
print(w)

bias_param = params[1]
print(bias_param.size())
print(bias_param)
bias = bias_param.data.cpu().numpy()[0]  # the bias as a NumPy scalar
print(bias)

# where the line w[0]*x + w[1]*y + bias = 0 crosses each axis
x_intercept = -bias / w[0]
y_intercept = -bias / w[1]
print( "x-axis = -bias / weight1 = " + str(x_intercept) )
print( "y-axis = -bias / weight2 = " + str(y_intercept) )

Let's now plot the perceptron

In [None]:
# slope of the decision line from its two axis intercepts:
# run (sx) from the x-intercept to 0, rise (sy) from 0 to the y-intercept
sx = ( 0 - (-bias / w[0]) )
sy = ( (-bias / w[1]) - 0 )
x = np.linspace(0,1,100)
# line through the y-intercept with that slope
y = (sy/sx)*x + (-bias / w[1])

dataForPlot = np.asarray( data )
fig = plt.figure()
# slice ends are exclusive, so 0:Nhalf covers every class-1 sample
# (0:Nhalf-1 would silently drop the last one)
plt.plot( dataForPlot[0:Nhalf,0], dataForPlot[0:Nhalf,1], 'rx' )
plt.plot( dataForPlot[Nhalf:N,0], dataForPlot[Nhalf:N,1], 'mx' )
# black segment joining the two axis intercepts
plt.plot( [-bias / w[0],0] , [0,-bias / w[1]] , 'k' )
# cyan: the same line evaluated for x in [0, 1]
plt.plot(x, y, '-c')
plt.axis('equal')
plt.show()

Let's draw with PyTorch...

In [None]:
# Plot the decision boundary
# Work in NumPy for the grid and plotting: convert samples and labels once
points = np.asarray(data)
labels = np.asarray(L)

# Determine grid range in x and y directions (0.1 margin around the samples),
# as plain Python floats so np.arange gets ordinary numbers
x_min, x_max = float(points[:, 0].min()) - 0.1, float(points[:, 0].max()) + 0.1
y_min, y_max = float(points[:, 1].min()) - 0.1, float(points[:, 1].max()) + 0.1

# Set grid spacing parameter
spacing = min(x_max - x_min, y_max - y_min) / 100

# Create grid
XX, YY = np.meshgrid(np.arange(x_min, x_max, spacing),
                     np.arange(y_min, y_max, spacing))

# Concatenate data to match input: one (x, y) row per grid point
datax = np.hstack((XX.ravel().reshape(-1,1),
                   YY.ravel().reshape(-1,1)))

# Pass data to the net; no_grad because we only need predictions here,
# not an autograd graph over every grid point
data_t = torch.as_tensor(datax, dtype=torch.float32)
with torch.no_grad():
    db_prob = net(data_t)

# labels are +1 / -1, so split the net's output at 0
clf = np.where(db_prob.numpy() < 0, 0, 1)

Z = clf.reshape(XX.shape)

plt.figure(figsize=(8,8))
plt.contourf(XX, YY, Z, cmap=plt.cm.Accent, alpha=0.5)
plt.scatter(points[:,0], points[:,1], c=labels,
            cmap=plt.cm.Accent)
plt.show()

# Reflection

Address the following

1) Add a nonlinearity/activation function

2) Use a different optimization algorithm (not SGD) and use a different error/cost function