In [None]:
%matplotlib inline

# Multi Layer Perceptron (MLP)

<img src="Images/Figure 3.1.jpg" width="80%">

Here are some additional resources (beyond what we cover in class):

* Gradient descent, how neural networks learn
  * https://www.youtube.com/watch?v=IHZwWFHWa-w&list=PLZHQObOWTQDNU6R1_67000Dx_ZCJB-3pi&t=11s&index=3

* Backpropagation calculus
  * https://www.youtube.com/watch?v=tIeHLnjs5U8&list=PLZHQObOWTQDNU6R1_67000Dx_ZCJB-3pi&t=389s&index=5

* What is backpropagation really doing
  * https://www.youtube.com/watch?v=Ilg3gGewQ5U&list=PLZHQObOWTQDNU6R1_67000Dx_ZCJB-3pi&t=532s&index=4

### Modern 'language'(s) for implementing an MLP

There are many web resources nowadays for learning languages like PyTorch and TensorFlow, e.g.,

  * https://adventuresinmachinelearning.com/pytorch-tutorial-deep-learning/

On this page, we are going to create a simple MLP in PyTorch.

Specifically, we will look at the XOR problem: the target is 1 when exactly one of the two binary inputs is 1, and 0 otherwise.

In [None]:
# include our Python packages
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.autograd import Function
import torch.optim as optim
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

##############################################
# declare the MLP
##############################################

# let's make a simple MLP
class XORMlp(nn.Module):
    """Two-layer perceptron: linear -> ReLU -> linear.

    Args:
        D_in: size of each input sample.
        H: number of hidden units.
        D_out: size of each output sample.
    """

    def __init__(self, D_in, H, D_out):
        super().__init__()
        # Layers are created in input-to-output order.
        self.linear1 = nn.Linear(D_in, H)   # input -> hidden
        self.linear2 = nn.Linear(H, D_out)  # hidden -> output

    def forward(self, x):
        """Return the raw network output for ``x`` (no output activation)."""
        hidden = self.linear1(x)     # affine map into the hidden layer
        hidden = F.relu(hidden)      # ReLU nonlinearity
        return self.linear2(hidden)  # affine map to the output

##############################################
# create an instance and set up optimization
##############################################

# Architecture: 2 input features -> 4 hidden neurons -> 1 output neuron.
D_in = 2
H = 4
D_out = 1
net = XORMlp(D_in, H, D_out)

# now, optimization and draw stuff (look at perceptron Jupyter pages)

def criterion(out, label):
    """Squared error between a prediction ``out`` and its target ``label``.

    No reduction is applied: the result has whatever shape ``label - out``
    has.
    """
    residual = label - out
    return residual ** 2

# Stochastic gradient descent over every parameter of `net`,
# with learning rate 0.01 and a momentum coefficient of 0.3.
optimizer = optim.SGD(
    net.parameters(),
    lr=0.01,
    momentum=0.3,
)

##############################################
# xor data set
##############################################

# The four XOR input points, one (x1, x2) pair per row.
# Written as literals so the values are visible at a glance and no random
# numbers are drawn just to allocate the tensors.
data = torch.tensor([
    [0.0, 0.0],
    [1.0, 1.0],
    [0.0, 1.0],
    [1.0, 0.0],
])

# Target for each row of `data`: 1 when exactly one input is 1, otherwise 0.
L = torch.tensor([0.0, 0.0, 1.0, 1.0])

##############################################
# training
##############################################

# One optimizer step per sample (batch size 1), visiting the samples in their
# stored order on every epoch. Tensors are passed to the network directly;
# wrapping them in the deprecated `Variable` is not needed.
for epoch in range(1500):
    for X, Y in zip(data, L):    # X: one input row, Y: its scalar target
        optimizer.zero_grad()    # clear gradients left over from the previous step
        outputs = net(X)
        loss = criterion(outputs, Y)
        loss.backward()
        optimizer.step()

# Print the trained network's raw output for each of the four XOR inputs.
# Each probe is fed as a 1x1x2 tensor, so each printed result is 1x1x1.
for probe in ([0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]):
    print(net(torch.tensor([[probe]])))

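Last, let's draw the decision boundary the network has learned: we evaluate the network on a grid of points with PyTorch and plot the result with Matplotlib.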

In [None]:
# Plot decision boundary
# Grid bounds as plain Python floats (0.1 of padding around the data) so the
# NumPy calls below receive numbers rather than 0-dim torch tensors.
x_min, x_max = data[:, 0].min().item() - 0.1, data[:, 0].max().item() + 0.1
y_min, y_max = data[:, 1].min().item() - 0.1, data[:, 1].max().item() + 0.1
spacing = min(x_max - x_min, y_max - y_min) / 100  # ~100 steps along the shorter side
XX, YY = np.meshgrid(np.arange(x_min, x_max, spacing),
                     np.arange(y_min, y_max, spacing))
# One (x, y) row per grid point.
datax = np.column_stack((XX.ravel(), YY.ravel()))
data_t = torch.tensor(datax, dtype=torch.float32)
# Inference only: skip autograd bookkeeping while evaluating the grid.
with torch.no_grad():
    db_prob = net(data_t)
# Threshold the raw network output at 0.5 to get a 0/1 class per grid point.
clf = np.where(db_prob.numpy() < 0.5, 0, 1)
Z = clf.reshape(XX.shape)
plt.figure(figsize=(12, 8))
plt.contourf(XX, YY, Z, cmap=plt.cm.Accent, alpha=0.5)
# Overlay the training points, coloured by their target label.
plt.scatter(data[:, 0].numpy(), data[:, 1].numpy(), c=L.numpy(), cmap=plt.cm.Accent)
plt.xlabel("x1")
plt.ylabel("x2")
plt.title("Decision boundary (network output thresholded at 0.5)")
plt.show()

# Example 2: Another Way to Write It, Yup

This page simply looks at PyTorch's nn.Sequential container!
  * https://pytorch.org/docs/stable/nn.html
  * https://www.programcreek.com/python/example/107650/torch.nn.Sequential

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.autograd import Function
import torch.optim as optim
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

############################################
# Here is what's different in this code!!!
#   self.layers = nn.Sequential
############################################
class AnotherMLP(nn.Module):
    """Linear -> Tanh -> linear network held in a single nn.Sequential.

    Args:
        D_in: size of each input sample.
        H: number of hidden units.
        D_out: size of each output sample.
    """

    def __init__(self, D_in, H, D_out):
        super().__init__()
        # Build the stages in input-to-output order, then hand them to the
        # container, which applies them in that same order.
        hidden_layer = nn.Linear(D_in, H)
        activation = nn.Tanh()  # nn.ReLU() is the alternative to try here
        output_layer = nn.Linear(H, D_out)
        self.layers = nn.Sequential(hidden_layer, activation, output_layer)

    def forward(self, x):
        """Pass ``x`` through every stage of ``self.layers`` and return the result."""
        return self.layers(x)
############################################

# Architecture: 2 input features -> 2 hidden neurons -> 1 output neuron.
D_in = 2
H = 2
D_out = 1
net = AnotherMLP(D_in, H, D_out)

def criterion(out, label):
    """Return the element-wise squared difference between ``label`` and ``out``.

    The value is not summed or averaged; its shape is that of ``label - out``.
    """
    error = label - out
    return error ** 2

############################################
# threw in a new learner for you!
############################################
# Adam over every parameter of `net`, with learning rate 0.1.
optimizer = optim.Adam(
    net.parameters(),
    lr=1e-1,
)

# xor data set
# The four XOR input points, one (x1, x2) pair per row.
# Written as literals so the values are visible at a glance and no random
# numbers are drawn just to allocate the tensors.
data = torch.tensor([
    [0.0, 0.0],
    [1.0, 1.0],
    [0.0, 1.0],
    [1.0, 0.0],
])

# Target for each row of `data`: 1 when exactly one input is 1, otherwise 0.
L = torch.tensor([0.0, 0.0, 1.0, 1.0])

# One optimizer step per sample (batch size 1), visiting the samples in their
# stored order on every epoch. Tensors are passed to the network directly;
# wrapping them in the deprecated `Variable` is not needed.
for epoch in range(2000):
    for X, Y in zip(data, L):    # X: one input row, Y: its scalar target
        optimizer.zero_grad()    # clear gradients left over from the previous step
        outputs = net(X)
        loss = criterion(outputs, Y)
        loss.backward()
        optimizer.step()

# Plot the decision boundary of the trained network.
# Grid bounds as plain Python floats (0.1 of padding around the data) so the
# NumPy calls below receive numbers rather than 0-dim torch tensors.
x_min, x_max = data[:, 0].min().item() - 0.1, data[:, 0].max().item() + 0.1
y_min, y_max = data[:, 1].min().item() - 0.1, data[:, 1].max().item() + 0.1
spacing = min(x_max - x_min, y_max - y_min) / 100  # ~100 steps along the shorter side
XX, YY = np.meshgrid(np.arange(x_min, x_max, spacing),
                     np.arange(y_min, y_max, spacing))
# One (x, y) row per grid point.
datax = np.column_stack((XX.ravel(), YY.ravel()))
data_t = torch.tensor(datax, dtype=torch.float32)
# Inference only: skip autograd bookkeeping while evaluating the grid.
with torch.no_grad():
    db_prob = net(data_t)
# Threshold the raw network output at 0.5 to get a 0/1 class per grid point.
clf = np.where(db_prob.numpy() < 0.5, 0, 1)
Z = clf.reshape(XX.shape)
plt.figure(figsize=(12, 8))
plt.contourf(XX, YY, Z, cmap=plt.cm.Accent, alpha=0.5)
# Overlay the training points, coloured by their target label.
plt.scatter(data[:, 0].numpy(), data[:, 1].numpy(), c=L.numpy(), cmap=plt.cm.Accent)
plt.xlabel("x1")
plt.ylabel("x2")
plt.title("Decision boundary (network output thresholded at 0.5)")
plt.show()

# Reflection

1) Change the nonlinearity

2) Try different numbers of layers and neurons

3) Make a different data set

4) Does it matter what labels you pick? e.g., {0,1} or {-1,1}