In [3]:
%matplotlib notebook
from ipywidgets import *
import numpy as np
import matplotlib.pyplot as plt

# Universal Approximation Theory 
Neural Networks are universal function approximators.
This talk tries to give some intuition on why that should be true and uses interactive graphs, gifs and some maths to do it. 

## Picking up where we left off
Now we have made a perceptron, however the one we made might be a bit different to ones you are farmiliar with, maybe you were expecting to hear some mention of "sigmoids". 

# Introducing the sigmoid function
Sigmoid is a *non linear* function we can apply to the output of our perceptron.

In [4]:
def sigmoid(z):
    return 1/(1+np.exp(-z))

def hidden_n(x,w,b):
    y = w*x+b
    return sigmoid(y)

hidden_n(1,1,1)

0.8807970779778823

In [5]:
# Lets take a look at what a sigmoid looks like
z = np.arange(-5, 5, .1)
sigma_fn = np.vectorize(hidden_n)
sigma = sigma_fn(z,1,1)

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(z, sigma)
ax.set_ylim([-0.5, 1.5])
ax.set_xlim([-5,5])
ax.grid(True)
ax.set_xlabel('z')
ax.set_title('Sigmoid function')

plt.show()

<IPython.core.display.Javascript object>

To see what this function can do, lets have a play around with the parameters. 

In [6]:
# Plotting the output of a hidden neuron 
z = np.arange(-5, 5, .1)
sigma_fn = np.vectorize(hidden_n)
sigma = sigma_fn(z,1,1)

fig = plt.figure()
ax = fig.add_subplot(1,1,1)
line, = ax.plot(z, sigma)
ax.set_ylim([-0.5, 1.5])
ax.set_xlim([-5,5])
ax.grid(True)
ax.set_xlabel('z')
ax.set_title('Sigmoid function')

def update(w = 1.0, b = 1.0):
    line.set_ydata(sigma_fn(z,w,b))
    fig.canvas.draw_idle()

interact(update);
fig.show()

<IPython.core.display.Javascript object>

interactive(children=(FloatSlider(value=1.0, description='w', max=3.0, min=-1.0), FloatSlider(value=1.0, descr…

Try increasing the weight of the below neuron and looking at the result. 

In [7]:
# Plotting the output of a hidden neuron 
z = np.arange(-5, 5, .1)
sigma_fn = np.vectorize(hidden_n)
sigma = sigma_fn(z,1,1)

fig = plt.figure()
ax = fig.add_subplot(1,1,1)
line, = ax.plot(z, sigma)
ax.set_ylim([-0.5, 1.5])
ax.set_xlim([-5,5])
ax.grid(True)
ax.set_xlabel('z')
ax.set_title('Sigmoid function')

def update(w = 1.0, b = 1.0):
    line.set_ydata(sigma_fn(z,w,b))
    fig.canvas.draw_idle()

    

slow = interactive(update,w=FloatSlider(min=1, max=30, step=1), b =FloatSlider(min=1, max=30, step=1));
fig.show()
slow

<IPython.core.display.Javascript object>

interactive(children=(FloatSlider(value=1.0, description='w', max=30.0, min=1.0, step=1.0), FloatSlider(value=…

We get a step function. 

# Explanation of functions for Universal Approximation Theory 
https://www.khanacademy.org/math/linear-algebra/matrix-transformations/linear-transformations/a/visualizing-linear-transformations


# Solving NAND
A NAND gate is a fundamental logic gate in computing. 

![NAND Gate](https://external-content.duckduckgo.com/iu/?u=https%3A%2F%2Faccendoreliability.com%2Fwp-content%2Fuploads%2F2017%2F07%2FLogic-gate-nand-us.png&f=1&nofb=1)

They have the following properties, given inputs A, B they output Y. 

|A|B|Y|
|-|-|-|
|0|0|1|
|0|1|1|
|1|0|1|
|1|1|0|

It turns out that if you have enough NAND gates you could build a whole computer, from the ALU (Arithmetic and Logic Unit) to the storage (Flip Flops), so if we can make a binary classifier that replicates a NAND, we know we have found a good building block.

In [9]:
def hidden_XY(x,y,w,b):
    out = w*x + w*y + b
    return out

def perceptron(x,y,w,b):
    o = hidden_XY(x,y,w,b)
    if (o > 0):
        return 1
    else:
        return 0

In [10]:
# Plotting the output of a hidden neuron 
x = np.arange(-0.4, 1.5, .05)
y = np.arange(-0.4, 1.5, .05)
sigma_fn = np.vectorize(perceptron)
sigma = sigma_fn(x,y,1,1)

fig = plt.figure()
ax = fig.add_subplot(1,1,1)
line, = ax.plot(x, sigma)

# Our inputs we want to give 1 (blue)
positive = np.array([
    [0,0],
    [0,1],
    [1,0],
])
a, b = positive.T
plt.scatter(a,b)

# Our inputs which we want to give 0 (red)
negative = np.array([
    [1,1],
])
a, b= negative.T
plt.scatter(a,b)

ax.set_ylim([-0.5, 1.5])
ax.set_xlim([-0.5,1.5])
ax.grid(True)
ax.set_xlabel('z')
ax.set_title('Sigmoid function')

def update(w = 1.0, b = 1.0):
    line.set_ydata(sigma_fn(x,y,w,b))
    fig.canvas.draw_idle()

slow = interactive(update,w=FloatSlider(min=-3, max=3, step=0.1), b =FloatSlider(min=-3, max=3, step=0.1));
fig.show()
slow

<IPython.core.display.Javascript object>

interactive(children=(FloatSlider(value=0.0, description='w', max=3.0, min=-3.0), FloatSlider(value=0.0, descr…

In [13]:
w = -2
b = 3
print("A,B,Y")
print(0,0,perceptron(0,0,w,b))
print(0,1,perceptron(0,1,w,b))
print(1,0,perceptron(1,0,w,b))
print(1,1,perceptron(1,1,w,b))

A,B,Y
0 0 1
0 1 1
1 0 1
1 1 0


|A|B|Y|
|-|-|-|
|0|0|1|
|0|1|1|
|1|0|1|
|1|1|0|

Note that if the line is touching the point, we classify that as a 1. 

# We have solved NAND!
That wasn't so hard. 

# What can we do with 2 Perceptrons:

In [11]:
def sigmoid(z):
    return 1/(1+np.exp(-z))

def hidden_n(x,w,b):
    y = w*x+b
    return sigmoid(y)

def two_layer(x, weight1, bias1, weight2, bias2, weight1_1, weight1_2):
    neuron1 = hidden_n(x,weight1, bias1)
    neuron2 = hidden_n(x,weight2, bias2)
    one = weight1_1*neuron1
    two = weight1_2*neuron2
    return one + two

# Here we see we have managed to build a bump.
If we kept building these bumps on top of each other, we can see how just like in harmonics we can build any function. 

In [12]:
%matplotlib notebook
from ipywidgets import *
import matplotlib.pyplot as plt
import numpy as np

# Plotting the output of a hidden neuron 
z = np.arange(-5, 5, .1)
sigma_fn = np.vectorize(two_layer)
sigma = sigma_fn(z,1,1,1,1,1,1)

fig = plt.figure()
ax = fig.add_subplot(1,1,1)
line, = ax.plot(z, sigma)
ax.set_ylim([-0.5, 1.5])
ax.set_xlim([-5,5])
ax.grid(True)
ax.set_xlabel('z')
ax.set_title('2 Neuron Sigmoid function')

def update(w1 = 1.0,w2=1.0, b1 =1.0, b2=1.0, w11=1.0, w12=1.0):
    line.set_ydata(sigma_fn(z, w1,b1, w2, b2, w11, w12))
    fig.canvas.draw_idle()

interact(update);
fig.show()

<IPython.core.display.Javascript object>

interactive(children=(FloatSlider(value=1.0, description='w1', max=3.0, min=-1.0), FloatSlider(value=1.0, desc…

![Universal Approximation Proof](https://i.stack.imgur.com/6yngK.gif)

# Universal Approximation Theory:

Neural networks can approximate any bounded function. 

[Guide on universal approximation theory](http://neuralnetworksanddeeplearning.com/chap4.html)


# Why approximate?

![image.png](https://upload.wikimedia.org/wikipedia/commons/a/af/Fourier_synthesis_square_wave_animated.gif)


# Why bounded?

Computation is limited anyway.


You can only compute with numbers you can store. 

### Demos Demo with Macularin Expansion of Sine. 
When you pass data into this function for bounds it is not meant for, you're not going to get good results. 


# Why this doesn't matter?

# Other gates:
NAND is just one type of logic gate, it turns out there are many. 
XOR is another type of gate that has these properties:

|A|B|Y|
|-|-|-|
|0|0|0|
|0|1|1|
|1|0|1|
|1|1|0|

# Can we solve XOR?

In [21]:
# Plotting the output of a hidden neuron 
z = np.arange(-5, 5, .1)
sigma_fn = np.vectorize(two_layer)
sigma = sigma_fn(z,1,1,1,1,1,1)

fig = plt.figure()
ax = fig.add_subplot(1,1,1)
line, = ax.plot(z, sigma)

# Our inputs we want to give 1 (blue)
positive = np.array([
    [0,1],
    [1,0],
])
a, b = positive.T
plt.scatter(a,b)

# Our inputs which we want to give 0 (red)
negative = np.array([
    [0,0],
    [1,1]
])
a, b= negative.T
plt.scatter(a,b)

ax.set_ylim([-0.5, 1.5])
ax.set_xlim([-5,5])
ax.grid(True)
ax.set_xlabel('z')
ax.set_title('2 Neuron Sigmoid function')

def update(w1 = 1.0,w2=1.0, b1 =1.0, b2=1.0, w11=1.0, w12=1.0):
    line.set_ydata(sigma_fn(z, w1,b1, w2, b2, w11, w12))
    fig.canvas.draw_idle()

interact(update);
fig.show()

<IPython.core.display.Javascript object>

interactive(children=(FloatSlider(value=1.0, description='w1', max=3.0, min=-1.0), FloatSlider(value=1.0, desc…

In [13]:
z = np.arange(-5, 5, .1)
sigma_fn = np.vectorize(two_layer)
# sigma_fn(z, w1,b1, w2, b2, w11, w12)
sigma = sigma_fn(z,1,-4,-1,-4,1,-1)

fig = plt.figure()
ax = fig.add_subplot(1,1,1)
line, = ax.plot(z, sigma)

# Our inputs we want to give 1 (blue)
positive = np.array([
    [0,1],
    [1,0],
])
a, b = positive.T
plt.scatter(a,b)

# Our inputs which we want to give 0 (red)
negative = np.array([
    [0,0],
    [1,1]
])
a, b= negative.T
plt.scatter(a,b)

ax.set_ylim([-0.5, 1.5])
ax.set_xlim([-0.5,1.5])
ax.grid(True)
ax.set_xlabel('z')
ax.set_title('2 Neuron Sigmoid function')


<IPython.core.display.Javascript object>

Text(0.5, 1.0, '2 Neuron Sigmoid function')

# Mention of projections (dot product)
Show the UCL Slides. 

