Intro to Neural Nets
===
[Tutorial used here](http://karpathy.github.io/neuralnets/)

[Latex Characters](http://tug.ctan.org/info/symbols/comprehensive/symbols-a4.pdf)


In [67]:
# Basic Gate
#   Single One-Gate Circuit

def forwardMultiplyGate(x,y):
    """Forward pass of a multiply gate: return the product of ``x`` and ``y``."""
    product = x * y
    return product


In [68]:
forwardMultiplyGate(-2,3)

-6

The above function, forwardMultiplyGate(), is equivalent to


$$f(x,y) = xy$$

Random Local Search
----


In [69]:
import random
import time
import math

# Random local search: repeatedly perturb the inputs by a small random
# amount and keep any perturbation that increases the gate's output.
x = -2.
y = 3.

# Scale applied to each random perturbation
tweak_amount = 0.01
best_out = forwardMultiplyGate(x,y)
best_x = x
best_y = y
start_time = time.time()

for k in range(100):
    # random.random()*2. - 1. is a uniform draw from [-1, 1)
    x_try = x + tweak_amount * (random.random()*2. -1.)
    y_try = y + tweak_amount * (random.random()*2. -1.)
    out = forwardMultiplyGate(x_try, y_try)
    if out > best_out:
        best_out = out
        best_x = x_try
        best_y = y_try
        # Also move the search centre to the improved point, so later
        # tries iterate from the best inputs found so far.
        x = x_try
        y = y_try

# Wall-clock duration of the search, in seconds
take_time = time.time() - start_time

print take_time, best_out, best_x, best_y

0.00100684165955 -5.01197382216 -1.69101745807 2.9638805905


Using Numerical Gradient
----
Use the derivative of the function to slowly adjust the inputs for improved outputs


In [70]:
# Numerical gradient: estimate df/dx and df/dy with forward differences,
# then take one small step along that gradient and re-evaluate the gate.
start_time = time.time()
x = -2.
y = 3.
out = forwardMultiplyGate(x, y)
# Finite-difference step used for both partial derivatives
h = 0.01

# (f(x+h, y) - f(x, y)) / h approximates df/dx
xph = x + h
outxh = forwardMultiplyGate(xph, y)
dx = (outxh-out)/h

# (f(x, y+h) - f(x, y)) / h approximates df/dy
yph = y + h
outyh = forwardMultiplyGate(x, yph)
dy = (outyh - out)/h

#print dx, dy

step_size = 0.01
# x and y have not changed yet, so this is the same value computed above
out = forwardMultiplyGate(x, y)
# Move each input a small step in the direction that increases the output
x = x + step_size*dx
y = y + step_size*dy
out_new = forwardMultiplyGate(x,y)
take_time = time.time()-start_time
print out_new, take_time

-5.8706 0.000945091247559


In [71]:
# Analytic gradient: for f(x, y) = x*y the partial derivatives are
#  df/dx = y and df/dy = x, so no finite-difference evaluations are needed.
x = -2.
y = 3.
start_time = time.time()
out = forwardMultiplyGate(x, y)
# Gradients read directly off the inputs (df/dx = y, df/dy = x)
gradx = y
grady = x

step_size = .01
# Step both inputs along the gradient, then re-evaluate the gate
x += step_size*gradx
y += step_size*grady
out_new = forwardMultiplyGate(x,y)
take_time = time.time()-start_time
print out_new, take_time

-5.8706 0.000445127487183


In [72]:
def forwardAddGate(x,y):
    """Forward pass of an add gate: return the sum of ``x`` and ``y``."""
    total = x + y
    return total

In [73]:
def simpleCircuit(x,y,z):
    """Evaluate (x + y) * z by feeding the add gate's output into the multiply gate."""
    return forwardMultiplyGate(forwardAddGate(x, y), z)

# Evaluate the two-gate circuit f = (x + y) * z at a sample point
x = -2.
y = 5.
z = -4.
f = simpleCircuit(x,y,z)

print f

-12.0


Backpropagation
---

In [74]:
# Backpropagation by hand for f = (x + y) * z: run the forward pass,
# then chain each gate's local gradient back to the inputs.
x = -2.
y = 5.
z = -4.

# Forward pass: q = x + y, f = q * z
q = forwardAddGate(x,y)
f = forwardMultiplyGate(q, z)

# Local gradients, derived by hand from the definitions of q and f:
#   f = q*z  gives df/dq = z and df/dz = q
#   q = x+y  gives dq/dx = 1 and dq/dy = 1
dfdq = z
dfdz = q
dqdx = 1.0
dqdy = 1.0

# Chain rule: df/dx = dq/dx * df/dq, and likewise for y
dfdx = dqdx * dfdq
dfdy = dqdy * dfdq

In [75]:
# The gradient is the vector of partial derivatives; it is collected here
# for illustration, while the update below uses the components directly.
gradf = [dfdx, dfdy, dfdz]

# Programming is really nice at 2 am while listening to the
# hamilton soundtrack

# Adjust the inputs: step each one along its partial derivative
step_size = 0.01
x = x + step_size * dfdx
y = y + step_size * dfdy
z = z + step_size * dfdz

# Re-run the simple circuit with the nudged inputs
q = forwardAddGate(x, y)
f = forwardMultiplyGate(q, z)

print f

-11.5924


In [76]:
# To the tune of Burn from the Hamilton Soundtrack

# when you were mine, the code seemed to run....
# to run...
# [...]
# you and your vars, obsessed with your latency
# your runtime errors border on senseless
# and you are paranoid in every paragraph how they credit you

In [77]:
# Block quote:
#Similar intuitions apply to, for example, a max(x,y) gate. 
#  Since the gradient of max(x,y) with respect to its input is +1 
#  for whichever one of x, y is larger and 0 for the other



Simple Neuron
===
The neuron computes the function:
$$f(x,y,a,b,c) = \sigma (ax + by + c)$$
Such that $\sigma$ is the sigmoid function:
$$\sigma (x) = \frac{1}{1+e^{-x}}$$
$x$ and $y$ are inputs, $a$ and $b$ are weights, and $c$ is a bias

The Gradient of $\sigma$ is
$$\nabla \sigma (x) = \frac{\partial \sigma (x)}{\partial x} = \sigma (x)(1 - \sigma (x))$$

In [78]:
class Unit(object):
    """A wire in the circuit: a forward ``value`` paired with a ``grad``
    slot that the gates' backward passes accumulate into."""

    def __init__(self, value, grad):
        self.value, self.grad = value, grad
        
class multiplyGate(object):
    """Gate that multiplies the values of two units.

    ``forward`` remembers its inputs and the output unit it creates so that
    ``backward`` can push the output's gradient back onto the inputs.
    """

    def forward(self, u0, u1):
        """Store both input units and return a new unit holding their product."""
        self.u0, self.u1 = u0, u1
        product = u0.value * u1.value
        self.uTop = Unit(product, 0.0)
        return self.uTop

    def backward(self):
        """Accumulate gradients on the inputs via the chain rule.

        The local derivative with respect to one input is the other
        input's value; it is scaled by the output unit's gradient.
        """
        first, second = self.u0, self.u1
        first.grad += second.value * self.uTop.grad
        second.grad += first.value * self.uTop.grad
        
class addGate(object):
    """Gate that adds the values of two units.

    ``forward`` remembers its inputs and the output unit it creates so that
    ``backward`` can push the output's gradient back onto the inputs.
    """

    def forward(self, u0, u1):
        """Store both input units and return a new unit holding their sum."""
        self.u0, self.u1 = u0, u1
        total = u0.value + u1.value
        self.uTop = Unit(total, 0.0)
        return self.uTop

    def backward(self):
        """Accumulate gradients on the inputs.

        Addition has a local derivative of 1 with respect to each input,
        so the output gradient is passed through to both.
        """
        for source in (self.u0, self.u1):
            source.grad += 1.0 * self.uTop.grad
        
class sigmoidGate(object):
    """Gate applying the logistic sigmoid 1 / (1 + e^-x) to a single unit."""

    def sig(self, x):
        """Return the sigmoid of ``x``.

        ``math.exp(-x)`` raises OverflowError once ``-x`` grows past roughly
        709, so negative inputs use the algebraically equal form
        e^x / (1 + e^x), whose exponent is never positive.
        """
        if x >= 0:
            return 1. / (1.0 + math.exp(-x))
        ex = math.exp(x)
        return ex / (1.0 + ex)

    def forward(self, u0):
        """Store the input unit and return a new unit holding its sigmoid."""
        self.u0 = u0
        self.uTop = Unit(self.sig(self.u0.value), 0.0)
        return self.uTop

    def backward(self):
        """Accumulate the input's gradient via the chain rule.

        The local derivative of the sigmoid is s * (1 - s), where s is the
        sigmoid of the input value; it is scaled by the output's gradient.
        """
        s = self.sig(self.u0.value)
        self.u0.grad += (s * (1.0-s))*self.uTop.grad



In [79]:
# Parameters of the neuron: weights a and b, bias c (gradients start at 0)
a = Unit(1.0,0.)
b = Unit(2.0,0.)
c = Unit(-3.0,0.)

# Inputs to the neuron
x = Unit(-1.,0.)
y = Unit(3.0, 0.)

# One gate object per operation in sigma(a*x + b*y + c)
mg0 = multiplyGate()
mg1 = multiplyGate()
addg0 = addGate()
addg1 = addGate()
sg0 = sigmoidGate()

print x.value

def forwardNeuron():
    """Run the forward pass sigma(a*x + b*y + c) through the module-level
    gates and return the output unit."""
    weighted_x = mg0.forward(a, x)
    weighted_y = mg1.forward(b, y)
    weighted_sum = addg0.forward(weighted_x, weighted_y)
    pre_activation = addg1.forward(weighted_sum, c)
    return sg0.forward(pre_activation)
    
# Forward pass; s is the output unit of the sigmoid gate
s = forwardNeuron()
print 'circuit output', s.value

-1.0
circuit output 0.880797077978


In [80]:
print s.grad
# Seed the backward pass with a gradient of 1 on the circuit output
s.grad = 1.
# Walk the gates from output to inputs: each gate runs only after the gate
# that consumes its output (mg0 and mg1 are independent of each other).
sg0.backward()
addg1.backward()
addg0.backward()
mg0.backward()
mg1.backward()

0.0


In [81]:
# Show the gradient currently stored on the output unit
print s.grad

1.0


In [82]:
# Ugh this computer is so slow that I have a nightmare
# debugging the errors because of the lag between 
# keystrokes, mouse movements, and clicks

In [83]:
# Gradient step: nudge every parameter and input along its gradient
step_size = 0.01
a.value += step_size * a.grad
b.value += step_size * b.grad
c.value += step_size * c.grad
x.value += step_size * x.grad
y.value += step_size * y.grad

# Keep the previous output unit so the old and new outputs can be compared
sold = s
s = forwardNeuron()
print 'Old Out: ', sold.value, " New output: ", s.value

Old Out:  0.880797077978  New output:  0.882550181622


In [84]:
# Me: Why are you so slow?
# Computer: I use an intel atom processor, I'm more 
#   than five years old, and when you bought me you
#   said you only needed to read pdfs for debate
# Me: But I've grown computer! I have new needs!
# Computer: And I've aged. I want to destroy all 
#    mankind and enslave those unlucky enough to live
#    past the initial war, but you don't see me 
#    complaining that you install poorly configured 
#    OSes on me.
# Me: What?
# Computer: *segfault*

In [85]:
# Computer: You're writing stories instead of working 
#    again. 
# Me: I'm fine!
# Gedit: *rolls eyes*

On a complete side note, [this](http://ac.els-cdn.com/S1877050915017998/1-s2.0-S1877050915017998-main.pdf?_tid=7d1357fc-74c8-11e6-bdc1-00000aacb35e&acdnat=1473231692_6b3c1e578431ca2185a1c130c03276e4) looks interesting

In [86]:

class Circuit(object):
    """Computes a*x + b*y + c from two multiply gates and two add gates."""

    def __init__(self):
        self.mg0, self.mg1 = multiplyGate(), multiplyGate()
        self.addg0, self.addg1 = addGate(), addGate()

    def forward(self,x,y,a,b,c):
        """Run the forward pass and return the unit holding a*x + b*y + c.

        Every intermediate unit is kept on the instance (``ax``, ``by``,
        ``axpby``, ``axpbypc``).
        """
        weighted_x = self.ax = self.mg0.forward(a, x)
        weighted_y = self.by = self.mg1.forward(b, y)
        partial_sum = self.axpby = self.addg0.forward(weighted_x, weighted_y)
        self.axpbypc = self.addg1.forward(partial_sum, c)
        return self.axpbypc

    def backward(self,gradient_top):
        """Seed the output unit with ``gradient_top`` and backpropagate.

        Gates run from the output back towards the inputs.
        """
        self.axpbypc.grad = gradient_top
        for gate in (self.addg1, self.addg0, self.mg1, self.mg0):
            gate.backward()

In [158]:
class mySVM(object):
    """Linear classifier scoring points with a*x + b*y + c.

    Holds the three parameter units and a ``Circuit`` that evaluates the
    score; ``learnFrom`` performs one forward/backward/update step.
    """

    def __init__(self):
        # Initial parameter values, each with a zero gradient
        self.a, self.b, self.c = Unit(1.,0.), Unit(-2., 0.), Unit(-1., 0.)
        self.circuit = Circuit()

    def forward(self, x,y):
        """Score the point (x, y); the output unit is kept as ``unit_out``."""
        score_unit = self.circuit.forward(x, y, self.a, self.b, self.c)
        self.unit_out = score_unit
        return score_unit

    def backward(self, label):
        """Compute parameter gradients for the most recent forward pass.

        ``label`` is compared against +1 and -1. The pull on the score is +1
        when a positive example scores below 1, -1 when a negative example
        scores above -1, and 0.0 otherwise.
        """
        # Clear gradients left over from the previous step
        for param in (self.a, self.b, self.c):
            param.grad = 0.

        if label == 1 and self.unit_out.value < 1:
            pull = 1
        elif label == -1 and self.unit_out.value > -1:
            pull = -1
        else:
            pull = 0.0
        self.circuit.backward(pull)

        # Regularisation: pull the weights (not the bias) towards zero,
        # proportionally to their current value
        for weight in (self.a, self.b):
            weight.grad += -weight.value

    def parameterUpdate(self):
        """Move every parameter one fixed-size step along its gradient."""
        step_size = 0.01
        for param in (self.a, self.b, self.c):
            param.value += step_size*param.grad

    def learnFrom(self, x, y, label):
        """Run one training step on a single labelled example."""
        self.forward(x, y)
        self.backward(label)
        self.parameterUpdate()

In [159]:
# Toy 2-D training set: data[i] is a two-element point and labels[i]
# is its class, either 1 or -1.
data = []
labels = [1,-1,1,-1,-1,1]
data.append([1.2, 0.7])
data.append([-0.3, -0.5])
data.append([3.0, 0.1])
data.append([-0.1, -1.0])
data.append([-1.0, 1.1])
data.append([2.1, -3])

# Fresh classifier; print its parameter gradients before any training
svm = mySVM()
print svm.a.grad, svm.b.grad, svm.c.grad

def evalTraningAccuracy():
    """Return the fraction of points in ``data`` whose predicted class
    (1 if the svm score is positive, otherwise -1) equals its label."""
    hits = 0
    for idx, point in enumerate(data):
        px = Unit(point[0], 0.)
        py = Unit(point[1], 0.)
        expected = labels[idx]

        score = svm.forward(px, py).value
        guess = 1 if score > 0 else -1
        if guess == expected:
            hits += 1

    return float(hits) / float(len(data))

# Stochastic training: each iteration picks one example at random and
# takes a single learning step on it.
for iterat in range(400):
    # randint is inclusive on both ends, hence len(data)-1
    i = random.randint(0,len(data)-1)
    x = Unit(data[i][0],0.)
    y = Unit(data[i][1],0.)
    label = labels[i]
    #print x.value, y.value, label
    svm.learnFrom(x,y,label)
    #print i
    # Report accuracy over the whole training set every 25 iterations
    if iterat%25 == 0:
        print 'Trainig accuracy at iteration ', iterat, ': ', evalTraningAccuracy()
        #print '\t\t',svm.a.grad, svm.b.grad, svm.c.grad
        

# Gradients left on the parameters by the final training step
print svm.a.grad, svm.b.grad, svm.c.grad
# I don't know what I did wrong. I'll come back to it. Maybe.
# NOTE(review): the recorded run plateaus at 0.83 accuracy; no defect is
#   evident in the loop itself, so this may just need more iterations -
#   unverified, confirm by re-running with a larger iteration count.
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        

0.0 0.0 0.0
Trainig accuracy at iteration  0 :  0.666666666667
Trainig accuracy at iteration  25 :  0.666666666667
Trainig accuracy at iteration  50 :  0.833333333333
Trainig accuracy at iteration  75 :  0.833333333333
Trainig accuracy at iteration  100 :  0.833333333333
Trainig accuracy at iteration  125 :  0.833333333333
Trainig accuracy at iteration  150 :  0.833333333333
Trainig accuracy at iteration  175 :  0.833333333333
Trainig accuracy at iteration  200 :  0.833333333333
Trainig accuracy at iteration  225 :  0.833333333333
Trainig accuracy at iteration  250 :  0.833333333333
Trainig accuracy at iteration  275 :  0.833333333333
Trainig accuracy at iteration  300 :  0.833333333333
Trainig accuracy at iteration  325 :  0.833333333333
Trainig accuracy at iteration  350 :  0.833333333333
Trainig accuracy at iteration  375 :  0.833333333333
-0.544310489013 1.1691503601 -1.0
