In [None]:
import numpy as np
import random

def sigmoid(x): 
    return 1/(1+np.exp(-x))

def sigmoid_der(x):
    return x*(1-x)

class XOR:
    def __init__(self, inputs):
        random.seed(1)
        self.inputs=inputs
        self.l=len(self.inputs) # equals 4, number of set of inputs
        self.li=len(self.inputs[0]) #equals 2, number of input units
        
        self.wi=np.random.random((self.li,self.l)) #equals (2,4)
        self.wh=np.random.random((self.l,1)) #equals (4, 1)
    
    def think(self, inp):
        s1=sigmoid(np.dot(inp, self.wi))
        s2=sigmoid(np.dot(s1, self.wh))
        return s2
    
#back propagation:
    def train(self, inputs, outputs, it):
        for i in range(it):
            l0=inputs
            l1=sigmoid(np.dot(l0, self.wi))
            l2=sigmoid(np.dot(l1, self.wh))
            
            l2_err=outputs - l2
            l2_delta=np.multiply(l2_err, sigmoid_der(l2))
            
            l1_err=np.dot(l2_delta, self.wh.T)
            l1_delta=np.multiply(l1_err, sigmoid_der(l1))
            
            self.wh+=np.dot(l1.T, l2_delta)
            self.wi+=np.dot(l0.T, l1_delta)

inputs=np.array([[0,0] , [0,1] , [1,0] , [1,1]])
outputs=np.array( [ [0] , [1] , [1] , [0] ] )    
n=XOR(inputs)
print("Before training: ")
print(n.think(inputs))
n.train(inputs, outputs, 10000)
print("After training: ")
print(n.think(inputs))

In [None]:
import numpy as np

# X = (hours sleeping, hours studying), y = score on test
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)

# scale units
X = X/np.amax(X, axis=0) # maximum of X array
y = y/100 # max test score is 100

class Neural_Network(object):
    def __init__(self):
    #parameters
        self.inputSize = 2
        self.outputSize = 1
        self.hiddenSize = 3

    #weights
        self.W1 = np.random.randn(self.inputSize, self.hiddenSize) # (3x2) weight matrix from input to hidden layer
        self.W2 = np.random.randn(self.hiddenSize, self.outputSize) # (3x1) weight matrix from hidden to output layer

    def forward(self, X):
    #forward propagation through our network
        self.z = np.dot(X, self.W1) # dot product of X (input) and first set of 3x2 weights
        self.z2 = self.sigmoid(self.z) # activation function
        self.z3 = np.dot(self.z2, self.W2) # dot product of hidden layer (z2) and second set of 3x1 weights
        o = self.sigmoid(self.z3) # final activation function
        return o 

    def sigmoid(self, s):
    # activation function 
        return 1/(1+np.exp(-s))

    def sigmoidPrime(self, s):
    #derivative of sigmoid
        return s * (1 - s)

    def backward(self, X, y, o):
    # backward propgate through the network
        self.o_error = y - o # error in output
        self.o_delta = self.o_error*self.sigmoidPrime(o) # applying derivative of sigmoid to error

        self.z2_error = self.o_delta.dot(self.W2.T) # z2 error: how much our hidden layer weights contributed to output error
        self.z2_delta = self.z2_error*self.sigmoidPrime(self.z2) # applying derivative of sigmoid to z2 error

        self.W1 += X.T.dot(self.z2_delta) # adjusting first set (input --> hidden) weights
        self.W2 += self.z2.T.dot(self.o_delta) # adjusting second set (hidden --> output) weights

    def train (self, X, y):
        o = self.forward(X)
        self.backward(X, y, o)

NN = Neural_Network()
for i in range(1000): # trains the NN 1,000 times
    print("Input: \n" + str(X)) 
    print("Actual Output: \n" + str(y))
    print( "Predicted Output: \n" + str(NN.forward(X)) )
    print ("Loss: \n" + str(np.mean(np.square(y - NN.forward(X))))) # mean sum squared loss
    print ("\n")
    NN.train(X, y)

In [None]:
# -*- coding: utf-8
import numpy as np

# define sigmoid function
def nonlin(x,deriv=False):
    if(deriv==True):
        return x*(1-x)

    return 1/(1+np.exp(-x))


# input dataset 总共有4组输入
X = np.array([  [0,0,1],
                [0,1,1],
                [1,0,1],
                [1,1,1] ])

# output dataset 四组输出
y = np.array([[0,1,1,0]]).T

#seed random numbers to make calculation
np.random.seed(1)

# initialize weights randomly with mean 0
#theta0 是一个3*4的参数矩阵,输入层有3个输入节点(神经元)，隐藏层有4个节点(神经元)，所以参数是3*4d的矩阵
theta0 = 2*np.random.random((3,4)) - 1

#theta1是一个4*1的参数矩阵，隐藏层有4个节点(神经元)，输出层有1个节点(神经元)
theta1 = 2*np.random.random((4,1)) - 1

for j in range(60000):
    a0 = X  #a0表示第一层(输入层)，a0的每一行表示一组输入数据
    a1 = nonlin(np.dot(a0,theta0)) #a0与theta0的相乘得到的就是z1，经过sigmod函数就是a1了。a1是4*4的矩阵。注意是批量运算，a1的每一行代表一组训练数据。
    a2 = nonlin(np.dot(a1,theta1)) #跟上一个语句一样的道理,a2是一个4*1的矩阵，每一行代表一组训练数据

    E = y - a2 #获得偏差E，4*1的矩阵，每一行代表一组训练数据

    if (j% 10000) == 0:
        print("Error: " + str(np.mean(np.abs(E))))

    a2_delta = E*nonlin(a2,deriv=True) #这才是点乘，对应元素相乘，a2_delta是一个4*1的矩阵，每一行代表一组训练数据
    a1_error = a2_delta.dot(theta1.T) # a1__error其实就是偏差对a1求偏导对应输出层的E，也是每一行代表一组训练数据
    a1_delta = a1_error*nonlin(a1,deriv=True)

    theta1 = theta1 + a1.T.dot(a2_delta)
    theta0 = theta0 + a0.T.dot(a1_delta)

print("output after training:")
print(a2)

In [8]:
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

vec_sigmoid = np.vectorize(sigmoid)

input_sz = 2;
hidden_sz = 3;
output_sz = 1;
theta1 = np.matrix(0.5 * np.sqrt(6.0 / (input_sz+hidden_sz)) * (np.random.rand(1+input_sz,hidden_sz)-0.5))
theta2 = np.matrix(0.5 * np.sqrt(6.0 / (hidden_sz+output_sz)) * (np.random.rand(1+hidden_sz,output_sz)-0.5))

def fit(x, y, theta1, theta2, learn_rate=.1):
    #forward pass
    layer1 = np.matrix(x, dtype='f')
    layer1 = np.c_[np.ones(1), layer1]
    # Binesh - for layer2 we need to add a bias term.
    layer2 = np.c_[np.ones(1), vec_sigmoid(layer1.dot(theta1))]
    layer3 = sigmoid(layer2.dot(theta2))

    #backprop
    delta3 = y - layer3
    # Binesh - In reality, this is the _negative_ derivative of the cross entropy function
    # wrt the _input_ to the final sigmoid function.

    delta2 = np.multiply(delta3.dot(theta2.T), np.multiply(layer2, (1-layer2)))
    # Binesh - We actually don't use the delta for the bias term. (What would be the point?
    # it has no inputs. Hence the line below.
    delta2 = delta2[:,1:]

    # But, delta's are just derivatives wrt the inputs to the sigmoid.
    # We don't add those to theta directly. We have to multiply these by
    # the preceding layer to get the theta2d's and theta1d's
    theta2d = np.dot(layer2.T, delta3)
    theta1d = np.dot(layer1.T, delta2)

    #update weights
    # Binesh - here you had delta3 and delta2... Those are not the
    # the derivatives wrt the theta's, they are the derivatives wrt
    # the inputs to the sigmoids.. (As I mention above)
    theta2 += learn_rate * theta2d #??
    theta1 += learn_rate * theta1d #??

def train(X, Y):
    for _ in range(10000):
        for i in range(4):
            x = X[i]
            y = Y[i]
            fit(x, y, theta1, theta2)


# Binesh - Here's a little test function to see that it actually works
def test(X):
    for i in range(4):
        layer1 = np.matrix(X[i],dtype='f')
        layer1 = np.c_[np.ones(1), layer1]
        layer2 = np.c_[np.ones(1), vec_sigmoid(layer1.dot(theta1))]
        layer3 = sigmoid(layer2.dot(theta2))
       

X = [(0,0), (1,0), (0,1), (1,1)]
Y = [0, 1, 1, 0]    
train(X, Y)
print(train(X,Y))

# Binesh - Alright, let's see!
print(test(X))

None
None


In [13]:
import matplotlib.pyplot as plt