In [75]:
import pandas as pd
import numpy as np
# Load the XOR truth table. DataFrame.as_matrix() was removed in pandas 1.0;
# to_numpy() is the supported way to get the underlying ndarray.
data = pd.read_csv('XOR.csv').to_numpy()
# First two columns are the inputs, third column is the target.
X,y = data[:,0:2], data[:,2]
print(X,y)

[[0 0]
 [0 1]
 [1 0]
 [1 1]] [0 1 1 0]


In [81]:
class NeuralNetwork(object): # TODO: Generalize for multiple layers (pass in a list of #nodes/layer)
    """Feed-forward network with a single hidden layer and sigmoid activations.

    Each weight matrix stores its bias in column 0: before every matrix
    product the layer input is prefixed with a constant 1.

    Parameters
    ----------
    input_nodes, hidden_nodes, output_nodes : int
        Number of units in each layer (bias units not counted).
    learning_rate : float
        Step size applied to the averaged gradient in ``fit``.
    weights_i_to_h : array-like, optional
        Initial input->hidden weights, shape (hidden_nodes, input_nodes + 1).
        Drawn from a standard normal distribution when omitted.
    weights_h_to_o : array-like, optional
        Initial hidden->output weights, shape (output_nodes, hidden_nodes + 1).
        Drawn from a standard normal distribution when omitted.

    Supplied weights are copied and converted to float, so integer arrays
    are accepted and the caller's arrays are never modified by training.
    Each matrix is honoured independently of the other.
    """

    def __init__(self, input_nodes, hidden_nodes, output_nodes, learning_rate, weights_i_to_h=None, weights_h_to_o=None):
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes
        self.weights_input_to_hidden = self._init_weights(
            weights_i_to_h, (self.hidden_nodes, self.input_nodes + 1))
        self.weights_hidden_to_output = self._init_weights(
            weights_h_to_o, (self.output_nodes, self.hidden_nodes + 1))
        self.lr = learning_rate
        # Logistic sigmoid, applied element-wise.
        self.activation_function = lambda x: 1 / (1 + np.exp(-x))

    @staticmethod
    def _init_weights(initial, shape):
        """Return a private float weight matrix: random if `initial` is None, else a copy of it."""
        if initial is None:
            return np.random.randn(*shape)
        # dtype=float: the in-place gradient step in fit() cannot write floats
        # into an integer array. np.array copies, so the caller's data is safe.
        return np.array(initial, dtype=float)

    def _forward(self, x):
        """Run one sample through the network; return (hidden_outputs, final_outputs)."""
        x = np.asarray(x)
        hidden_inputs = np.dot(self.weights_input_to_hidden, np.append([1], x))
        hidden_outputs = self.activation_function(hidden_inputs)
        final_inputs = np.dot(self.weights_hidden_to_output, np.append([1], hidden_outputs))
        final_outputs = self.activation_function(final_inputs)
        return hidden_outputs, final_outputs

    def forward_propagation(self, x):
        """Return the output-layer activations, shape (output_nodes,), for one sample `x`."""
        return self._forward(x)[1]

    def back_propagation(self, x, y):
        """Compute the weight gradients for a single training sample.

        Parameters
        ----------
        x : array-like, shape (input_nodes,)
        y : scalar or array-like broadcastable to (output_nodes,)

        Returns
        -------
        (delta_weights_input_to_hidden, delta_weights_hidden_to_output)
            Arrays with the same shapes as the corresponding weight matrices.
        """
        x = np.asarray(x)
        hidden_outputs, final_outputs = self._forward(x)
        # Output delta is (prediction - target); no sigmoid-derivative factor
        # is applied at the output layer.
        output_errors = final_outputs - y  # (output_nodes,)
        # Propagate back through the non-bias weights ([:, 1:]) and scale by
        # the sigmoid derivative h * (1 - h).
        hidden_errors = (
            np.dot(self.weights_hidden_to_output[:, 1:].T, output_errors)
            * hidden_outputs * (1 - hidden_outputs)
        )  # (hidden_nodes,)
        delta_weights_hidden_to_output = np.outer(
            output_errors, np.append([1], hidden_outputs))  # (output_nodes, hidden_nodes + 1)
        delta_weights_input_to_hidden = np.outer(
            hidden_errors, np.append([1], x))  # (hidden_nodes, input_nodes + 1)
        return delta_weights_input_to_hidden, delta_weights_hidden_to_output

    def fit(self, X, y, epochs=10):
        """Train with full-batch gradient descent.

        Each epoch averages the per-sample gradients over all rows of `X`
        and takes one step of size ``self.lr``. `X` and `y` may be any
        array-like; an empty `X` leaves the weights unchanged.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = X.shape[0]
        if n_samples == 0:
            # Averaging over zero samples would divide by zero and fill the
            # weights with NaN.
            return
        for _ in range(epochs):
            grad_input_to_hidden = np.zeros(self.weights_input_to_hidden.shape)
            grad_hidden_to_output = np.zeros(self.weights_hidden_to_output.shape)
            for i in range(n_samples):
                d_w_i_to_h, d_w_h_to_o = self.back_propagation(X[i], y[i])
                grad_input_to_hidden += d_w_i_to_h
                grad_hidden_to_output += d_w_h_to_o
            self.weights_input_to_hidden -= self.lr * (grad_input_to_hidden / n_samples)
            self.weights_hidden_to_output -= self.lr * (grad_hidden_to_output / n_samples)

    def predict(self, X):
        """Return the network output for every sample in `X` as a 2-D array (one row per sample)."""
        return np.array([self.forward_propagation(x) for x in X], ndmin=2)
            

In [82]:
# w_i_to_h = np.array([[-30, 20, 20],[20, -30, -30]])
# w_h_to_o = np.array([[-20, 30, 30]])
# network = NeuralNetwork(2, 2, 1, 0.1, weights_i_to_h=w_i_to_h, weights_h_to_o=w_h_to_o)

# Seed NumPy's global RNG: the constructor draws its initial weights from
# np.random.randn, so without a seed every run trains a different network.
# NOTE(review): the seed value is arbitrary - confirm training still converges with it.
np.random.seed(0)
network = NeuralNetwork(2, 3, 1, 0.1)
network.fit(X, y, epochs=10000)
# Round the sigmoid outputs to 0/1 to read them as class labels.
print(np.round(network.predict([[0,0],[0,1],[1,0],[1,1]])))

[[ 0.]
 [ 1.]
 [ 1.]
 [ 0.]]


In [68]:
# Build a 3x1 column vector from a flat list: ndmin=2 promotes the list to a
# 1x3 row, and transposing turns that row into a column.
a = [1, 2, 3]
b = np.transpose(np.array(a, ndmin=2))
b

array([[1],
       [2],
       [3]])

In [47]:
# Multiplying an ndarray by a same-length Python list is element-wise.
np.array([1,2,3]) * [2,2,2]

array([2, 4, 6])

In [51]:
# list + ndarray is NumPy addition (the one-element list is broadcast), not list concatenation.
[1] + np.array([2,2,2])

array([3, 3, 3])

In [53]:
# np.append joins its two arguments into one flat array: the list's values
# come first, followed by the ndarray's values.
a, b = [2], np.array([1, 1])
np.append(a, b)

array([2, 1, 1])

In [4]:
# Outer product: c[i, j] = a[i] * b[j], so a length-3 and a length-5 vector
# give a 3x5 matrix.
a = np.arange(1, 4)
b = np.arange(2, 7)
c = np.outer(a, b)
c

array([[ 2,  3,  4,  5,  6],
       [ 4,  6,  8, 10, 12],
       [ 6,  9, 12, 15, 18]])

In [6]:
# Keep every row of c (defined in the previous cell) but drop its first column.
c[:,1:]

array([[ 3,  4,  5,  6],
       [ 6,  8, 10, 12],
       [ 9, 12, 15, 18]])