In [1]:
import numpy as np
from scipy.stats import logistic

In [2]:
def onehot_output(output):
    """Return a one-hot version of ``output``.

    For every row of ``output`` the position of its largest entry is set to 1
    in a freshly allocated float array of the same shape; every other entry
    stays 0. When a row contains ties, the first maximum is the one marked.
    """
    encoded = np.zeros(output.shape)
    for row_index, row in enumerate(output):
        encoded[row_index, row.argmax()] = 1
    return encoded

In [3]:
class NeuralNetwork:
    """Fully connected sigmoid network trained with full-batch gradient descent.

    Every layer computes ``logistic.cdf(inputs.dot(W.T))``; there are no bias
    terms. Each training iteration runs one forward pass over all rows of
    ``X`` and then applies one weight update averaged over those rows.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_inputs)
        Training inputs. ``np.matrix`` and integer inputs are accepted and
        stored as a float ndarray.
    Y : array-like, shape (n_samples, n_outputs)
        Training targets, stored as a float ndarray.
    hidden_layers : sequence of int
        Width of each hidden layer, input side first. May be empty, in which
        case the network is a single sigmoid layer.
    learning_rate : float
        Step size used for the weight updates.
    n_iterations : int
        Number of full-batch iterations performed by ``train``.
    seed : int or None, optional
        When given, the initial weights are drawn from a private
        ``np.random.RandomState(seed)``; when ``None`` the global
        ``np.random`` state is used.
    learning_rate_decay : float, optional
        Factor the learning rate is multiplied by after every iteration.
        The default of 1.0 keeps the learning rate constant.

    Raises
    ------
    ValueError
        If ``X`` and ``Y`` do not have the same, non-zero number of rows.
    """

    def __init__(self, X, Y, hidden_layers, learning_rate, n_iterations,
                 seed=None, learning_rate_decay=1.0):
        # Plain ndarrays keep every ``*`` below element-wise; with an
        # np.matrix target the same operator would be a matrix product.
        self.X = np.asarray(X, dtype=float)
        self.Y = np.asarray(Y, dtype=float)
        if self.X.shape[0] == 0 or self.X.shape[0] != self.Y.shape[0]:
            raise ValueError("X and Y must have the same, non-zero number of rows")
        self.learning_rate = learning_rate
        self.learning_rate_decay = learning_rate_decay
        self.n_iterations = n_iterations
        hidden_layers = tuple(hidden_layers)
        self.layers = [np.zeros((self.X.shape[0], width)) for width in hidden_layers]
        rand = np.random.rand if seed is None else np.random.RandomState(seed).rand
        # One (n_out, n_in) weight matrix per pair of consecutive layer sizes,
        # drawn uniformly from [0, 1). Building them from the size list also
        # covers an empty ``hidden_layers``.
        sizes = (self.X.shape[1],) + hidden_layers + (self.Y.shape[1],)
        self.weight = [np.matrix(rand(n_out, n_in))
                       for n_in, n_out in zip(sizes[:-1], sizes[1:])]
        self.output = np.zeros(self.Y.shape)

    def get_weight(self):
        """Return the list of weight matrices, input side first."""
        return self.weight

    def get_hidden_values(self):
        """Return the hidden activations stored by the last training forward pass."""
        return self.layers

    def _activations(self, inputs):
        """Return the sigmoid activations of every layer for ``inputs``."""
        activations = []
        signal = inputs
        for weight in self.weight:
            signal = np.asarray(logistic.cdf(signal.dot(np.asarray(weight).T)))
            activations.append(signal)
        return activations

    def forward_propagation(self):
        """Run the training inputs through the network and store the activations."""
        activations = self._activations(self.X)
        # Slice assignment keeps the list object handed out by
        # get_hidden_values() up to date.
        self.layers[:] = activations[:-1]
        self.output = activations[-1]

    def backpropagation(self):
        """Update every weight matrix from the deltas of the last forward pass."""
        deltas = self.calculate_deltas()
        step = self.learning_rate / self.X.shape[0]
        layer_inputs = [self.X] + self.layers
        for i, (delta, inputs) in enumerate(zip(deltas, layer_inputs)):
            # The deltas already carry the sign of (target - output), so the
            # update is added rather than subtracted.
            self.weight[i] += step * delta.T.dot(inputs)

    def calculate_deltas(self):
        """Return the delta of every layer, first hidden layer first, output last."""
        output_error = self.output_error()
        return self.hidden_layer_error(output_error) + [output_error]

    def output_error(self):
        """Return the output-layer delta: sigmoid derivative times (target - output)."""
        return self.output * (1 - self.output) * (self.Y - self.output)

    def hidden_layer_error(self, output_error):
        """Propagate ``output_error`` backwards and return the hidden-layer deltas.

        The returned list is ordered like ``self.layers`` (input side first).
        """
        result = []
        error = np.asarray(output_error)
        # Walk from the last hidden layer towards the input; weight[0] feeds
        # the first hidden layer and is therefore not needed here.
        for layer, weight in zip(reversed(self.layers), reversed(self.weight[1:])):
            error = layer * (1 - layer) * error.dot(np.asarray(weight))
            result.append(error)
        return result[::-1]

    def update_learning_rate(self, current_iteration):
        """Multiply the learning rate by ``learning_rate_decay``.

        ``current_iteration`` is accepted but not used by this schedule.
        """
        self.learning_rate = self.learning_rate * self.learning_rate_decay

    def train(self):
        """Run ``n_iterations`` rounds of forward pass, weight update and decay."""
        for i in range(self.n_iterations):
            self.forward_propagation()
            self.backpropagation()
            self.update_learning_rate(i)

    def predict(self, new_inputs):
        """Return the output activations for ``new_inputs``, one row per sample.

        The stored training activations are left untouched. A 1-D input is
        treated as a single sample and yields a single-row 2-D result.
        """
        inputs = np.atleast_2d(np.asarray(new_inputs, dtype=float))
        return self._activations(inputs)[-1]

In [4]:
class StochasticNeuralNetwork:
    """Fully connected sigmoid network trained on one sample per iteration.

    Every layer computes ``logistic.cdf(W.dot(x))`` on a column vector; there
    are no bias terms. ``train`` visits the rows of ``X`` in order, wrapping
    around at the end, and updates the weights after every single sample.
    The samples are not shuffled.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_inputs)
        Training inputs. ``np.matrix`` and integer inputs are accepted and
        stored as a float ndarray.
    Y : array-like, shape (n_samples, n_outputs)
        Training targets, stored as a float ndarray.
    hidden_layers : sequence of int
        Width of each hidden layer, input side first. May be empty.
    learning_rate : float
        Step size used for the weight updates.
    n_iterations : int
        Number of single-sample updates performed by ``train``.
    seed : int or None, optional
        When given, the initial weights are drawn from a private
        ``np.random.RandomState(seed)``; when ``None`` the global
        ``np.random`` state is used.

    Raises
    ------
    ValueError
        If ``X`` and ``Y`` do not have the same, non-zero number of rows.
    """

    def __init__(self, X, Y, hidden_layers, learning_rate, n_iterations, seed=None):
        # Plain ndarrays keep every ``*`` below element-wise; with an
        # np.matrix target the same operator would be a matrix product.
        self.X = np.asarray(X, dtype=float)
        self.Y = np.asarray(Y, dtype=float)
        if self.X.shape[0] == 0 or self.X.shape[0] != self.Y.shape[0]:
            raise ValueError("X and Y must have the same, non-zero number of rows")
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        hidden_layers = tuple(hidden_layers)
        self.layers = [np.zeros((width, 1)) for width in hidden_layers]
        rand = np.random.rand if seed is None else np.random.RandomState(seed).rand
        # One (n_out, n_in) weight matrix per pair of consecutive layer sizes,
        # drawn uniformly from [0, 1). Building them from the size list also
        # covers an empty ``hidden_layers``.
        sizes = (self.X.shape[1],) + hidden_layers + (self.Y.shape[1],)
        self.weight = [np.matrix(rand(n_out, n_in))
                       for n_in, n_out in zip(sizes[:-1], sizes[1:])]
        self.input = np.zeros((self.X.shape[1], 1))
        self.output = np.zeros((self.Y.shape[1], 1))
        self.curr_index = 0
        self.next_index = 0

    def get_weight(self):
        """Return the list of weight matrices, input side first."""
        return self.weight

    def get_hidden_values(self):
        """Return the hidden activations (column vectors) of the last forward pass."""
        return self.layers

    def set_input(self):
        """Select the next training row as a column vector and advance the cursor."""
        self.curr_index = self.next_index
        self.input = self.X[self.curr_index, :].reshape(-1, 1)
        # Wrap around so training cycles through the rows of X.
        self.next_index = (self.curr_index + 1) % self.X.shape[0]

    def forward_propagation(self):
        """Run the current input through the network and store the activations."""
        signal = self.input
        for i, weight in enumerate(self.weight[:-1]):
            signal = np.asarray(logistic.cdf(np.asarray(weight).dot(signal))).reshape(-1, 1)
            self.layers[i] = signal
        last = np.asarray(self.weight[-1])
        self.output = np.asarray(logistic.cdf(last.dot(signal))).reshape(-1, 1)

    def backpropagation(self):
        """Update every weight matrix from the deltas of the current sample."""
        deltas = self.calculate_deltas()
        layer_inputs = [self.input] + self.layers
        for i, (delta, x) in enumerate(zip(deltas, layer_inputs)):
            # The deltas already carry the sign of (target - output), so the
            # update is added rather than subtracted.
            self.weight[i] += self.learning_rate * delta.dot(x.T)

    def calculate_deltas(self):
        """Return the delta of every layer, first hidden layer first, output last."""
        output_error = self.output_error()
        return self.hidden_layer_error(output_error) + [output_error]

    def output_error(self):
        """Return the output-layer delta for the current sample as a column vector."""
        y = self.Y[self.curr_index, :].reshape(-1, 1)
        o = self.output
        return o * (1 - o) * (y - o)

    def hidden_layer_error(self, output_error):
        """Propagate ``output_error`` backwards and return the hidden-layer deltas.

        The returned list is ordered like ``self.layers`` (input side first).
        """
        result = []
        error = np.asarray(output_error)
        # Walk from the last hidden layer towards the input; weight[0] feeds
        # the first hidden layer and is therefore not needed here.
        for layer, weight in zip(reversed(self.layers), reversed(self.weight[1:])):
            error = layer * (1 - layer) * np.asarray(weight).T.dot(error)
            result.append(error)
        return result[::-1]

    def train(self):
        """Perform ``n_iterations`` single-sample forward/backward updates."""
        for _ in range(self.n_iterations):
            self.set_input()
            self.forward_propagation()
            self.backpropagation()

    def predict(self, new_inputs):
        """Return the output activations for ``new_inputs``, one row per sample.

        The stored training activations are left untouched. A 1-D input is
        treated as a single sample and yields a single-row 2-D result.
        """
        output = np.atleast_2d(np.asarray(new_inputs, dtype=float))
        for weight in self.weight:
            output = np.asarray(logistic.cdf(output.dot(np.asarray(weight).T)))
        return output

### Test size 2: identity mapping through a 1-unit hidden layer

In [5]:
# Number of one-hot patterns; the cells below use np.eye(size) as both input and target.
size = 2

In [6]:
# Fit a network with a single one-unit hidden layer that maps each row of
# the identity matrix onto itself (10000 full-batch iterations).
nn = NeuralNetwork(
    X=np.eye(size),
    Y=np.eye(size),
    hidden_layers=(1,),
    learning_rate=1,
    n_iterations=10000,
)
nn.train()

In [7]:
# Run the trained network on the same identity inputs; row i holds the
# output activations for the i-th one-hot pattern.
prediction = nn.predict(np.matrix(np.eye(size)))
prediction

array([[0.4997707 , 0.5002293 ],
       [0.01171122, 0.98828907]])

In [8]:
# Mark the largest activation in each row with 1 and set everything else to 0.
onehot_output(prediction)

array([[0., 1.],
       [0., 1.]])

In [9]:
# Print every weight matrix, starting with the input-to-hidden layer.
for i in nn.get_weight():
    print(i)

[[-8.51582524  3.41962791]]
[[-4.58057523]
 [ 4.58060072]]


In [10]:
# Hidden-layer activations stored by the most recent training forward pass,
# rounded to the nearest integer (one row per training pattern).
for i in nn.get_hidden_values():
    print(i.round())

[[0.]
 [1.]]


### Test size 4: identity mapping through a 2-unit hidden layer

In [11]:
# Number of one-hot patterns; the cells below use np.eye(size) as both input and target.
size = 4

In [12]:
# Fit a network with a single two-unit hidden layer that maps each row of
# the identity matrix onto itself (10000 full-batch iterations).
nn = NeuralNetwork(
    X=np.eye(size),
    Y=np.eye(size),
    hidden_layers=(2,),
    learning_rate=1,
    n_iterations=10000,
)
nn.train()

In [13]:
# Output activations of the trained network for each one-hot input row.
prediction = nn.predict(np.matrix(np.eye(size)))
prediction

array([[9.44099570e-01, 1.05046412e-01, 1.80424891e-08, 1.05046412e-01],
       [1.03331960e-01, 3.50896485e-01, 1.03540562e-01, 3.50896485e-01],
       [1.63870997e-08, 1.04827823e-01, 9.44106086e-01, 1.04827823e-01],
       [1.03331960e-01, 3.50896485e-01, 1.03540562e-01, 3.50896485e-01]])

In [14]:
# Mark the largest activation in each row with 1 and set everything else to 0.
onehot_output(prediction)

array([[1., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [15]:
# Rounded hidden activations from the most recent training forward pass,
# one row per training pattern.
for i in nn.get_hidden_values():
    print(i.round())

[[1. 0.]
 [0. 0.]
 [0. 1.]
 [0. 0.]]


### Test size 8: identity mapping through a 3-unit hidden layer

In [16]:
# Number of one-hot patterns; the cells below use np.eye(size) as both input and target.
size = 8

In [17]:
# Fit a network with a single three-unit hidden layer that maps each row of
# the identity matrix onto itself (10000 full-batch iterations).
nn = NeuralNetwork(
    X=np.eye(size),
    Y=np.eye(size),
    hidden_layers=(3,),
    learning_rate=1,
    n_iterations=10000,
)
nn.train()

In [18]:
# Output activations of the trained network for each one-hot input row.
prediction = nn.predict(np.matrix(np.eye(size)))
prediction

array([[8.97777654e-01, 7.77996885e-03, 9.07960899e-08, 3.00872489e-08,
        2.34081085e-02, 9.64294043e-03, 2.00829185e-02, 1.64456215e-01],
       [1.15153811e-02, 9.22974814e-01, 3.22911605e-06, 4.14140258e-02,
        1.23606073e-06, 4.66854461e-02, 6.55656595e-02, 1.08921176e-01],
       [1.63726465e-07, 8.08209459e-08, 9.12795261e-01, 6.92452856e-03,
        3.44589333e-02, 1.11512369e-01, 7.18186921e-02, 6.17153582e-03],
       [1.54897100e-06, 4.18496060e-02, 9.86818184e-03, 9.11981410e-01,
        6.54664019e-07, 1.15425752e-01, 9.60275357e-02, 1.88699930e-02],
       [2.62796622e-02, 7.97697606e-06, 3.56023385e-02, 7.00284890e-06,
        9.28846500e-01, 7.73871223e-02, 8.36873644e-02, 8.64186483e-02],
       [1.51289084e-02, 3.06537695e-02, 1.29486209e-01, 1.07940023e-01,
        4.80403972e-02, 2.54629161e-01, 2.48560117e-01, 1.78449579e-01],
       [3.19137443e-02, 5.18935873e-02, 9.01884624e-02, 8.94246702e-02,
        5.63933236e-02, 2.53843164e-01, 2.54685510e-01, 2.

In [19]:
# Mark the largest activation in each row with 1 and set everything else to 0.
onehot_output(prediction)

array([[1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1.]])

In [20]:
# Rounded hidden activations from the most recent training forward pass,
# one row per training pattern.
for i in nn.get_hidden_values():
    print(i.round())

[[1. 0. 1.]
 [0. 0. 1.]
 [1. 1. 0.]
 [0. 1. 1.]
 [1. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


### Test size 16: identity mapping through a 4-unit hidden layer

In [21]:
# Number of one-hot patterns; the cells below use np.eye(size) as both input and target.
size = 16

In [22]:
# Fit a network with a single four-unit hidden layer that maps each row of
# the identity matrix onto itself (10000 full-batch iterations).
nn = NeuralNetwork(
    X=np.eye(size),
    Y=np.eye(size),
    hidden_layers=(4,),
    learning_rate=1,
    n_iterations=10000,
)
nn.train()

In [23]:
# Output activations of the trained network for each one-hot input row.
prediction = nn.predict(np.matrix(np.eye(size)))
prediction

array([[5.56038234e-01, 6.02960874e-06, 7.46371874e-02, 1.51837333e-05,
        2.10485723e-03, 2.25600084e-06, 4.83443722e-02, 1.50673075e-02,
        1.34395659e-01, 1.98721714e-01, 6.26137712e-04, 1.20960855e-02,
        3.07535080e-02, 7.16696273e-04, 9.34415633e-03, 3.46434738e-05],
       [5.53962170e-05, 7.61186080e-01, 1.50378220e-02, 2.83747433e-02,
        1.81497997e-01, 7.03869432e-02, 5.20879706e-07, 9.50389688e-02,
        2.48447618e-07, 2.64997384e-05, 6.13599883e-04, 3.55221010e-03,
        8.90527759e-02, 6.60143804e-05, 9.84776908e-03, 5.76238102e-06],
       [9.85140587e-02, 7.37234328e-03, 2.13863970e-01, 5.56489940e-02,
        9.04574826e-02, 1.54737602e-02, 1.21603312e-01, 3.10323284e-02,
        5.02658882e-02, 3.83780002e-02, 3.38866926e-02, 1.46260145e-02,
        1.49455236e-01, 7.50679906e-02, 8.33910180e-02, 6.58464515e-02],
       [1.78206442e-04, 3.47879217e-02, 7.66764258e-02, 7.13012710e-01,
        2.41191120e-01, 9.62402331e-02, 3.16279018e-03, 2.004

In [24]:
# Mark the largest activation in each row with 1 and set everything else to 0.
onehot_output(prediction)

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

In [25]:
# Rounded hidden activations from the most recent training forward pass,
# one row per training pattern.
for i in nn.get_hidden_values():
    print(i.round())

[[1. 0. 0. 1.]
 [1. 1. 1. 0.]
 [0. 0. 0. 0.]
 [0. 1. 0. 0.]
 [1. 1. 0. 0.]
 [0. 1. 1. 0.]
 [0. 0. 0. 1.]
 [1. 0. 0. 0.]
 [0. 0. 1. 1.]
 [1. 0. 0. 1.]
 [1. 1. 0. 1.]
 [1. 0. 1. 0.]
 [0. 0. 0. 0.]
 [0. 1. 1. 1.]
 [0. 0. 1. 0.]
 [0. 1. 1. 1.]]


## Test: XOR with a 2-unit hidden layer

In [26]:
# The four two-bit input patterns, one per row.
X = np.asmatrix([[1, 1], [0, 0], [1, 0], [0, 1]])
X

matrix([[1, 1],
        [0, 0],
        [1, 0],
        [0, 1]])

In [27]:
# Targets as a column vector, one row per sample.
Y = np.array([[0], [0], [1], [1]])
Y

array([[0],
       [0],
       [1],
       [1]])

In [28]:
# Fit a network with a single two-unit hidden layer to the X/Y pairs
# defined above (learning rate 0.6, 50000 full-batch iterations).
nn = NeuralNetwork(
    X=X,
    Y=Y,
    hidden_layers=(2,),
    learning_rate=0.6,
    n_iterations=50000,
)
nn.train()

In [29]:
# Output activation of the trained network for each of the four training rows.
prediction = nn.predict(X)
prediction

array([[0.11102602],
       [0.04182441],
       [0.91658384],
       [0.91658428]])