## Part a to e

In [None]:
import numpy as np
import matplotlib.pyplot as plt

####### Part a to e

def sigmoid(x):
    """Return the logistic function 1 / (1 + e^(-x)) of ``x``.

    ``x`` is passed straight to ``np.exp``, so scalars and NumPy arrays
    are both handled (arrays element-wise).
    """
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator

def sigmoid_prime(x):
    """Return the derivative of the logistic function with respect to ``x``.

    With s = 1 / (1 + e^(-x)) the derivative is s * (1 - s).
    """
    s = 1.0 / (1.0 + np.exp(-x))
    return s * (1 - s)

def tanh(x):
    """Return the hyperbolic tangent of ``x`` (delegates to ``np.tanh``)."""
    result = np.tanh(x)
    return result

def tanh_prime(x):
    """Return the derivative of tanh with respect to ``x``: 1 - tanh(x)^2."""
    squared = np.power(tanh(x), 2)
    return 1 - squared


class NeuralNetwork:
    """Fully connected feed-forward network trained by stochastic backpropagation.

    ``layers`` lists the number of units per layer, input first and output
    last (e.g. ``[2, 2, 1]``).  A constant 1 is prepended to every input
    sample as the bias unit, and every hidden layer is allocated one extra
    unit on top of the requested size.
    """

    def __init__(self, layers, activation='tanh'):
        """Select the activation pair and draw the initial weights.

        Args:
            layers: sequence of layer sizes (at least input and output).
            activation: ``'sigmoid'`` or ``'tanh'``.

        Raises:
            ValueError: if ``activation`` is not one of the supported names
                or fewer than two layer sizes are given.
        """
        if activation == 'sigmoid':
            self.activation = sigmoid
            self.activation_prime = sigmoid_prime
        elif activation == 'tanh':
            self.activation = tanh
            self.activation_prime = tanh_prime
        else:
            # Fail here rather than with an AttributeError deep inside fit().
            raise ValueError(
                "activation must be 'sigmoid' or 'tanh', got %r" % (activation,))

        if len(layers) < 2:
            raise ValueError('layers needs at least an input and an output size')

        # Weights are drawn uniformly from [-1, 1).
        self.weights = []
        # Input and hidden layers, e.g. layers = [2, 2, 1] -> (2+1) x (2+1).
        for i in range(1, len(layers) - 1):
            r = 2*np.random.random((layers[i-1] + 1, layers[i] + 1)) - 1
            self.weights.append(r)
        # Output layer, e.g. (2+1) x 1.  Indexed from the end of ``layers``
        # so it also works when there is no hidden layer at all.
        r = 2*np.random.random((layers[-2] + 1, layers[-1])) - 1
        self.weights.append(r)

    def fit(self, X, y, learning_rate=0.02, epochs=1000000):
        """Train with one randomly chosen sample per epoch and plot the error.

        Prints the epoch counter every 10000 epochs and the output error
        every 500 epochs; the printed errors are also collected and plotted
        against the epoch at which each one was taken.

        Args:
            X: 2-D array of input samples, one row per sample.
            y: array of targets, indexed by the same row number as ``X``.
            learning_rate: step size applied to every weight update.
            epochs: number of single-sample updates to perform.
        """
        # Add a column of ones to X: the bias unit of the input layer.
        ones = np.atleast_2d(np.ones(X.shape[0]))
        X = np.concatenate((ones.T, X), axis=1)

        recorded_epochs = []
        error_arr = []
        for k in range(epochs):
            if k % 10000 == 0:
                print('epochs:', k)

            sample = np.random.randint(X.shape[0])

            # Forward pass.  Keep the pre-activations z as well as the
            # activations a: the derivative has to be taken at z.
            a = [X[sample]]
            z = []
            for w in self.weights:
                z.append(np.dot(a[-1], w))
                a.append(self.activation(z[-1]))

            # Output layer error.
            error = y[sample] - a[-1]
            if k % 500 == 0:
                print('error = ', error)
                error_arr.append(error)
                recorded_epochs.append(k)

            # activation_prime is the derivative with respect to the
            # pre-activation input, so it is evaluated at z, not at a.
            deltas = [error * self.activation_prime(z[-1])]

            # Walk back from the last hidden layer to the first;
            # z[l - 1] is the pre-activation that produced a[l].
            for l in range(len(a) - 2, 0, -1):
                deltas.append(
                    deltas[-1].dot(self.weights[l].T) * self.activation_prime(z[l - 1]))

            # Reorder from [output -> first hidden] to [first hidden -> output]
            # so deltas[i] lines up with self.weights[i].
            deltas.reverse()

            # Weight update: outer product of the layer input and its delta.
            for idx, (layer_in, delta) in enumerate(zip(a, deltas)):
                layer = np.atleast_2d(layer_in)
                delta = np.atleast_2d(delta)
                self.weights[idx] += learning_rate * layer.T * delta

        # x-axis uses the epochs at which the errors were actually recorded.
        plt.plot(recorded_epochs, error_arr)

    def predict(self, x):
        """Return the network output for a single input sample ``x``."""
        # Prepend the bias unit, then feed forward through every layer.
        a = np.concatenate((np.ones(1), np.array(x)))
        for w in self.weights:
            a = self.activation(np.dot(a, w))
        return a

##### code for "XOR" with tanh activation function

if __name__ == '__main__':
    # XOR truth table: inputs and their expected outputs.
    X = np.array([
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1],
    ])
    y = np.array([
        [0],
        [1],
        [1],
        [0],
    ])

    # 2 inputs, 2 requested hidden units, 1 output, tanh activation.
    nn = NeuralNetwork(layers=[2, 2, 1], activation='tanh')
    nn.fit(X, y)

    # Show the trained network's output for every input pair.
    for e in X:
        print(e, nn.predict(e))

### Part f section 1 (Train the model for "OR" with tanh activation function)



In [None]:
##### code for "OR" with tanh activation function

if __name__ == '__main__':
    # OR truth table: inputs and their expected outputs.
    X = np.array([
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1],
    ])
    y = np.array([
        [0],
        [1],
        [1],
        [1],
    ])

    # 2 inputs, 2 requested hidden units, 1 output, tanh activation.
    nn = NeuralNetwork(layers=[2, 2, 1], activation='tanh')
    nn.fit(X, y)

    # Show the trained network's output for every input pair.
    for e in X:
        print(e, nn.predict(e))

### Part f section 2 (Train the model for "AND" with tanh activation function)


In [None]:
##### code for "AND" with tanh activation function

if __name__ == '__main__':
    # AND truth table: inputs and their expected outputs.
    X = np.array([
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1],
    ])
    y = np.array([
        [0],
        [0],
        [0],
        [1],
    ])

    # 2 inputs, 2 requested hidden units, 1 output, tanh activation.
    nn = NeuralNetwork(layers=[2, 2, 1], activation='tanh')
    nn.fit(X, y)

    # Show the trained network's output for every input pair.
    for e in X:
        print(e, nn.predict(e))

### Part g section 1 (Train the model for "XOR" with logistic activation function)

In [None]:
##### code for "XOR" with logistic activation function
class NeuralNetwork:
    """Fully connected feed-forward network trained by stochastic backpropagation.

    ``layers`` lists the number of units per layer, input first and output
    last (e.g. ``[2, 200, 1]``).  A constant 1 is prepended to every input
    sample as the bias unit, and every hidden layer is allocated one extra
    unit on top of the requested size.
    """

    def __init__(self, layers, activation='tanh'):
        """Select the activation pair and draw the initial weights.

        Args:
            layers: sequence of layer sizes (at least input and output).
            activation: ``'sigmoid'`` or ``'tanh'``.

        Raises:
            ValueError: if ``activation`` is not one of the supported names
                or fewer than two layer sizes are given.
        """
        if activation == 'sigmoid':
            self.activation = sigmoid
            self.activation_prime = sigmoid_prime
        elif activation == 'tanh':
            self.activation = tanh
            self.activation_prime = tanh_prime
        else:
            # Fail here rather than with an AttributeError deep inside fit().
            raise ValueError(
                "activation must be 'sigmoid' or 'tanh', got %r" % (activation,))

        if len(layers) < 2:
            raise ValueError('layers needs at least an input and an output size')

        # Weights are drawn uniformly from [-1, 1).
        self.weights = []
        # Input and hidden layers: (previous size + 1) x (this size + 1).
        for i in range(1, len(layers) - 1):
            r = 2*np.random.random((layers[i-1] + 1, layers[i] + 1)) - 1
            self.weights.append(r)
        # Output layer: (last hidden size + 1) x output size.  Indexed from
        # the end of ``layers`` so it also works without any hidden layer.
        r = 2*np.random.random((layers[-2] + 1, layers[-1])) - 1
        self.weights.append(r)

    def fit(self, X, y, learning_rate=0.02, epochs=1000000):
        """Train with one randomly chosen sample per epoch and plot the error.

        Prints the epoch counter every 100000 epochs and the output error
        every 5000 epochs; the printed errors are also collected and plotted
        against the epoch at which each one was taken.

        Args:
            X: 2-D array of input samples, one row per sample.
            y: array of targets, indexed by the same row number as ``X``.
            learning_rate: step size applied to every weight update.
            epochs: number of single-sample updates to perform.
        """
        # Add a column of ones to X: the bias unit of the input layer.
        ones = np.atleast_2d(np.ones(X.shape[0]))
        X = np.concatenate((ones.T, X), axis=1)

        recorded_epochs = []
        error_arr = []
        for k in range(epochs):
            if k % 100000 == 0:
                print('epochs:', k)

            sample = np.random.randint(X.shape[0])

            # Forward pass.  Keep the pre-activations z as well as the
            # activations a: the derivative has to be taken at z.
            a = [X[sample]]
            z = []
            for w in self.weights:
                z.append(np.dot(a[-1], w))
                a.append(self.activation(z[-1]))

            # Output layer error.
            error = y[sample] - a[-1]
            if k % 5000 == 0:
                print('error = ', error)
                error_arr.append(error)
                recorded_epochs.append(k)

            # activation_prime is the derivative with respect to the
            # pre-activation input, so it is evaluated at z, not at a.
            deltas = [error * self.activation_prime(z[-1])]

            # Walk back from the last hidden layer to the first;
            # z[l - 1] is the pre-activation that produced a[l].
            for l in range(len(a) - 2, 0, -1):
                deltas.append(
                    deltas[-1].dot(self.weights[l].T) * self.activation_prime(z[l - 1]))

            # Reorder from [output -> first hidden] to [first hidden -> output]
            # so deltas[i] lines up with self.weights[i].
            deltas.reverse()

            # Weight update: outer product of the layer input and its delta.
            for idx, (layer_in, delta) in enumerate(zip(a, deltas)):
                layer = np.atleast_2d(layer_in)
                delta = np.atleast_2d(delta)
                self.weights[idx] += learning_rate * layer.T * delta

        # x-axis uses the epochs at which the errors were actually recorded.
        plt.plot(recorded_epochs, error_arr)

    def predict(self, x):
        """Return the network output for a single input sample ``x``."""
        # Prepend the bias unit, then feed forward through every layer.
        a = np.concatenate((np.ones(1), np.array(x)))
        for w in self.weights:
            a = self.activation(np.dot(a, w))
        return a
        
##### code for "XOR" with logistic activation function

if __name__ == '__main__':
    # XOR truth table: inputs and their expected outputs.
    X = np.array([
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1],
    ])
    y = np.array([
        [0],
        [1],
        [1],
        [0],
    ])

    # 2 inputs, 200 requested hidden units, 1 output, logistic activation.
    nn = NeuralNetwork(layers=[2, 200, 1], activation='sigmoid')
    nn.fit(X, y)

    # Show the trained network's output for every input pair.
    for e in X:
        print(e, nn.predict(e))

### Part g section 2 (Train the model for "OR" with logistic activation function)


In [None]:
##### code for "OR" with logistic activation function

if __name__ == '__main__':
    # OR truth table: inputs and their expected outputs.
    X = np.array([
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1],
    ])
    y = np.array([
        [0],
        [1],
        [1],
        [1],
    ])

    # 2 inputs, 200 requested hidden units, 1 output, logistic activation.
    nn = NeuralNetwork(layers=[2, 200, 1], activation='sigmoid')
    nn.fit(X, y)

    # Show the trained network's output for every input pair.
    for e in X:
        print(e, nn.predict(e))

### Part g section 3 (Train the model for "AND" with logistic activation function)


In [None]:
##### code for "AND" with logistic activation function

if __name__ == '__main__':
    # AND truth table: inputs and their expected outputs.
    X = np.array([
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1],
    ])
    y = np.array([
        [0],
        [0],
        [0],
        [1],
    ])

    # 2 inputs, 200 requested hidden units, 1 output, logistic activation.
    nn = NeuralNetwork(layers=[2, 200, 1], activation='sigmoid')
    nn.fit(X, y)

    # Show the trained network's output for every input pair.
    for e in X:
        print(e, nn.predict(e))