# Layers

In [1]:
import numpy as np
import pandas as pd

In [31]:
def one_hot_encoded(y_train, classes = 0):
    one_hot = np.zeros((len(y_train), classes))
    array = y_train
    if type(array) is not np.ndarray: 
        array = np.array(y_train)
    for i in range(len(y_train)):
        one_hot[i][array[i]] = 1.0
    return one_hot

In [202]:
class Model:
    
    # Constructor
    def __init__(self):
        self.num_layers = 0
        self.layers_info = {}
        self.layers = []
        self.net_sum = []
        self.weights = []
        self.biases = []
        self.jacobian_weights = []
        self.jacobian_biases = []
    
    # Add a fully connected layer
    def add_dense_layer(self, size, input_size = 0, activation = 'linear'):
        self.num_layers += 1
        self.layers_info['Layer_'+str(self.num_layers)] = {
            'size': size,
            'activation': activation
        }
        self.layers.append(np.ones(size))
        self.net_sum.append(np.ones(size))
        if input_size != 0:
            self.weights.append(np.ones((size, input_size)))
        else:
            self.weights.append(np.ones((size, self.layers_info['Layer_'+str(self.num_layers-1)]['size'])))
        self.biases.append(np.ones(size))
        
    def mse(self, true, pred):
        return ((true-pred)**2)/2
        
    # Selection of Loss Function and Optimization function
    def set_parameters(self, lr = 0.01, loss = 'mse'):
        if loss == 'mse':
            self.loss = loss
            self.loss_function = self.mse
        self.lr = lr
    
    def sigmoid(self, x):
        return 1 / (1 + (np.e ** -x))
    
    def relu(self, x):
        return max(0, x)
    
    def softmax(self, x):
        exp = np.exp(x)
        return np.true_divide(exp, sum(exp)).transpose()
    
    def activation(self, net_sum, activation):
        if activation == 'sigmoid':
            activation_function = self.sigmoid
            output_vector = np.array([activation_function(i) for i in net_sum]).transpose()
        elif activation == 'relu':
            activation_function = self.relu
            output_vector = np.array([activation_function(i) for i in net_sum]).transpose()
        elif activation == 'softmax':
            activation_function = self.softmax
            output_vector = activation_function(net_sum)
        return output_vector
    
    # Forward Propogation
    def neural_network_output(self, record):
        input_vector = record.transpose()
        output_vector = None
        for i in range(len(self.layers)):
            # y = aW + b
            self.net_sum[i] = np.matmul(self.weights[i], input_vector) + self.biases[i].transpose()
            self.layers[i] = self.activation(self.net_sum[i], self.layers_info['Layer_'+str(i+1)]['activation'])
            input_vector = self.layers[i]
        output_vector = self.layers[len(self.layers)-1]
        return output_vector
    
    # For output layer
    # delta = dE/dnet_sum_output = dE/doutput * doutput/dnet_sum_output
    # dE/doutput
    def output_loss_derivative(self, true, pred):
        if self.loss == 'mse':
            return (true - pred)
    
    # doutput/dnet_sum_output
    def activation_derivative(self, output_vector_i, net_sum_i, activation):
        if activation == 'relu':
            return np.array([1 if i>0 else 0 for i in net_sum_i])
        elif activation == 'sigmoid':
            return output_vector_i * (1 - output_vector_i)
        elif activation == 'softmax':
            return output_vector_i * (1 - output_vector_i)
        else:
            return output_vector_i * (1 - output_vector_i)
    
    # For hidden layer
    # dE/dnet_sum_hidden = dotproduct(weights_ji, delta_o)
    def hidden_loss_derivative(self, grad_next, layer_number, num_of_neurons_next):
        grad = []
        for i in range(num_of_neurons_next):
            weights_next = self.weights[layer_number+1][:, i]
            grad.append(np.dot(weights_next, grad_next))
        return np.array(grad)
    
    # Training using Backpropogation
    def train(self, X_train, y_train, epochs = 1, batch_size = 0):
        
        # initializing Jacobian
        for i in range(self.num_layers):
            size = self.layers_info['Layer_'+str(i+1)]['size']            
            self.jacobian_weights.append(np.zeros((size)))
            self.jacobian_biases.append(np.zeros((size)))
        
        # Creation of batches
        if batch_size == 0:
            batch_size = len(y_train)
        batches = int(abs(len(X_train) / batch_size))
        X_train = np.array_split(X_train, batches)
        y_train = np.array_split(y_train, batches)
        
        while epochs: 
            for batch in range(batches):
                for record, label in zip(X_train[batch], y_train[batch]):
                    true_output = np.array([label])
                    predicted_output = self.neural_network_output(np.array(record))

                    # gradient of output layer
                    output_vector_i = self.layers[self.num_layers-1]
                    net_sum_i = self.net_sum[self.num_layers-1]
                    activation = self.layers_info['Layer_'+str(self.num_layers)]['activation']
                    # derivative of output loss * derivetive of activation
                    grad_o = self.output_loss_derivative(true_output, predicted_output) * self.activation_derivative(output_vector_i, net_sum_i, activation)
                    
                    # Jacobian Output Layer - Jw = input * grad_o and Jb = 1.0 * grad_o
                    if self.num_layers-2 < 0:
                        self.jacobian_weights[self.num_layers-1] += np.array(record) * grad_o
                    self.jacobian_weights[self.num_layers-1] += grad_o[:, None] @ self.layers[self.num_layers-2][None, :]
                    self.jacobian_biases[self.num_layers-1] += grad_o
                    
                    # gradient of hidden layer
                    grad_next = grad_o
                    for i in range(self.num_layers-2, -1, -1):
                        output_vector_i = self.layers[i]
                        net_sum_i = self.layers[i]
                        activation = self.layers_info['Layer_'+str(i+1)]['activation']
                        num_of_neurons_next = self.layers_info['Layer_'+str(i+2)]['size']
                        
                        # derivative of hidden loss * derivetive of activation
                        grad_h = self.hidden_loss_derivative(grad_next, i, num_of_neurons_next) * self.activation_derivative(output_vector_i, net_sum_i, activation)
                        
                        # Jacobian Hidden layer - Jw = input * grad_next and Jb = 1.0 * grad_next
                        if i-1 < 0:
                            self.jacobian_weights[i] += np.array(record) * grad_next
                        self.jacobian_weights[i] += grad_next @ self.layers[i-1]
                        self.jacobian_biases[i] += grad_next
                        
                        # change the gradient
                        grad_next = grad_h
                
                # Divide accumulated jacobian by number of records in the batch
                for i in range(self.num_layers):
                    self.jacobian_weights[i] = np.true_divide(self.jacobian_weights[i], len(record))
                    self.jacobian_biases[i] = np.true_divide(self.jacobian_biases[i], len(record))
                
                # Update weights and biases
                for i in range(self.num_layers-1, -1, -1):
                    self.weights[i] += self.lr * self.jacobian_weights[i]
                    self.biases[i] += self.lr * self.jacobian_biases[i]
            
            # Loss Calculation
            loss = 0
            for record, label in zip(X_train[batch], y_train[batch]):
                true_output = label
                predicted_output = self.neural_network_output(record)
                loss += self.loss_function(true_output, predicted_output)
            print('Loss: ',loss/(len(X_train)))
            
            epochs -= 1
    
    # Predict
    def predict(self, X_test):
        predictions = []
        classes = self.layers_info['Layer_'+str(self.num_layers-1)]['size']
        for record in X_test:
            predicted_output = self.neural_network_output(record)
            if classes == 1:
                if predicted_output >= 0.5:
                    predictions.append(1)
                else:
                    predictions.append(0)
            else:
                predict = np.argmax(predicted_output)
                predictions.append(predict)
        return predictions
            
            
    
    

In [73]:
model = Model()

model.add_dense_layer(2, 2, activation='relu')
model.add_dense_layer(2, activation='softmax')

X_train = np.array([[0,0], [0,1], [1,0], [1,1]])
y_train = [0, 1, 1, 0]
y_train = one_hot_encoded(y_train, classes = 2)

x = model.train(X_train, y_train)

[0 0]
[1. 0.]


In [203]:
tmodel = Model()

tmodel.add_dense_layer(4, 3, activation='sigmoid')
tmodel.add_dense_layer(1, activation='sigmoid')

X_train = np.array([[0, 0, 1], [0, 1, 1], [1, 0, 1], [0, 1, 0], [1, 0, 0], [1, 1, 1], [0, 0, 0]])
y_train = np.array([0, 1, 1, 1, 1, 0, 0])

tmodel.set_parameters(lr=1, loss='mse')

x = tmodel.train(X_train, y_train, batch_size=1, epochs=60000)

ValueError: shapes (1,) and (4,) not aligned: 1 (dim 0) != 4 (dim 0)

In [181]:
tmodel.predict(X_train)

[0, 0, 0, 0, 0, 0, 0]

In [150]:
tmodel.weights

[array([[-0.1931976 , -0.25570842],
        [-0.1931976 , -0.25570842]]), array([[-0.0350957, -0.0350957]])]

In [221]:
a = np.array([1])
b = np.array([1,2,3,4])
a[:,None] @ b[None,:]

array([[1, 2, 3, 4],
       [2, 4, 6, 8]])