In [175]:
import numpy as np
from collections.abc import Iterable
import numbers
import jax.numpy as jnp
from jax import grad

In [176]:
class MLP_with_backpropagation():
    def identity(x):
        return x
        
    def __init__(self, shape, activations = None): #len(activations)+1=len(shape)
        fed_values = []
        activation_values = []
        for layer_size in shape:
            fed_values.append(np.array([0]*layer_size))
            activation_values.append(np.array([0]*layer_size))
        self.fed_values = fed_values
        self.activation_values = activation_values
        #first layer values are set so that indexes match
        weights = [0]
        biases = [0]
        for i in range(1, len(self.fed_values)):
            n = len(self.fed_values[i])
            m = len(self.fed_values[i-1])
            #initialising with random values
            weight_matrix = np.random.normal(0,1,(n,m))
            weights.append(weight_matrix)
            bias_vector = np.random.normal(0,1,n)
            biases.append(bias_vector)
        self.weights = weights
        self.biases = biases
        
        if activations:
            self.activations = [0] + [np.vectorize(activation) for activation in activations]
            self.activations_sv = [0] + [activation for activation in activations]
        else:
            self.activations = [0] + [np.vectorize(MLP_with_backpropagation.identity)] * (len(shape) - 1)
            self.activations_sv = [0] + [MLP_with_backpropagation.identity] * (len(shape) - 1)
            
    #2 functions below are only for technical purposes
    def is_iterable(obj):
        return isinstance(obj, Iterable)

    def is_numeric_vector_of_given_length(supposed_vector, length):
         if not MLP_with_backpropagation.is_iterable(supposed_vector):
             return False
         if len(supposed_vector) != length:
             return False
         for el in supposed_vector:
             if not isinstance(el, numbers.Number):
                 return False
         return True
        
    def set_input(self, inputt):
        if not MLP_with_backpropagation.is_numeric_vector_of_given_length(inputt, len(self.fed_values[0])):
            print("Wrong input size or type, it is supposed to be a numerical list or a 1D np.array of length of 1st layer")
            return False
        self.fed_values[0] = np.array(inputt)
        self.activation_values[0] = np.array(inputt) #input is input
        return True
        
    def feed_forward(self):
        for i in range(1, len(self.fed_values)):
            self.fed_values[i] = np.dot(self.weights[i], self.activation_values[i-1]) + self.biases[i] ##can do if statement if activation is function of layer not neuron
            self.activation_values[i] = self.activations[i](self.fed_values[i])

    def predict(self, x):
        if not self.set_input(x):
            return False
        self.feed_forward()
        return self.activation_values[-1]

    def predict_multiple(self, x):
        y = []
        for el in x:
            y.append(self.predict(el))
        return np.array(y)
        
    #manual setting of weights and biases    
    def set_weights(self, weights):
        self.weights = weights

    def set_biases(self, biases):
        self.biases = biases

    def squared_error(pred, expected):
        return (pred - expected)**2

    
    #returns a pair first element is for weights second for biases
    def derivative(self, inputt, expected):
        self.predict(inputt)
        dx = 10**(-6)
        weight_grad = [0] + [np.zeros(self.weights[i].shape) for i in range(1, len(self.weights))]
        bias_grad = [0] + [np.zeros(len(self.biases[i])) for i in range(1, len(self.biases))]
        neuron_activation_grad = [0] + [np.zeros(len(self.activation_values[i])) for i in range(1, len(self.activation_values))]
        neuron_fed_grad = [0] + [np.zeros(len(self.fed_values[i])) for i in range(1, len(self.fed_values))]
        last_layer = True
        for i in range(len(self.fed_values)-1, 0, -1):
            #derivatives in respect to neuron activation values
            for j in range(len(self.activation_values[i])):
                if last_layer:
                    x_0 = self.activation_values[i][j]
                    y_0 = expected[j]
                    dc_da = grad(MLP_with_backpropagation.squared_error, argnums = 0)
                    neuron_activation_grad[i][j] = dc_da(x_0, y_0)
                else:
                    neuron_activation_grad[i][j] = sum([neuron_fed_grad[i+1][k] * self.weights[i+1][k][j] for k in range(len(neuron_fed_grad[i+1]))])
                
                da_dz = grad(self.activations_sv[i], argnums = 0)                                       
                neuron_fed_grad[i][j] = neuron_activation_grad[i][j]*da_dz(self.fed_values[i][j])
                bias_grad[i][j] = neuron_fed_grad[i][j]
                
                for k in range(len(self.weights[i][j])):
                    weight_grad[i][j][k] = neuron_fed_grad[i][j] * self.activation_values[i-1][k]
            last_layer = False
        return (weight_grad, bias_grad)

    def add_2_lists_of_np_arrays(l1, l2):
        for i in range(len(l1)):
            l1[i] += l2[i]
        return l1
        
    def subtract_2_lists_of_np_arrays(l1, l2):
        for i in range(len(l1)):
            l1[i] -= l2[i]
        return l1
        
    def create_0filled_list_of_np_arrays(lista):
        listr = [0]
        for i in range(1, len(lista)):
            listr.append(np.zeros(lista[i].shape))
        return listr

    def multiply_list_elementwise(lista, factor):
        for i in range(len(lista)):
            lista[i] = lista[i] * factor
        return lista

    def learn(self, x, y, epochs, lr=0.0001, batch_size = max(1,int(len(x)/100))):
            for i in range(epochs):
                idx = 0
                while(idx + batch_size < len(x)):
                    batchx = x[idx:idx+batch_size]
                    batchy = y[idx:idx+batch_size]
                    for i in range(batch_size):
                        local_weight_gradient, local_bias_gradient = self.derivative(batchx[i], batchy[i])
                        if i==0:
                            avg_weight_gradient = MLP_with_backpropagation.create_0filled_list_of_np_arrays(local_weight_gradient)
                            avg_bias_gradient = MLP_with_backpropagation.create_0filled_list_of_np_arrays(local_bias_gradient)
                        avg_weight_gradient = MLP_with_backpropagation.add_2_lists_of_np_arrays(avg_weight_gradient, local_weight_gradient)
                        avg_bias_gradient = MLP_with_backpropagation.add_2_lists_of_np_arrays(avg_bias_gradient, local_bias_gradient)
                    avg_weight_gradient = MLP_with_backpropagation.multiply_list_elementwise(avg_weight_gradient, 1/batch_size)
                    avg_bias_gradient = MLP_with_backpropagation.multiply_list_elementwise(avg_bias_gradient, 1/batch_size)
                    self.weights = MLP_with_backpropagation.subtract_2_lists_of_np_arrays(self.weights, MLP_with_backpropagation.multiply_list_elementwise(avg_weight_gradient, lr))
                    self.biases = MLP_with_backpropagation.subtract_2_lists_of_np_arrays(self.biases, MLP_with_backpropagation.multiply_list_elementwise(avg_bias_gradient, lr))
                    idx += batch_size
        

    
            
        

In [None]:
def MSE0_learn_MSE1(network, X_train, Y_train, X_test, Y_test):
    max_x = max(X_train)
    min_x = min(X_train)
    max_y = max(Y_train)
    min_y = min(Y_train)
    x_scl = max_x - min_x
    y_scl = max_y - min_y
    X_tr_scl = X_train / x_scl
    Y_tr_scl = Y_train / y_scl
    X_ts_scl = X_test / x_scl
    Y_ts_scl = Y_test / y_scl
    pred0 = network.predict_multiple(X_ts_scl.reshape(-1,1)) * y_scl
    MSE0 = sum((Y_test - pred0.reshape(-1))**2)
    network.learn(X_tr_scl.reshape(-1,1), Y_tr_scl.reshape(-1,1), 50)
    pred1 = network.predict_multiple(X_ts_scl.reshape(-1,1))* y_scl
    MSE1 = sum((Y_test - pred1.reshape(-1))**2)
    return MSE0, MSE1
    
network = MLP_with_backpropagation([1,5,1])
train = pd.read_csv("data/regression/square-simple-training.csv")
test = pd.read_csv("data/regression/square-simple-test.csv")

X_train = train['x'].values
Y_train = train['y'].values
X_test = test['x'].values
Y_test = test['y'].values

MSE0, MSE1 = MSE0_learn_MSE1(network, X_train, Y_train, X_test, Y_test)


    

In [181]:
MSE0



np.float64(27672589.18610299)

In [182]:
MSE1

np.float64(911699.2977257003)

In [122]:
print(MSE1)

[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan]


In [150]:
network = MLP_with_backpropagation([1,5,1])
network.derivative([2], [1])

[-0.47281368  2.6941238  -0.75494902 -1.11110316 -0.09680387] + [ 0.81893917 -0.58106128 -0.24249267 -0.56237396 -1.28480126]
[-1.50452024] + [-0.36423103]


([0,
  array([[-0.49614689],
         [ 4.84981966],
         [ 5.09489298],
         [-8.459342  ],
         [ 1.36545646]]),
  array([[ -1.9858959 , -12.12370167,   5.72282427,   9.60157933,
            7.92696302]])],
 [0,
  array([-0.24807344,  2.42490983,  2.54744649, -4.229671  ,  0.68272823]),
  array([-5.73750257])])

In [152]:
print(network.derivative([2], [1]))

[-0.47281368  2.6941238  -0.75494902 -1.11110316 -0.09680387] + [ 0.81893917 -0.58106128 -0.24249267 -0.56237396 -1.28480126]
[-1.50452024] + [-0.36423103]
([0, array([[-0.49614689],
       [ 4.84981966],
       [ 5.09489298],
       [-8.459342  ],
       [ 1.36545646]]), array([[ -1.9858959 , -12.12370167,   5.72282427,   9.60157933,
          7.92696302]])], [0, array([-0.24807344,  2.42490983,  2.54744649, -4.229671  ,  0.68272823]), array([-5.73750257])])
