In [None]:
# import stuff
import numpy as np
import os
import matplotlib.pyplot as plt
from scipy.integrate import solve_ivp

In [455]:
## lets try and make neural network class
# input_data, output_data, [number of neurons and size], activation_function, epochs, learning_rate,  
# optomizers: ADAMW
class Neural_Network:
    def __init__(self, input_data, output_data, hidden_layers, activation_functions, epochs, learning_rate, optimizer, loss_function, batch_size):
        self.input_data = input_data
        self.output_data = output_data
        self.hidden_layers = hidden_layers # an array of length number of layers. each value is the number 
        self.activation_function = activation_functions # a list of Tuples (a, x) where a is the activation type ('relu', 'sigmoid', 'tanh', 'lrelu') and x is the alpha range(0, 1) for lrelu 
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.optimizer = optimizer
        self.loss_function = loss_function
        self.batch_size = batch_size
        self.weights = []
        self.biases = []
        self.initialize_weights()
        # how should the data be inputed...
        # here you can split the data into training and testing data
        # I am going to make my dataset below and that will make it easier to find out what the shape is
        
        # STILL NEED TO CREATE MINIBATCHES
        ## building the model
    def initialize_weights(self): 
        last_layer = self.input_data.shape[0] # The rows of input data (feature space)
        for i, size in enumerate(self.hidden_layers):
            self.weights.append(0.1 * np.random.randn(size, last_layer)) # (neuron number, feature space)
            self.biases.append(np.zeros((size, 1)))
            last_layer = self.hidden_layers[i]
            
        self.weights.append(0.1 * np.random.randn(self.output_data.shape[0], last_layer)) # last layer has shape (neuron number, 
        self.biases.append(np.zeros((self.output_data.shape[0], 1)))
        print(f'last weights {self.weights[-1].shape}')
        print(f' first weights {self.weights[0].shape}')
                
        # build the 1st layer with inputs
    def activation(self, input, type):#Relu, sigmoid, tanh
        
        if type[0] == "relu":
            return np.maximum(0, input)
        elif type[0] == "sigmoid":
            return 1 / (1 + np.exp(-input))
        elif type[0] == "tanh":
            return np.tanh(input)
        elif type[0] == "lrelu": # THink about how you can have an alpha parameter when you want it 
            return np.maximum(type[1] * input, input) 
        else:
            raise Exception("Invalid activation function")

    def activation_derivative(self, input, type): # check this you should remember what function you had
        
        if type[0] == 'relu':
            return np.where(input > 0, 1, 0)
        elif type[0] == 'sigmoid':
            sig = self.activation(input, ('sigmoid', 0))
            return sig * (1 - sig)
        elif type[0] == 'tanh':
            return 1 - np.tanh(input)**2
        elif type[0] == 'lrelu':
            dx = np.ones_like(input)
            dx[input <= 0] = type[1]   # THink about how you can have an alpha parameter when you want it 
            return dx
        
        else:
            raise Exception("Invalid activation function derivative")
                              
            
    def forward(self, inputs):
        activations = []
        for i, size in enumerate(self.hidden_layers):
            z = np.dot(self.weights[i], inputs) + self.biases[i] # dot product of the matrices "input" (the previous layer activations) and "self.weights" + self.biases which has same number of rows
            print(f'forwardsshape{z.shape}')
            a = self.activation(z, self.activation_function[i]) # measuring activation
            activations.append((z, a)) # added to an array that can be accessed later
            inputs = a # output if the layer is the inout of the next
            
        z = np.dot(self.weights[-1], inputs) + self.biases[-1]
        print(f'z_shape_output {z.shape}')
        a = z
        activations.append((z, a))
        print(f'activations {activations[-1][0].shape}')
        return activations ### weights and biases are recorded in self.weights and self.biases, outputs recorded in activations
        
        
    def compute_loss(self, predictions, targets):
        if self.loss_function == 'mse':
            return np.mean((predictions - targets) ** 2) # the loss function averaged out for normalisation
        else:
            raise Exception("Unsupported loss function")
            
    def loss_backwards(self, predictions, targets):
        if self.loss_function == 'mse':
            return 2 * (predictions - targets) / len(targets) # the loss function averaged out for normalisation
        else:
            raise Exception("Unsupported loss function")

    def train(self): # choosing optimizer (the method we use to train the model with)
        if self.optimizer == "SGD":
            return self.sgd_train()
        elif self.optimizer == "AdamW":
            return self.adamw_train()

    def backwards(self, activations, dvalues):
        grads = []
        z, a = activations[-1]
        delta = dvalues * self.activation_derivative(z, self.activation_function[-1])
        print(f'delta shape: {delta.shape}, z shape: {z.shape}')
        grads.append((delta, self.weights[-1], self.biases[-1]))
    
        for i in reversed(range(len(self.hidden_layers))):
            z, a_prev = activations[i]
            dadz = self.activation_derivative(z, self.activation_function[i])
            print(f'Layer {i}, z shape: {z.shape}, dadz shape: {dadz.shape}')
    
            # Backpropagate delta
            delta = np.dot(self.weights[i + 1].T, delta)
            print(f'Post dot delta shape: {delta.shape}')
            
            # Element-wise multiply with activation derivative
            delta *= dadz
            print(f'Post multiply delta shape: {delta.shape}')
    
            grads.append((delta, self.weights[i], self.biases[i]))

        
        return grads


    def sgd_train(self):
        epoch_data = []
        loss_history = []
        # for i in epoch:
        # forward pass = self.forward(self.create_minibatches(self.input_data))
        # backwards = self.backwards(compute_loss(forward_pass))
        # self.update_weights(backwards)
        for epoch in range(self.epochs):
            for start in range(0, self.input_data.shape[1], self.batch_size):
                start = start
                end = start + self.batch_size
                print(f'end {end}')
                batch_inputs = self.input_data[start:end]
                print(f'batchinputs {batch_inputs.shape}')
                batch_outputs = self.output_data[start:end]
                print(f'batchoutputs {len(batch_outputs)}')
    
                
                activations = self.forward(batch_inputs)
                predictions = activations[-1][1]
                print(f' predictions {predictions.shape}')
                loss_backwards = self.loss_backwards(predictions, batch_outputs)
                print(f'loss_backwards {loss_backwards.shape}')
                loss = self.compute_loss(predictions, batch_outputs)
                

                grads = self.backwards(activations, loss_backwards)
                print(f' grads first {grads[0][0].shape}')
                self.update_weights(grads[::-1])# , start, end)

            if epoch % 2 == 0:
                print(f"Epoch {epoch}, Loss: {loss}")
                epoch_data.append(epoch)
                loss_history.append(loss)

        plt.plot(epoch_data, loss_history)
        plt.xlabel("Iteration Number")
        plt.ylabel("Loss")
        plt.title("Iteration Number vs Loss")
        plt.show()
        
    def adamw_train(self):
        pass
    
    def update_weights(self, grads):#, start, end):
        # print(len(self.weights))
        # print(len(grads))
        if self.optimizer == "SGD":
            for i, (grad, weights, biases) in enumerate(grads):
                print(self.weights[i].shape, self.learning_rate, grad.shape)
                self.weights[i] -= self.learning_rate * grad @ self.input_data.T   #[start:end] ## don't need to transpose grad but it is currently (last_layer, input_size) and we want it to have the shape (last_layer
                self.biases[i] -= self.learning_rate * np.sum(grad, axis=0, keepdims=True)
            
        elif self.train == "AdamW":
            pass
        #else:
            #raise Exception("Invalid optimizer entered")

    def predict(self, inputs):
        activations = self.forward(inputs)
        return activations[-1][0]
## methods: build, train, use (give predictions)


In [312]:
X_train = np.random.uniform(-1, 1, (5, 500)) #25, 20, 5
Y_train = np.random.uniform(0, 1, (2, 500)) # 25, 20, 2

In [456]:
test = Neural_Network(X_train, Y_train, [30], [("relu", 0), ("relu", 0)], 10, 0.1, 'SGD', "mse", 20)
test.train()

last weights (2, 30)
 first weights (30, 5)
end 20
batchinputs (5, 500)
batchoutputs 2
forwardsshape(30, 500)
z_shape_output (2, 500)
activations (2, 500)
 predictions (2, 500)
loss_backwards (2, 500)
delta shape: (2, 500), z shape: (2, 500)
Layer 0, z shape: (30, 500), dadz shape: (30, 500)
Post dot delta shape: (30, 500)
Post multiply delta shape: (30, 500)
 grads first (2, 500)
(30, 5) 0.1 (30, 500)


ValueError: non-broadcastable output operand with shape (30,1) doesn't match the broadcast shape (30,500)