In [None]:
# import stuff
import numpy as np
import os
import matplotlib.pyplot as plt
from scipy.integrate import solve_ivp

In [242]:
## Let's try to build a neural network class.
# Inputs: input_data, output_data, hidden_layers (number of neurons per layer), activation_functions, epochs, learning_rate, optimizer, loss_function, batch_size
# Optimizers: SGD, AdamW
class Neural_Network:
    """Fully connected regression network trained with mini-batch gradient descent.

    The network is a stack of dense hidden layers (each followed by an
    activation) and a final dense layer with a linear (identity) output.
    Dense layers act on the last axis of the data; any leading axes are
    treated as sample axes and are flattened when gradients are accumulated.

    Parameters
    ----------
    input_data : array-like, shape (n_samples, ..., n_features)
        Training inputs. Mini-batches are sliced along the first axis.
    output_data : array-like, shape (n_samples, ..., n_outputs)
        Training targets, aligned with ``input_data``.
    hidden_layers : sequence of int
        Number of neurons in each hidden layer.
    activation_functions : sequence
        One entry per hidden layer. Each entry is a tuple ``(name, alpha)``
        where ``name`` is 'relu', 'sigmoid', 'tanh' or 'lrelu' and ``alpha`` is
        the negative-side slope for 'lrelu'. A bare name string is also
        accepted ('lrelu' then uses ``DEFAULT_LRELU_ALPHA``).
    epochs : int
        Number of passes over the training data.
    learning_rate : float
        Step size used by the optimizer.
    optimizer : str
        'SGD' or 'AdamW'.
    loss_function : str
        Only 'mse' is supported.
    batch_size : int
        Number of samples (first-axis rows) per mini-batch.

    Raises
    ------
    ValueError
        If the data has fewer than two axes or there are fewer activation
        specs than hidden layers.
    """

    # AdamW hyper-parameters; override on the class or on an instance to tune.
    ADAMW_BETA1 = 0.9
    ADAMW_BETA2 = 0.999
    ADAMW_EPSILON = 1e-8
    ADAMW_WEIGHT_DECAY = 0.01
    # Negative-side slope for 'lrelu' when the activation spec carries none.
    DEFAULT_LRELU_ALPHA = 0.01

    def __init__(self, input_data, output_data, hidden_layers, activation_functions, epochs, learning_rate, optimizer, loss_function, batch_size):
        self.input_data = np.asarray(input_data)
        self.output_data = np.asarray(output_data)
        if self.input_data.ndim < 2 or self.output_data.ndim < 2:
            raise ValueError("input_data and output_data must have at least 2 axes (samples, features)")
        if len(activation_functions) < len(hidden_layers):
            raise ValueError("Need one activation function per hidden layer")
        self.hidden_layers = hidden_layers
        self.activation_function = activation_functions
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.optimizer = optimizer
        self.loss_function = loss_function
        self.batch_size = batch_size
        self.weights = []
        self.biases = []
        self._last_inputs = None   # inputs of the most recent forward pass
        self._adamw_state = None   # created lazily on the first AdamW step
        self.initialize_weights()

    def initialize_weights(self):
        """(Re)create all weights and biases.

        Weights are drawn from N(0, 0.1**2) via the global NumPy RNG and biases
        start at zero. Existing parameters and AdamW state are discarded, so
        calling this again re-initialises the model instead of appending
        duplicate layers.
        """
        self.weights = []
        self.biases = []
        self._adamw_state = None
        # Features live on the last axis (equal to shape[1] for 2-D data).
        fan_in = self.input_data.shape[-1]
        layer_sizes = list(self.hidden_layers) + [self.output_data.shape[-1]]
        for size in layer_sizes:
            self.weights.append(0.1 * np.random.randn(fan_in, size))
            self.biases.append(np.zeros((1, size)))
            fan_in = size

    @classmethod
    def _activation_spec(cls, spec):
        """Normalise an activation spec to a ``(name, alpha)`` pair."""
        if isinstance(spec, str):
            return spec, cls.DEFAULT_LRELU_ALPHA
        alpha = spec[1] if len(spec) > 1 else cls.DEFAULT_LRELU_ALPHA
        return spec[0], alpha

    def activation(self, input, type):
        """Apply the activation described by ``type`` element-wise to ``input``.

        Raises ValueError for an unknown activation name.
        """
        name, alpha = self._activation_spec(type)
        if name == "relu":
            return np.maximum(0, input)
        elif name == "sigmoid":
            return 1 / (1 + np.exp(-input))
        elif name == "tanh":
            return np.tanh(input)
        elif name == "lrelu":
            return np.maximum(alpha * input, input)
        else:
            raise ValueError("Invalid activation function")

    def activation_derivative(self, input, type):
        """Derivative of the activation w.r.t. its pre-activation ``input``.

        Raises ValueError for an unknown activation name.
        """
        name, alpha = self._activation_spec(type)
        if name == 'relu':
            return np.where(input > 0, 1, 0)
        elif name == 'sigmoid':
            sig = self.activation(input, ('sigmoid', 0))
            return sig * (1 - sig)
        elif name == 'tanh':
            return 1 - np.tanh(input) ** 2
        elif name == 'lrelu':
            # Float dtype so a fractional alpha is not truncated for int input.
            dx = np.ones_like(input, dtype=float)
            dx[input <= 0] = alpha
            return dx
        else:
            raise ValueError("Invalid activation function derivative")

    def forward(self, inputs):
        """Run a forward pass.

        Returns a list with one ``(z, a)`` tuple per layer (hidden layers
        first, output layer last), where ``z`` is the pre-activation and ``a``
        the activation. The output layer is linear, so its ``z`` and ``a`` are
        the same array. The inputs are remembered so that ``backwards`` can
        compute the first layer's weight gradient.
        """
        inputs = np.asarray(inputs)
        self._last_inputs = inputs
        activations = []
        layer_input = inputs
        for i in range(len(self.hidden_layers)):
            z = np.dot(layer_input, self.weights[i]) + self.biases[i]
            a = self.activation(z, self.activation_function[i])
            activations.append((z, a))
            layer_input = a  # this layer's output feeds the next layer

        z = np.dot(layer_input, self.weights[-1]) + self.biases[-1]
        activations.append((z, z))
        return activations

    def compute_loss(self, predictions, targets):
        """Return the scalar loss; MSE is the mean over every element."""
        if self.loss_function == 'mse':
            return np.mean((predictions - targets) ** 2)
        else:
            raise ValueError("Unsupported loss function")

    def _loss_gradient(self, predictions, targets):
        """Gradient of ``compute_loss`` with respect to ``predictions``."""
        if self.loss_function == 'mse':
            # np.mean divides by the total element count, so the gradient must
            # too (this also stays correct for a short final batch).
            return 2 * (predictions - targets) / predictions.size
        raise ValueError("Unsupported loss function")

    def train(self, plot=True):
        """Train with the configured optimizer and return the loss history.

        Raises ValueError if ``self.optimizer`` is not 'SGD' or 'AdamW'.
        """
        if self.optimizer == "SGD":
            return self.sgd_train(plot=plot)
        elif self.optimizer == "AdamW":
            return self.adamw_train(plot=plot)
        raise ValueError("Invalid optimizer entered")

    def backwards(self, activations, dvalues, inputs=None):
        """Back-propagate ``dvalues`` (dLoss/dPrediction) through the network.

        Parameters
        ----------
        activations : list of (z, a)
            The value returned by ``forward``.
        dvalues : ndarray
            Gradient of the loss with respect to the network output.
        inputs : ndarray, optional
            The inputs that produced ``activations``. Defaults to the inputs of
            the most recent ``forward`` call.

        Returns
        -------
        list of (delta, weight_grad, bias_grad)
            One tuple per layer, in layer order. ``delta`` is dLoss/dz for the
            layer; ``weight_grad`` and ``bias_grad`` have the same shapes as
            the layer's weights and biases.
        """
        if inputs is None:
            inputs = getattr(self, "_last_inputs", None)
            if inputs is None:
                raise ValueError("backwards() needs the forward-pass inputs; call forward() first or pass inputs=")

        n_layers = len(self.weights)
        # layer_inputs[i] is the array that layer i consumed in the forward pass.
        layer_inputs = [np.asarray(inputs)] + [a for _, a in activations[:-1]]

        # The output layer is linear, so dLoss/dz equals dLoss/da there.
        delta = np.asarray(dvalues)
        grads = [None] * n_layers
        for i in reversed(range(n_layers)):
            # Collapse any leading sample axes so the gradients are 2-D.
            flat_in = layer_inputs[i].reshape(-1, layer_inputs[i].shape[-1])
            flat_delta = delta.reshape(-1, delta.shape[-1])
            weight_grad = np.dot(flat_in.T, flat_delta)
            bias_grad = np.sum(flat_delta, axis=0, keepdims=True)
            grads[i] = (delta, weight_grad, bias_grad)
            if i > 0:
                z_prev = activations[i - 1][0]
                delta = np.dot(delta, self.weights[i].T) * self.activation_derivative(
                    z_prev, self.activation_function[i - 1]
                )
        return grads

    def _run_epochs(self, step, plot):
        """Shared mini-batch loop: forward, loss, backward, then ``step(grads)``.

        Every second epoch the loss of that epoch's last mini-batch is printed
        and recorded. Returns the list of recorded losses.
        """
        if self.batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        n_samples = self.input_data.shape[0]
        epoch_data = []
        loss_history = []
        for epoch in range(self.epochs):
            loss = None
            for start in range(0, n_samples, self.batch_size):
                end = start + self.batch_size
                batch_inputs = self.input_data[start:end]
                batch_outputs = self.output_data[start:end]

                activations = self.forward(batch_inputs)
                predictions = activations[-1][0]
                loss = self.compute_loss(predictions, batch_outputs)
                dvalues = self._loss_gradient(predictions, batch_outputs)

                grads = self.backwards(activations, dvalues, inputs=batch_inputs)
                step(grads)

            # loss stays None only when there were no samples to train on.
            if epoch % 2 == 0 and loss is not None:
                print(f"Epoch {epoch}, Loss: {loss}")
                epoch_data.append(epoch)
                loss_history.append(loss)

        if plot:
            self._plot_loss(epoch_data, loss_history)
        return loss_history

    @staticmethod
    def _plot_loss(epoch_data, loss_history):
        """Plot the recorded loss against the epoch number."""
        fig, ax = plt.subplots()
        ax.plot(epoch_data, loss_history)
        ax.set_xlabel("Epoch")
        ax.set_ylabel("Loss")
        ax.set_title("Epoch vs Loss")
        plt.show()

    def sgd_train(self, plot=True):
        """Train with plain mini-batch SGD; returns the recorded loss history."""
        return self._run_epochs(self._sgd_step, plot)

    def adamw_train(self, plot=True):
        """Train with AdamW (Adam with decoupled weight decay).

        Optimizer state is reset at the start of each call. Returns the
        recorded loss history.
        """
        self._reset_adamw_state()
        return self._run_epochs(self._adamw_step, plot)

    def _sgd_step(self, grads):
        """Apply one gradient-descent update in place."""
        for i, (_delta, weight_grad, bias_grad) in enumerate(grads):
            self.weights[i] -= self.learning_rate * weight_grad
            self.biases[i] -= self.learning_rate * bias_grad

    def _reset_adamw_state(self):
        """Zero the AdamW moment estimates and the step counter."""
        self._adamw_state = {
            "t": 0,
            "m_w": [np.zeros_like(w) for w in self.weights],
            "v_w": [np.zeros_like(w) for w in self.weights],
            "m_b": [np.zeros_like(b) for b in self.biases],
            "v_b": [np.zeros_like(b) for b in self.biases],
        }

    def _adamw_step(self, grads):
        """Apply one AdamW update in place (weight decay on weights only)."""
        if getattr(self, "_adamw_state", None) is None:
            self._reset_adamw_state()
        state = self._adamw_state
        state["t"] += 1
        beta1, beta2 = self.ADAMW_BETA1, self.ADAMW_BETA2
        # Bias-correction factors for the zero-initialised moment estimates.
        correction1 = 1 - beta1 ** state["t"]
        correction2 = 1 - beta2 ** state["t"]

        for i, (_delta, weight_grad, bias_grad) in enumerate(grads):
            targets = (
                ("w", self.weights[i], weight_grad, self.ADAMW_WEIGHT_DECAY),
                ("b", self.biases[i], bias_grad, 0.0),
            )
            for key, param, grad, decay in targets:
                m = state["m_" + key][i]
                v = state["v_" + key][i]
                m *= beta1
                m += (1 - beta1) * grad
                v *= beta2
                v += (1 - beta2) * grad ** 2
                m_hat = m / correction1
                v_hat = v / correction2
                # Decoupled decay: shrink the parameter directly rather than
                # adding an L2 term to the gradient.
                param *= 1 - self.learning_rate * decay
                param -= self.learning_rate * m_hat / (np.sqrt(v_hat) + self.ADAMW_EPSILON)

    def update_weights(self, grads):
        """Apply one optimizer step using the gradients from ``backwards``.

        Raises ValueError if ``self.optimizer`` is not 'SGD' or 'AdamW'.
        """
        if self.optimizer == "SGD":
            self._sgd_step(grads)
        elif self.optimizer == "AdamW":
            self._adamw_step(grads)
        else:
            raise ValueError("Invalid optimizer entered")

    def predict(self, inputs):
        """Return the network output (linear output layer) for ``inputs``."""
        activations = self.forward(inputs)
        return activations[-1][0]
## methods: build, train, use (give predictions)


In [246]:
# Synthetic inputs with shape (25, 20, 5), drawn uniformly from [-1, 1).
X_train = np.random.uniform(low=-1, high=1, size=(25, 20, 5))

# Synthetic targets with shape (25, 20, 2), drawn uniformly from [0, 1).
Y_train = np.random.uniform(low=0, high=1, size=(25, 20, 2))