<a href="https://colab.research.google.com/github/AlbertoMontanelli/Machine-Learning/blob/class_unit/neural_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
from sympy import symbols, diff

# We still need to work out how to update the biases.
# We still need to work out how mini-batch training works.
# We still need to decide which activation functions to use, and their derivatives.
# To be tackled later: cross-validation, test vs. training error, hyperparameter search (grid search, number of layers,
# number of units, learning rule), number of epochs / early stopping, Tikhonov regularization, momentum, Adaline and other novelties.

def sigmoid(net):
    """Logistic sigmoid activation, ``1 / (1 + exp(-net))``, element-wise.

    Args:
        net: Scalar or array of pre-activation values.

    Returns:
        The sigmoid of every entry of ``net``, each value lying in [0, 1].
    """
    # sigmoid(net) = exp(-log(1 + exp(-net))). Evaluating the inner logarithm
    # with logaddexp avoids forming exp(-net) directly, which overflows to inf
    # (with a RuntimeWarning) for strongly negative inputs.
    return np.exp(-np.logaddexp(0, -net))

def d_sigmoid(net):
    """Derivative of the logistic sigmoid with respect to ``net``, element-wise.

    Computes ``exp(-net) / (1 + exp(-net))**2``.

    Args:
        net: Scalar or array of pre-activation values.

    Returns:
        The derivative evaluated at every entry of ``net``.
    """
    # The derivative is an even function of net, so it can be evaluated at
    # -|net|. The exponent is then never positive: exp() can only underflow
    # to 0, whereas exp(-net) overflows for strongly negative net and turns
    # the quotient into inf / inf = nan.
    decay = np.exp(-np.abs(net))
    return decay / (1 + decay) ** 2

def tanh(net):
    """Hyperbolic tangent activation, applied element-wise to ``net``."""
    activation = np.tanh(net)
    return activation

def d_tanh(net):
    """Derivative of tanh with respect to ``net``: ``1 - tanh(net)**2``, element-wise."""
    squashed = np.tanh(net)
    return 1 - squashed ** 2

def softmax(net):
    """Row-wise softmax of a 2-D array.

    Args:
        net: Array indexed as (row, unit); normalisation runs along axis 1.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    # Softmax does not change when a constant is added to every entry of a
    # row. Subtracting the row maximum therefore leaves the result unchanged
    # while keeping every exponent <= 0, so exp() cannot overflow and produce
    # inf / inf = nan.
    shifted = net - np.max(net, axis=1, keepdims=True)
    exponentials = np.exp(shifted)  # computed once and reused for the sum
    return exponentials / np.sum(exponentials, axis=1, keepdims=True)

def softmax_derivative(net):
    """Build one square matrix ``diag(r) - r r^T`` per row ``r`` of ``net``.

    NOTE(review): ``diag(s) - s s^T`` is the softmax Jacobian when ``s`` is
    the softmax *output*. The parameter is named ``net`` like the
    pre-activation taken by the other derivatives here - confirm what
    callers are expected to pass.

    Args:
        net: 2-D array with one row per example.

    Returns:
        Float array of shape (rows, units, units), one matrix per example.
    """
    n_examples, n_units = net.shape
    jacobians = np.zeros((n_examples, n_units, n_units))

    for example_index, row in enumerate(net):
        jacobians[example_index] = np.diag(row) - np.outer(row, row)

    return jacobians

def softplus(net):
    """Softplus activation, ``log(1 + exp(net))``, element-wise.

    Args:
        net: Scalar or array of pre-activation values.

    Returns:
        The softplus of every entry of ``net``.
    """
    # logaddexp(0, net) == log(exp(0) + exp(net)) without ever forming
    # exp(net), which overflows to inf for large positive inputs.
    return np.logaddexp(0, net)

def d_softplus(net):
    """Derivative of softplus with respect to ``net``, element-wise.

    Computes ``exp(net) / (1 + exp(net))``, i.e. the logistic function.

    Args:
        net: Scalar or array of pre-activation values.

    Returns:
        The derivative evaluated at every entry of ``net``, in [0, 1].
    """
    # exp(net) / (1 + exp(net)) == exp(-log(1 + exp(-net))). The logaddexp
    # form never forms exp(net), which overflows for large positive net and
    # turns the quotient into inf / inf = nan.
    return np.exp(-np.logaddexp(0, -net))

def linear(net):
    """Identity activation: hand ``net`` back unchanged (same object, no copy)."""
    identity_output = net
    return identity_output

def d_linear(net):
    """Derivative of the identity activation.

    Always the scalar ``1``, regardless of the value or shape of ``net``.
    """
    slope = 1
    return slope

def ReLU(net):
    """Rectified linear unit: element-wise maximum of ``net`` and 0."""
    floor = 0
    return np.maximum(net, floor)

def d_ReLU(net):
    """Derivative of ReLU with respect to ``net``, element-wise.

    Args:
        net: Scalar or array of pre-activation values.

    Returns:
        Integer array shaped like ``net``: 0 where ``net < 0``, 1 elsewhere
        (the derivative at exactly 0 is taken to be 1).
    """
    # np.where selects element-wise. A plain ``0 if net < 0 else 1`` raises
    # ValueError for any array with more than one element, because the truth
    # value of such an array is ambiguous.
    return np.where(np.asarray(net) < 0, 0, 1)

def mean_squared_error(y_true, y_pred):
    """Per-row mean squared error between ``y_true`` and ``y_pred``.

    The squared differences are averaged along axis 1 with ``keepdims=True``,
    so the result has one entry per row (shape ``(rows, 1)``), not a single
    scalar.
    """
    residual = y_true - y_pred
    squared_residual = residual ** 2
    return np.mean(squared_residual, axis=1, keepdims=True)

def d_mean_squared_error(y_true, y_pred):
    """Error signal of the squared error: ``2 * (y_true - y_pred) / y_true.size``.

    The result has the same shape as the inputs. The sign is
    ``y_true - y_pred``, i.e. the negative of the derivative of the mean
    squared error with respect to ``y_pred``. The scale divides by the total
    number of elements in ``y_true``.
    """
    residual = y_true - y_pred
    doubled_residual = 2 * residual
    return doubled_residual / y_true.size

class Layer:
    """Fully connected layer: ``output = activation(input @ weights + biases)``.

    Attributes:
        weights: Array of shape (input_size, output_size).
        biases: Array of shape (1, output_size), broadcast over the examples.
        activation_function: Callable applied to the net input.
        activation_derivative: Callable giving the activation's derivative
            at the net input.
        input, net, output: Values cached by the latest ``forward`` call and
            read back by ``backward``.
    """

    def __init__(self, input_size, output_size, activation_function, activation_derivative):
        """Create the layer with random weights and zero biases.

        Args:
            input_size: Number of units in the previous layer.
            output_size: Number of units in this layer.
            activation_function: Callable ``f(net)``.
            activation_derivative: Callable ``f'(net)``.
        """
        # Weights are drawn uniformly from [-1/a, 1/a] with
        # a = sqrt(number of units in the previous layer).
        limit = 1 / np.sqrt(input_size)
        self.weights = np.random.uniform(low=-limit, high=limit, size=(input_size, output_size))
        # One bias per unit, shared by every example; starts at 0.
        self.biases = np.zeros((1, output_size))
        self.activation_function = activation_function
        self.activation_derivative = activation_derivative

    def forward(self, input_array):
        """Compute the layer output for ``input_array`` and cache the intermediates.

        Args:
            input_array: Output of the previous layer, one row per example.

        Returns:
            ``activation_function(input_array @ weights + biases)``.
        """
        self.input = input_array
        # The (1, output_size) biases broadcast over the example rows.
        self.net = np.dot(self.input, self.weights) + self.biases
        self.output = self.activation_function(self.net)
        return self.output

    def backward(self, d_Ep, learning_rate):
        """Update weights and biases, and return the error signal for the previous layer.

        Must be called after ``forward``, whose cached ``input`` and ``net``
        it reads.

        Args:
            d_Ep: Error signal arriving at this layer's outputs. Because the
                update below *adds* to the parameters, it has to point in the
                descent direction (e.g. target - output for the output layer;
                for other layers, the value returned by the next layer's
                ``backward``).
            learning_rate: Step size of the update.

        Returns:
            ``delta @ weights.T``, computed with the weights that were used
            in the forward pass.
        """
        delta = d_Ep * self.activation_derivative(self.net)
        # Propagate through the forward-pass weights, i.e. BEFORE the in-place
        # update below. Computing this after the update would push the error
        # through weights that did not produce the cached activations.
        propagated_error = np.dot(delta, self.weights.T)
        self.weights += learning_rate * np.dot(self.input.T, delta)
        self.biases += learning_rate * np.sum(delta, axis=0, keepdims=True)
        return propagated_error

class NeuralNetwork:
    """Feed-forward network: an ordered list of layers trained by backpropagation.

    A layer is any object exposing ``forward(input)`` and
    ``backward(d_Ep, learning_rate)``.
    """

    def __init__(self):
        # Layers in forward order; populated through add_layer().
        self.layers = []

    def add_layer(self, layer):
        """Append ``layer`` as the new last layer of the network."""
        self.layers.append(layer)

    def forward(self, input):
        """Run ``input`` through every layer in order.

        Args:
            input: The data fed to the first layer.

        Returns:
            The output of the last layer (``input`` itself if the network
            has no layers).
        """
        activation = input
        for layer in self.layers:
            # Each layer's output is the next layer's input.
            activation = layer.forward(activation)
        return activation

    def backward(self, d_Ep, learning_rate):
        """Propagate the error signal from the last layer back to the first.

        Each layer's ``backward`` is called with the value returned by the
        layer after it, starting from ``d_Ep``.
        """
        for layer in reversed(self.layers):
            d_Ep = layer.backward(d_Ep, learning_rate)

    def train(self, x_train, target, epochs, learning_rate, loss_function,
              loss_function_derivative, *, print_every=1):
        """Train on the whole of ``x_train`` for ``epochs`` passes.

        Every epoch performs one forward pass over all of ``x_train`` followed
        by one backward pass.

        Args:
            x_train: Dataset, examples x features.
            target: Target values passed to the loss functions.
            epochs: Number of passes over the dataset.
            learning_rate: Step size handed to every layer's ``backward``.
            loss_function: Callable ``loss(target, predictions)``; its result
                is only printed.
            loss_function_derivative: Callable ``(target, predictions)`` whose
                result is fed to ``backward`` as the initial error signal.
            print_every: Print the loss on every ``print_every``-th epoch,
                starting with epoch 0. The default 1 prints every epoch;
                0 or None disables printing.
        """
        for epoch in range(epochs):
            # Forward propagation: outputs of the last layer.
            predictions = self.forward(x_train)

            # Loss (for reporting) and error signal (for backpropagation).
            loss = loss_function(target, predictions)
            loss_gradient = loss_function_derivative(target, predictions)

            # Backward propagation updates every layer in place.
            self.backward(loss_gradient, learning_rate)

            if print_every and epoch % print_every == 0:
                print(f"Epoch {epoch}, Loss: {loss}")

# Smoke test: two stacked linear layers trained for two epochs on random data.
x = np.random.rand(3, 3)

target = np.random.rand(3, 2)
print('target:', target)
print('\n')

layer_one = Layer(
    input_size=3,
    output_size=2,
    activation_function=linear,
    activation_derivative=d_linear,
)

layer_two = Layer(
    input_size=2,
    output_size=2,
    activation_function=linear,
    activation_derivative=d_linear,
)

NN = NeuralNetwork()
NN.add_layer(layer_one)
NN.add_layer(layer_two)
NN.train(
    x_train=x,
    target=target,
    epochs=2,
    learning_rate=0.01,
    loss_function=mean_squared_error,
    loss_function_derivative=d_mean_squared_error,
)


target: [[0.61923639 0.77568904]
 [0.04217537 0.27944788]
 [0.8421818  0.79270981]]


Epoch 0, Loss: [[0.43710779]
 [0.10591626]
 [0.5712469 ]]
Epoch 1, Loss: [[0.41921255]
 [0.10048645]
 [0.54884559]]
