# Perceptron 2 inputs, 1 hidden layer, 1 output

Exercice (perceptron):
- Réseau de neurones à deux entrées, une sortie et une couche cachée avec un neurone
- Les entrées sont 0.1 et 0.5, la sortie souhaitée est 0.2 
- Ecrire un code pour la forward pass.
- Calculer l'erreur quadratique moyenne.
- Rétropropager l'erreur.
- Entraîner le réseau

In [1]:
import numpy as np

# Single training example used throughout the exercise:
# the two network inputs and the desired network output.
i1, i2 = 0.1, 0.5
target_o = 0.2

In [None]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) of ``x``.

    ``x`` is passed to ``np.exp``, so scalars and NumPy arrays are both
    accepted; arrays are processed element-wise.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def fwd_pass(w1, w2, w3, b1, b2):
    """Run the forward pass of the 2-1-1 network on the module-level inputs.

    The inputs are read from the module-level names ``i1`` and ``i2``.

    Args:
        w1: weight from input ``i1`` to the hidden neuron.
        w2: weight from input ``i2`` to the hidden neuron.
        w3: weight from the hidden neuron to the output neuron.
        b1: bias of the hidden neuron.
        b2: bias of the output neuron.

    Returns:
        Tuple ``(net_h, out_h, net_o, out_o)``: pre-activation and sigmoid
        activation of the hidden neuron, then of the output neuron.
    """
    # Hidden neuron: weighted sum of both inputs plus bias, then sigmoid.
    hidden_pre = i1*w1 + i2*w2 + b1
    hidden_act = sigmoid(hidden_pre)

    # Output neuron: fed by the single hidden activation.
    output_pre = hidden_act*w3 + b2
    output_act = sigmoid(output_pre)

    return hidden_pre, hidden_act, output_pre, output_act

def grad_w(w3, out_h, out_o, x1=None, x2=None, target=None):
    """Return the gradients of Err = 1/2 * (target - out_o)**2 w.r.t. w1, w2, w3.

    The original version took no arguments and read ``out_o``, ``out_h`` and
    ``w3`` as free names, although they only exist as parameters / return
    values of the forward pass. They are now passed in explicitly.

    Args:
        w3: weight from the hidden neuron to the output neuron.
        out_h: sigmoid activation of the hidden neuron (from the forward pass).
        out_o: sigmoid activation of the output neuron (from the forward pass).
        x1: first network input; defaults to the module-level ``i1``.
        x2: second network input; defaults to the module-level ``i2``.
        target: desired output; defaults to the module-level ``target_o``.

    Returns:
        Tuple ``(grad_w1, grad_w2, grad_w3)``.
    """
    # Fall back to the notebook-level training example when not overridden.
    if x1 is None:
        x1 = i1
    if x2 is None:
        x2 = i2
    if target is None:
        target = target_o

    # dErr/dnet_o: error derivative times the output sigmoid derivative.
    delta_o = -(target - out_o) * (1 - out_o) * out_o
    # dErr/dnet_h: propagate through w3 and the hidden sigmoid derivative.
    delta_h = delta_o * w3 * (1 - out_h) * out_h

    grad_w1 = delta_h * x1
    grad_w2 = delta_h * x2
    grad_w3 = delta_o * out_h
    return grad_w1, grad_w2, grad_w3

def update(w1, w2, w3, grad_w1, grad_w2, grad_w3, lr=1.0):
    """Return the weights after one gradient-descent step.

    The original version took no arguments and assigned to ``w1``, ``w2``
    and ``w3`` inside the function, which makes them local names and raises
    ``UnboundLocalError`` on the first read. Weights and gradients are now
    passed in explicitly.

    Args:
        w1, w2, w3: current weights.
        grad_w1, grad_w2, grad_w3: gradients of the error w.r.t. each weight.
        lr: learning rate; the default of 1.0 reproduces the original
            ``w - grad`` step.

    Returns:
        Tuple ``(w1, w2, w3)`` holding the updated weights.
    """
    new_w1 = w1 - lr * grad_w1
    new_w2 = w2 - lr * grad_w2
    new_w3 = w3 - lr * grad_w3
    return new_w1, new_w2, new_w3

In [None]:
import numpy as np

class Perceptron:
    """Fully connected network with one sigmoid hidden layer and a linear output.

    Training is full-batch gradient descent on the squared error.

    Attributes:
        input_size, hidden_size, output_size: layer widths.
        learning_rate: step size applied to every parameter update.
        epochs: number of iterations performed by ``fit``.
        weights_ih, bias_h: input -> hidden weights and hidden biases.
        weights_ho, bias_o: hidden -> output weights and output biases.
        hidden, output: activations cached by the latest ``forward`` call.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate, epochs):
        """Store the hyper-parameters and initialise the parameters.

        Weights are drawn with ``np.random.randn``; biases start at zero.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights_ih = np.random.randn(input_size, hidden_size)
        self.bias_h = np.zeros(hidden_size)
        self.weights_ho = np.random.randn(hidden_size, output_size)
        self.bias_o = np.zeros(output_size)

    def sigmoid(self, x):
        """Return the element-wise logistic function of ``x``."""
        return 1 / (1 + np.exp(-x))

    def forward(self, X):
        """Compute the network output for ``X`` and cache the activations.

        Stores the hidden activations in ``self.hidden`` and the (linear)
        output in ``self.output``; returns ``self.output``.
        """
        self.hidden = self.sigmoid(np.dot(X, self.weights_ih) + self.bias_h)
        self.output = np.dot(self.hidden, self.weights_ho) + self.bias_o
        return self.output

    def mse_loss(self, y_true, y_pred):
        """Return the mean of the squared differences between targets and predictions."""
        return np.mean((y_true - y_pred) ** 2)

    def backprop(self, X, y_true):
        """Apply one gradient-descent step using the activations cached by ``forward``.

        ``X`` and ``y_true`` must be 2-D arrays (one row per sample) because
        the weight gradients are formed with ``.T.dot(...)``.
        """
        # Derivative of the summed squared error w.r.t. the linear output
        # (differs from the gradient of ``mse_loss`` only by a constant factor).
        delta_o = -2 * (y_true - self.output)
        # Propagate through the hidden sigmoid; computed before weights_ho is
        # modified so that the pre-update weights are used.
        delta_h = delta_o.dot(self.weights_ho.T) * self.hidden * (1 - self.hidden)
        # Move AGAINST the gradient (the original added it, which increases the loss).
        self.weights_ho -= self.hidden.T.dot(delta_o) * self.learning_rate
        self.bias_o -= np.sum(delta_o, axis=0) * self.learning_rate
        self.weights_ih -= X.T.dot(delta_h) * self.learning_rate
        self.bias_h -= np.sum(delta_h, axis=0) * self.learning_rate

    def fit(self, X, y_true):
        """Train for ``self.epochs`` iterations, printing the loss every 1000 epochs.

        A single sample given as a 1-D sequence (and a scalar or 1-D target)
        is promoted to 2-D; inputs that are already 2-D are used unchanged.
        """
        X = np.atleast_2d(np.asarray(X, dtype=float))
        y_true = np.asarray(y_true, dtype=float)
        if y_true.ndim < 2:
            # One row per sample, one column per output unit.
            y_true = y_true.reshape(-1, self.output_size)

        for epoch in range(self.epochs):
            self.forward(X)
            # Loss is measured before the parameters are updated for this epoch.
            loss = self.mse_loss(y_true, self.output)
            self.backprop(X, y_true)
            if epoch % 1000 == 0:
                print(f"Epoch {epoch}: Loss = {loss:.4f}")

    def predict(self, X):
        """Return the network output for ``X`` (same as ``forward``)."""
        return self.forward(X)
In [None]:
class percept:
    """Scalar 2-1-1 network: two inputs, one hidden neuron, one output neuron.

    Holds the training hyper-parameters and implements the forward pass.
    """

    def __init__(self, lr, epochs):
        """Store the learning rate ``lr`` and the number of ``epochs``."""
        self.lr = lr
        self.epochs = epochs

    @staticmethod
    def sigmoid(x):
        """Return the logistic function 1 / (1 + exp(-x)) of ``x``."""
        # Declared static: the function takes no ``self``, so without the
        # decorator ``instance.sigmoid(x)`` would fail with a TypeError.
        return 1/(1+np.exp(-x))

    def fwd_pass(self, w1, w2, w3, b1, b2, x1=None, x2=None):
        """Run the forward pass and return every intermediate value.

        Args:
            w1: weight from the first input to the hidden neuron.
            w2: weight from the second input to the hidden neuron.
            w3: weight from the hidden neuron to the output neuron.
            b1: bias of the hidden neuron.
            b2: bias of the output neuron.
            x1: first input; defaults to the module-level ``i1``.
            x2: second input; defaults to the module-level ``i2``.

        Returns:
            Tuple ``(net_h, out_h, net_o, out_o)``: pre-activation and sigmoid
            activation of the hidden neuron, then of the output neuron.
        """
        # Fall back to the notebook-level inputs when not overridden.
        if x1 is None:
            x1 = i1
        if x2 is None:
            x2 = i2

        net_h = x1*w1 + x2*w2 + b1
        # Use the class's own sigmoid rather than an unrelated global one.
        out_h = self.sigmoid(net_h)
        net_o = out_h*w3 + b2
        out_o = self.sigmoid(net_o)
        return net_h, out_h, net_o, out_o

def grad_w(w3, out_h, out_o, x1=None, x2=None, target=None):
    """Return the gradients of Err = 1/2 * (target - out_o)**2 w.r.t. w1, w2, w3.

    The original version took no arguments and read ``out_o``, ``out_h`` and
    ``w3`` as free names, although they only exist as parameters / return
    values of the forward pass. They are now passed in explicitly.

    Args:
        w3: weight from the hidden neuron to the output neuron.
        out_h: sigmoid activation of the hidden neuron (from the forward pass).
        out_o: sigmoid activation of the output neuron (from the forward pass).
        x1: first network input; defaults to the module-level ``i1``.
        x2: second network input; defaults to the module-level ``i2``.
        target: desired output; defaults to the module-level ``target_o``.

    Returns:
        Tuple ``(grad_w1, grad_w2, grad_w3)``.
    """
    # Fall back to the notebook-level training example when not overridden.
    if x1 is None:
        x1 = i1
    if x2 is None:
        x2 = i2
    if target is None:
        target = target_o

    # dErr/dnet_o: error derivative times the output sigmoid derivative.
    delta_o = -(target - out_o) * (1 - out_o) * out_o
    # dErr/dnet_h: propagate through w3 and the hidden sigmoid derivative.
    delta_h = delta_o * w3 * (1 - out_h) * out_h

    grad_w1 = delta_h * x1
    grad_w2 = delta_h * x2
    grad_w3 = delta_o * out_h
    return grad_w1, grad_w2, grad_w3

def update(w1, w2, w3, grad_w1, grad_w2, grad_w3, lr=1.0):
    """Return the weights after one gradient-descent step.

    The original version took no arguments and assigned to ``w1``, ``w2``
    and ``w3`` inside the function, which makes them local names and raises
    ``UnboundLocalError`` on the first read. Weights and gradients are now
    passed in explicitly.

    Args:
        w1, w2, w3: current weights.
        grad_w1, grad_w2, grad_w3: gradients of the error w.r.t. each weight.
        lr: learning rate; the default of 1.0 reproduces the original
            ``w - grad`` step.

    Returns:
        Tuple ``(w1, w2, w3)`` holding the updated weights.
    """
    new_w1 = w1 - lr * grad_w1
    new_w2 = w2 - lr * grad_w2
    new_w3 = w3 - lr * grad_w3
    return new_w1, new_w2, new_w3

## Perceptron with two inputs, one hidden layer and one output

## The Error

$Err = \frac{1}{2}(target_o - out_o)^2$  

## Forward Pass

$net_h = i_1*w_1 + i_2*w_2 + b_1$  

$out_h = sigmoid(net_h)$  

$net_o = out_h*w_3 + b_2$  

$out_o = sigmoid(net_o)$

# The Gradient

$\frac{\partial Err}{\partial w_1} = \frac{\partial Err}{\partial out_o}*\frac{\partial out_o}{\partial net_o}*\frac{\partial net_o}{\partial out_h}*\frac{\partial out_h}{\partial net_h}*\frac{\partial net_h}{\partial w_1}$  

$\frac{\partial Err}{\partial w_2} = \frac{\partial Err}{\partial out_o}*\frac{\partial out_o}{\partial net_o}*\frac{\partial net_o}{\partial out_h}*\frac{\partial out_h}{\partial net_h}*\frac{\partial net_h}{\partial w_2}$  

$\frac{\partial Err}{\partial w_3} = \frac{\partial Err}{\partial out_o}*\frac{\partial out_o}{\partial net_o}*\frac{\partial net_o}{\partial w_3}$

We have:  

$ Err = \frac{1}{2}(target_o - out_o)^2$

$\Longrightarrow \frac{\partial Err}{\partial out_o} = \frac{1}{2}.2.(-1).(target_o - out_o) = -(target_o - out_o)$

$f(x) = sigmoid(x) \Longrightarrow f'(x)=(1-f(x)).f(x)$

$ \Longrightarrow out_o = sigmoid(net_o) \Longrightarrow \frac{\partial out_o}{\partial net_o} = (1 - out_o)out_o$

$ \Longrightarrow out_h = sigmoid(net_h) \Longrightarrow \frac{\partial out_h}{\partial net_h} = (1 - out_h)out_h$

$ \frac{\partial net_o}{\partial out_h} = w_3$

$ \frac{\partial net_o}{\partial w_3} = out_h$

$ \frac{\partial net_h}{\partial w_1} = i_1$

$ \frac{\partial net_h}{\partial w_2} = i_2$

# The Gradient

$\frac{\partial Err}{\partial w_1} = -(target_o - out_o)(1-out_o)out_o.w_3.(1-out_h)out_h.i_1$  

$\frac{\partial Err}{\partial w_2} = -(target_o - out_o)(1-out_o)out_o.w_3.(1-out_h)out_h.i_2$ 

$\frac{\partial Err}{\partial w_3} = -(target_o - out_o)(1-out_o)out_o.out_h$