## Below is an image of a simple feed-forward neural network with 3 layers. It contains 2 input neurons, 2 hidden neurons, and 2 output neurons. We are trying to find the weights and biases that produce the desired outputs O1 & O2.
![My Image](FFNN.png)

## Constants

In [1]:
from Sigmoid import sigmoid
# Training configuration, read as module-level globals by the cells below.
alpha = 0.5 # Learning rate: passed by train() to update_parameters to scale each gradient step
O1target = 0.9 # Desired value for output O1
O2target = 0.1 # Desired value for output O2
epochs = 1000 # Number of training iterations run by train()

## Initializing parameters

In [2]:
def initialize_parameters():
    """Build the starting state of the network as a new dictionary.

    The dictionary mixes the fixed inputs with the trainable values:

    * ``X1``, ``X2`` -- the two input values.
    * ``b1``, ``b2`` -- biases added to the hidden neurons H1 and H2.
    * ``b3``, ``b4`` -- biases added to the output neurons O1 and O2.
    * ``w1``..``w4`` -- input-to-hidden weights.
    * ``w6``..``w9`` -- hidden-to-output weights (there is no ``w5``).

    Returns:
        dict: A fresh dict on every call, so callers may mutate it freely.
    """
    inputs = dict(X1=0.7, X2=0.3)
    biases = dict(b1=0.4, b2=0.4, b3=0.6, b4=0.6)
    input_to_hidden = dict(w1=0.3, w2=0.37, w3=0.35, w4=0.27)
    hidden_to_output = dict(w6=0.42, w7=0.25, w8=0.15, w9=0.45)

    # Merge in the same order the keys have always been declared in.
    return {**inputs, **biases, **input_to_hidden, **hidden_to_output}

## Forward propagation

In [3]:
def forward_pass(params):
    """Propagate the inputs through the hidden and output layers.

    Each neuron's net input is a weighted sum of the previous layer's
    values plus a bias, which is then passed through the imported
    ``sigmoid`` function.

    Wiring used here:
        H1 <- X1*w4 + X2*w2 + b1
        H2 <- X1*w3 + X2*w1 + b2
        O1 <- H1*w6 + H2*w9 + b3
        O2 <- H1*w7 + H2*w8 + b4

    Args:
        params: Mapping holding ``X1``, ``X2``, ``w1``-``w4``,
            ``w6``-``w9`` and ``b1``-``b4``.

    Returns:
        tuple: ``(H1out, H2out, O1out, O2out)`` -- the activations of the
        two hidden neurons followed by the two output neurons.
    """
    x1, x2 = (params[name] for name in ('X1', 'X2'))
    w1, w2, w3, w4 = (params[name] for name in ('w1', 'w2', 'w3', 'w4'))
    w6, w7, w8, w9 = (params[name] for name in ('w6', 'w7', 'w8', 'w9'))
    b1, b2, b3, b4 = (params[name] for name in ('b1', 'b2', 'b3', 'b4'))

    # Hidden layer.
    hidden_1 = sigmoid(x1 * w4 + x2 * w2 + b1)
    hidden_2 = sigmoid(x1 * w3 + x2 * w1 + b2)

    # Output layer, fed by both hidden activations.
    output_1 = sigmoid(hidden_1 * w6 + hidden_2 * w9 + b3)
    output_2 = sigmoid(hidden_1 * w7 + hidden_2 * w8 + b4)

    return hidden_1, hidden_2, output_1, output_2

## Cost function

In [4]:
def compute_cost(O1out, O2out, O1target, O2target):
    """Return the total half-squared-error over both output neurons.

    Args:
        O1out: Actual value of output O1.
        O2out: Actual value of output O2.
        O1target: Desired value of output O1.
        O2target: Desired value of output O2.

    Returns:
        ``0.5 * (O1target - O1out)**2 + 0.5 * (O2target - O2out)**2``.
    """
    residual_1 = O1target - O1out
    residual_2 = O2target - O2out
    return 0.5 * residual_1 ** 2 + 0.5 * residual_2 ** 2

## Backward Propagation

In [5]:
def backward_pass(params, H1out, H2out, O1out, O2out,
                  target_o1=None, target_o2=None):
    """Compute the gradient of the cost with respect to every weight and bias.

    The cost is the half-squared-error summed over both outputs (see
    ``compute_cost``). Each activation contributes a factor
    ``out * (1 - out)``, i.e. the code assumes the activation's derivative
    can be written that way (true for the logistic sigmoid).

    Args:
        params: Mapping holding the inputs ``X1``, ``X2`` and the
            hidden-to-output weights ``w6``-``w9``. The input-to-hidden
            weights are not needed to form the gradients.
        H1out: Activation of hidden neuron H1 from the forward pass.
        H2out: Activation of hidden neuron H2 from the forward pass.
        O1out: Activation of output neuron O1 from the forward pass.
        O2out: Activation of output neuron O2 from the forward pass.
        target_o1: Desired value for O1. When ``None`` (the default) the
            module-level ``O1target`` is used, as before.
        target_o2: Desired value for O2. When ``None`` (the default) the
            module-level ``O2target`` is used, as before.

    Returns:
        dict: Gradients keyed ``dw1``-``dw4``, ``dw6``-``dw9`` and
        ``db1``-``db4``, matching the parameter names with a ``d`` prefix.
    """
    X1, X2 = params['X1'], params['X2']
    w6, w7, w8, w9 = params['w6'], params['w7'], params['w8'], params['w9']

    # Explicit targets win; otherwise fall back to the notebook globals so
    # existing five-argument calls behave exactly as they used to.
    goal_1 = O1target if target_o1 is None else target_o1
    goal_2 = O2target if target_o2 is None else target_o2

    # Error signal at each output neuron: dCost/dOut * dOut/dNet.
    delta_o1 = (O1out - goal_1) * (O1out * (1 - O1out))
    delta_o2 = (O2out - goal_2) * (O2out * (1 - O2out))

    # Local slope of each hidden activation.
    h1_slope = H1out * (1 - H1out)
    h2_slope = H2out * (1 - H2out)

    # Error reaching each hidden neuron, kept separate per output path.
    # The two paths are deliberately NOT summed before multiplying by the
    # input: keeping the original order of operations keeps the floating
    # point results bit-for-bit identical to the long-hand formulas.
    h1_via_o1 = delta_o1 * w6 * h1_slope
    h1_via_o2 = delta_o2 * w7 * h1_slope
    h2_via_o1 = delta_o1 * w9 * h2_slope
    h2_via_o2 = delta_o2 * w8 * h2_slope

    grads = {
        # Input-to-hidden weights (w1, w3 feed H2; w2, w4 feed H1).
        'dw1': h2_via_o1 * X2 + h2_via_o2 * X2,
        'dw2': h1_via_o1 * X2 + h1_via_o2 * X2,
        'dw3': h2_via_o1 * X1 + h2_via_o2 * X1,
        'dw4': h1_via_o1 * X1 + h1_via_o2 * X1,
        # Hidden-to-output weights.
        'dw6': delta_o1 * H1out,
        'dw7': delta_o2 * H1out,
        'dw8': delta_o2 * H2out,
        'dw9': delta_o1 * H2out,
        # Biases: same as the weight gradients with an input of 1.
        'db1': h1_via_o1 + h1_via_o2,
        'db2': h2_via_o1 + h2_via_o2,
        'db3': delta_o1,
        'db4': delta_o2,
    }
    return grads

## Parameter Update

In [6]:
def update_parameters(params, grads, alpha):
    """Apply one gradient-descent step to every weight and bias.

    Each trainable entry ``p`` becomes ``p - alpha * grads['d' + p]``.
    The inputs ``X1`` and ``X2`` are written back unchanged.

    Args:
        params: Mapping with ``X1``, ``X2``, ``w1``-``w4``, ``w6``-``w9``
            and ``b1``-``b4``. It is modified in place.
        grads: Mapping with the matching ``dw*`` / ``db*`` gradients.
        alpha: Learning rate multiplying every gradient.

    Returns:
        The same ``params`` object that was passed in, after the update.
    """
    trainable = (
        'w1', 'w2', 'w3', 'w4', 'w6', 'w7', 'w8', 'w9',
        'b1', 'b2', 'b3', 'b4',
    )

    # Read everything before writing anything, so a missing key raises
    # before params has been touched.
    x1, x2 = params['X1'], params['X2']
    current_values = [params[name] for name in trainable]
    slopes = [grads['d' + name] for name in trainable]

    for name, value, slope in zip(trainable, current_values, slopes):
        params[name] = value - alpha * slope

    # Inputs are not trainable; store them back as they were.
    params['X1'] = x1
    params['X2'] = x2

    return params


## Model Training

In [7]:
def train():
    """Train the network with plain gradient descent and report progress.

    Starts from ``initialize_parameters()`` and repeats, ``epochs`` times:
    forward pass, cost evaluation, backward pass, parameter update. The
    cost and outputs printed for an epoch are the ones measured *before*
    that epoch's update is applied. After the loop the learned weights and
    biases are printed in sorted key order; the inputs are left out.

    Reads the module-level ``epochs``, ``alpha``, ``O1target`` and
    ``O2target``. Returns nothing.
    """
    params = initialize_parameters()

    for step in range(epochs):
        epoch = step + 1  # Report epochs 1-based.

        hidden_1, hidden_2, O1out, O2out = forward_pass(params)
        cost = compute_cost(O1out, O2out, O1target, O2target)
        params = update_parameters(
            params,
            backward_pass(params, hidden_1, hidden_2, O1out, O2out),
            alpha,
        )

        print(f"Epoch {epoch} - Cost: {cost:.6f}")
        print(f"    O1out: {O1out:.6f}, O2out: {O2out:.6f}")

    print("\nFinal Weights and Biases:")
    for key in sorted(params):
        if key.startswith('X'):
            continue  # Inputs are not learned values.
        print(f"  {key}: {params[key]:.4f}")

# Run training: prints the cost and both outputs for every epoch,
# followed by the final weights and biases.
train()

Epoch 1 - Cost: 0.191665
    O1out: 0.765809, O2out: 0.704419
Epoch 2 - Cost: 0.176006
    O1out: 0.769804, O2out: 0.678844
Epoch 3 - Cost: 0.160403
    O1out: 0.773610, O2out: 0.652115
Epoch 4 - Cost: 0.145154
    O1out: 0.777247, O2out: 0.624632
Epoch 5 - Cost: 0.130542
    O1out: 0.780729, O2out: 0.596849
Epoch 6 - Cost: 0.116811
    O1out: 0.784068, O2out: 0.569234
Epoch 7 - Cost: 0.104135
    O1out: 0.787273, O2out: 0.542225
Epoch 8 - Cost: 0.092620
    O1out: 0.790352, O2out: 0.516195
Epoch 9 - Cost: 0.082298
    O1out: 0.793312, O2out: 0.491425
Epoch 10 - Cost: 0.073143
    O1out: 0.796156, O2out: 0.468106
Epoch 11 - Cost: 0.065086
    O1out: 0.798889, O2out: 0.446337
Epoch 12 - Cost: 0.058035
    O1out: 0.801516, O2out: 0.426144
Epoch 13 - Cost: 0.051882
    O1out: 0.804041, O2out: 0.407499
Epoch 14 - Cost: 0.046521
    O1out: 0.806467, O2out: 0.390335
Epoch 15 - Cost: 0.041850
    O1out: 0.808800, O2out: 0.374559
Epoch 16 - Cost: 0.037775
    O1out: 0.811042, O2out: 0.360071
E

## After 1000 epochs, the model gives O1out as 0.899996 (target 0.9) and O2out as 0.100007 (target 0.1). The training run also prints the final learned weights and biases.