## Course: Deep Learning
### Date: Nov-Dec 2023

For this exercise, we had to implement a very basic neural network with scalar backpropagation. The network is shown below. The formulas were known, and we were only allowed to use the `math` module.

<img src="./DeepLearning_Assignment1.png" alt="Diagram of the neural network implemented in this exercise" />

In [None]:
import math

# Softmax activation.
def softmax(x):
    """Return the softmax probabilities of the scores in *x*.

    The largest score is subtracted from every element before
    exponentiating. This leaves the result mathematically unchanged but
    keeps ``math.exp`` from overflowing on large scores and avoids a 0/0
    division when every score is very negative.

    Args:
        x: Iterable of real-valued scores (logits).

    Returns:
        A list of floats, one per score, that sums to 1. An empty input
        yields an empty list.
    """
    scores = list(x)
    if not scores:
        return []

    # After the shift the largest exponent is exp(0) == 1, so the
    # normaliser below is always >= 1.
    shift = max(scores)
    exps = [math.exp(score - shift) for score in scores]
    expsum = sum(exps)
    return [value / expsum for value in exps]

# Sigmoid activation.
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + e**-x)`` of the scalar *x*.

    The computation branches on the sign of *x* so that ``math.exp`` is
    only ever called with a non-positive argument. The naive formula raises
    ``OverflowError`` for strongly negative inputs (roughly x < -709).
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # Equivalent form for negative x: e**x / (1 + e**x); e**x cannot overflow.
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)

# Cross-entropy loss.
def crossentropy(t, y):
    """Return the cross-entropy loss ``-sum(t[i] * log(y[i]))``.

    Args:
        t: Target distribution, indexable by class. The rest of this file
            uses a one-hot list such as ``[1, 0]``.
        y: Predicted probability for each class.

    Returns:
        The loss as a float.

    Raises:
        ValueError: If a class with a non-zero target has a predicted
            probability that is not positive (``math.log`` domain error).
        IndexError: If *t* has fewer entries than *y*.
    """
    loss = 0.0
    for i, prob in enumerate(y):
        target = t[i]
        # Classes with a zero target contribute nothing; skipping them also
        # avoids evaluating log(0) for classes the model has ruled out.
        if target:
            loss -= target * math.log(prob)
    return loss

# Compute the derivatives in one backward pass.
def backward_pass(t, y, h, k, x, v=None):
    """Compute the loss gradients for every layer in one backward pass.

    The network is input -> linear (w, b) -> sigmoid -> linear (v, c) ->
    softmax, trained with cross-entropy against the target *t*.

    Args:
        t: Target values, one per output (one-hot in this file).
        y: Softmax outputs of the forward pass.
        h: Hidden activations (sigmoid outputs).
        k: Hidden pre-activations. Kept for interface compatibility; the
            sigmoid derivative is expressed through *h*, so the values are
            not needed.
        x: Network inputs.
        v: Hidden-to-output weights, indexed ``v[hidden][output]``. They are
            required to propagate the gradient back to the hidden layer.
            When omitted, the module-level variable ``v`` is used so that
            existing five-argument calls keep working.

    Returns:
        A tuple ``(dy, dv, dh, dc, dk, dw, db)``:
        ``dy`` is the gradient w.r.t. the pre-softmax outputs (``y - t``),
        ``dv``/``dc`` w.r.t. the output weights/biases, ``dh``/``dk`` w.r.t.
        the hidden activations/pre-activations, and ``dw``/``db`` w.r.t. the
        input weights/biases. ``dv`` is indexed ``[hidden][output]`` and
        ``dw`` is indexed ``[input][hidden]``.

    Raises:
        ValueError: If *v* is not given and no module-level ``v`` exists.
    """
    if v is None:
        # Compatibility shim for callers that predate the ``v`` parameter.
        v = globals().get("v")
        if v is None:
            raise ValueError(
                "backward_pass needs the hidden-to-output weights: pass v=..."
            )

    # Softmax combined with cross-entropy: dL/do_j = y_j - t_j.
    dy = [y_j - t[j] for j, y_j in enumerate(y)]

    # Output layer: o_j = sum_i v[i][j] * h_i + c_j.
    dv = [[dy_j * h_i for dy_j in dy] for h_i in h]
    dc = list(dy)

    # Hidden activations: dL/dh_i = sum_j dL/do_j * v[i][j].
    dh = []
    for i in range(len(h)):
        grad = 0.0
        for j, dy_j in enumerate(dy):
            grad += dy_j * v[i][j]
        dh.append(grad)

    # Sigmoid derivative expressed through its output: h * (1 - h).
    dk = [dh_i * h_i * (1 - h_i) for dh_i, h_i in zip(dh, h)]

    # Input layer: k_j = sum_i w[i][j] * x_i + b_j.
    dw = [[dk_j * x_i for dk_j in dk] for x_i in x]
    db = list(dk)

    return dy, dv, dh, dc, dk, dw, db

# Compute the values after going through the network.
def forward_pass(w, v, b, c, x, t):
    """Run the network forward and return every intermediate value.

    The layer sizes are taken from the bias vectors (``len(b)`` hidden
    units, ``len(c)`` outputs) instead of being fixed at 3 and 2, so the
    same code works for other layer widths.

    Args:
        w: Input-to-hidden weights, indexed ``w[input][hidden]``.
        v: Hidden-to-output weights, indexed ``v[hidden][output]``.
        b: Hidden-layer biases, one per hidden unit.
        c: Output-layer biases, one per output.
        x: Input values.
        t: Target passed to ``crossentropy`` together with the outputs.

    Returns:
        A tuple ``(k, h, o, y, loss)``: hidden pre-activations, hidden
        sigmoid activations, output pre-activations, softmax outputs and
        the value returned by ``crossentropy(t, y)``.
    """
    # First linear layer: k_j = sum_i w[i][j] * x_i + b_j.
    k = []
    for j in range(len(b)):
        total = 0.0
        for i, x_i in enumerate(x):
            total += w[i][j] * x_i
        k.append(total + b[j])

    # Sigmoid activation on the hidden layer.
    h = [sigmoid(k_j) for k_j in k]

    # Second linear layer: o_j = sum_i v[i][j] * h_i + c_j.
    o = []
    for j in range(len(c)):
        total = 0.0
        for i, h_i in enumerate(h):
            total += v[i][j] * h_i
        o.append(total + c[j])

    # Softmax turns the linear outputs into class probabilities.
    y = softmax(o)

    loss = crossentropy(t, y)

    return k, h, o, y, loss

# --- Network parameters and example data ---------------------------------
# Weight matrices.
w = [[1.0] * 3, [-1.0] * 3]  # input -> hidden, indexed w[input][hidden]
v = [
    [1.0, 1.0],
    [-1.0, -1.0],
    [-1.0, -1.0],
]  # hidden -> output, indexed v[hidden][output]
# Bias vectors.
b = [0] * 3  # hidden layer
c = [0] * 2  # output layer
# Input sample.
x = [1, -1]
# Target classes.
t = [1, 0]

# Forward pass: intermediate values, outputs and loss.
(k, h, o, y, loss) = forward_pass(w, v, b, c, x, t)

# Backward pass: gradients for every layer.
(dy, dv, dh, dc, dk, dw, db) = backward_pass(t, y, h, k, x)