# Forward Propagation

$$
\begin{align*}
\mathbf{x} &: \text{input} \\
\mathbf{z}^{(1)} &= \mathbf{W}^{(1)} \mathbf{x} + \mathbf{b}^{(1)} \\
\mathbf{h}^{(1)} &= \phi(\mathbf{z}^{(1)}) \\
\mathbf{z}^{(2)} &= \mathbf{W}^{(2)} \mathbf{h}^{(1)} + \mathbf{b}^{(2)} \\
\mathbf{h}^{(2)} &= \phi(\mathbf{z}^{(2)}) \\
&\vdots \\
\mathbf{z}^{(n-1)} &= \mathbf{W}^{(n-1)} \mathbf{h}^{(n-2)} + \mathbf{b}^{(n-1)} \\
\mathbf{h}^{(n-1)} &= \phi(\mathbf{z}^{(n-1)}) \\
\mathbf{o} &= \mathbf{W}^{(n)} \mathbf{h}^{(n-1)} + \mathbf{b}^{(n)} \\
\end{align*}
$$

# Backward Propagation

## Chain Rule

$$
\frac{\partial Z}{\partial X} = \frac{\partial Z}{\partial Y} \frac{\partial Y}{\partial X}
$$

## Objective

$$
\text{Calculate: }
\frac{\partial J}{\partial W^{(n)}} \rightarrow
\dots \rightarrow
\frac{\partial J}{\partial W^{(2)}} \rightarrow
\frac{\partial J}{\partial W^{(1)}}
$$

$$
\begin{equation}
\begin{split}
\frac{\partial J}{\partial W^{(n)}} &= \frac{\partial L}{\partial W^{(n)}} + \frac{\partial s}{\partial W^{(n)}} \\
\\
\frac{\partial L}{\partial W^{(n)}} &= \frac{\partial L}{\partial o} \frac{\partial o}{\partial W^{(n)}} \\
&= \frac{\partial L}{\partial o} \frac{\partial (W^{(n)} h^{(n-1)})}{\partial W^{(n)}} \\
&= \frac{\partial L}{\partial o} h^{(n-1)T} \\
&= (o - y) h^{(n-1)T} \\
\\
\frac{\partial s}{\partial W^{(n)}} &= \lambda W^{(n)} \\
\\
\Rightarrow\frac{\partial J}{\partial W^{(n)}} &= (o - y) h^{(n-1)T} + \lambda W^{(n)} 
\end{split}
\end{equation}
$$

$$
\begin{equation}
\begin{split}
\frac{\partial J}{\partial W^{(i)}} &= \frac{\partial L}{\partial W^{(i)}} + \frac{\partial s}{\partial W^{(i)}} \\
\\
\frac{\partial L}{\partial W^{(i)}} &= \frac{\partial L}{\partial z^{(i)}} h^{(i-1)T} \\
\\
\frac{\partial L}{\partial z^{(i)}} &= \frac{\partial L}{\partial h^{(i)}} \frac{\partial h^{(i)}}{\partial z^{(i)}} \\
&= \frac{\partial L}{\partial h^{(i)}} \frac{\partial \phi(z^{(i)})}{\partial z^{(i)}} \\
&= \frac{\partial L}{\partial h^{(i)}} \circ \phi'(z^{(i)}) \\
\\
\frac{\partial L}{\partial h^{(i)}} &= \frac{\partial L}{\partial z^{(i+1)}} \frac{\partial z^{(i+1)}}{\partial h^{(i)}} \\
&= \frac{\partial L}{\partial z^{(i+1)}} \frac{\partial (W^{(i+1)} h^{(i)})}{\partial h^{(i)}} \\
&= \frac{\partial L}{\partial z^{(i+1)}} W^{(i+1)} \\
\\
\frac{\partial s}{\partial W^{(i)}} &= \lambda W^{(i)} \\
\\
\Rightarrow\frac{\partial J}{\partial W^{(i)}} &= \left( \frac{\partial L}{\partial z^{(i)}} h^{(i-1)T} \right ) + \lambda W^{(i)} \\
\\
\text{if } i = n - 1: \\
\Rightarrow\frac{\partial L}{\partial h^{(n-1)}} &= \frac{\partial L}{\partial o} W^{(n)} \\
&= (o - y) W^{(n)} \\
\\
\Rightarrow\frac{\partial L}{\partial z^{(n-1)}} &= \left ( (o - y) W^{(n)} \right ) \circ \phi'(z^{(n-1)}) \\
\\
\Rightarrow\frac{\partial J}{\partial W^{(n-1)}} &= \left( \left ( (o - y) W^{(n)} \right ) \circ \phi'(z^{(n-1)}) \right ) h^{(n-2)T} + \lambda W^{(n-1)}
\end{split}
\end{equation}
$$

## Conclusion

$$
\begin{split}
\frac{\partial L}{\partial o} &= o - y \\
\frac{\partial L}{\partial W^{(n)}} &= \frac{\partial L}{\partial o} h^{(n-1)T} \\
\frac{\partial s}{\partial W^{(n)}} &= \lambda W^{(n)} \\
\frac{\partial J}{\partial W^{(n)}} &= \frac{\partial L}{\partial W^{(n)}} + \frac{\partial s}{\partial W^{(n)}} \\
\\
\frac{\partial L}{\partial h^{(n-1)}} &= \frac{\partial L}{\partial o} W^{(n)} \\
\frac{\partial L}{\partial z^{(n-1)}} &= \frac{\partial L}{\partial h^{(n-1)}} \circ \phi'(z^{(n-1)}) \\
\frac{\partial L}{\partial W^{(n-1)}} &= \frac{\partial L}{\partial z^{(n-1)}} h^{(n-2)T} \\
\frac{\partial s}{\partial W^{(n-1)}} &= \lambda W^{(n-1)} \\
\frac{\partial J}{\partial W^{(n-1)}} &= \frac{\partial L}{\partial W^{(n-1)}} + \frac{\partial s}{\partial W^{(n-1)}} \\
\\
&\vdots \\
\\
\frac{\partial L}{\partial h^{(1)}} &= \frac{\partial L}{\partial z^{(2)}} W^{(2)} \\
\frac{\partial L}{\partial z^{(1)}} &= \frac{\partial L}{\partial h^{(1)}} \circ \phi'(z^{(1)}) \\
\frac{\partial L}{\partial W^{(1)}} &= \frac{\partial L}{\partial z^{(1)}} x^T \\
\frac{\partial s}{\partial W^{(1)}} &= \lambda W^{(1)} \\
\frac{\partial J}{\partial W^{(1)}} &= \frac{\partial L}{\partial W^{(1)}} + \frac{\partial s}{\partial W^{(1)}} \\
\end{split}
$$