In [1]:
from Part_2_functions import *

In [2]:
from typing import Callable 
import numpy as np
from typing import List

# Type alias: a callable that takes one ndarray and returns an ndarray.
Array_Function = Callable[[np.ndarray], np.ndarray]

# Type alias: a list of Array_Function callables.
Chain = List[Array_Function]

## Прямой проход

$$
\nu(X, W) = X @ W
$$
$$
L = \sum (\sigma(\nu(X, W))) = \sum_{i, j} (\sigma((XW)_{ij}))
$$

In [17]:
def forward_part_3(X: np.ndarray, W: np.ndarray, sigma: Array_Function) -> float:
    """Forward pass: L = sum(sigma(X @ W)).

    Args:
        X: Left operand of the matrix product.
        W: Right operand of the matrix product.
        sigma: Function applied to the product ``X @ W``.

    Returns:
        The sum of all entries of ``sigma(X @ W)``. ``np.sum`` without an
        ``axis`` argument yields a scalar, not an array.
    """
    # Shape compatibility is checked by a helper defined outside this section.
    assert_mulShape(X, W)

    product = X @ W
    activated = sigma(product)

    # Returned directly: a local named `L` would reuse the name of the
    # notebook-level function `L`.
    return np.sum(activated)

## Обратный проход

$$
\frac{dL}{dW} = \frac{dL}{du}(\sigma(XW)) * \frac{d\sigma}{du}(XW) * \frac{d\nu}{dW}(X, W)
$$

*Функция суммы L может быть представлена в виде матричного произведения:*
$$
L(X) = e_m * X * e_n^T
$$
$$X\ -\ матрица\ m\ на\ n$$
$$e_m = (1, 1, ..., 1)\ -\ m\ единиц$$
$$e_n = (1, 1, ..., 1)\ -\ n\ единиц$$
$$
\nabla L = e_m^T * e_n = (1 ... 1)_{mn}\ -\ m\ на\ n\ единиц
$$

In [7]:
def L(X: np.ndarray) -> float:
    """Sum all entries of a 2-D array, written as a matrix product.

    Computes ``e_m @ X @ e_n^T`` where ``e_m`` is a 1 x m row of ones and
    ``e_n^T`` is an n x 1 column of ones, for ``X`` of shape (m, n).

    Args:
        X: A 2-D array; both ``X.shape[0]`` and ``X.shape[1]`` are read.

    Returns:
        The sum of the entries of ``X`` as a scalar.
    """
    row_ones = np.ones((1, X.shape[0]))
    col_ones = np.ones((X.shape[1], 1))
    # The triple product is a 1x1 array; unwrap it so the function returns
    # the scalar its annotation promises instead of a nested array.
    return (row_ones @ X @ col_ones).item()

In [10]:
def deriv_L(X: np.ndarray):
    """Gradient of the sum-of-all-entries function with respect to its input.

    Each entry of ``X`` contributes to the sum with weight one, so the
    gradient is an array of ones with the same shape and dtype as ``X``.
    """
    gradient = np.ones_like(X)
    return gradient

In [28]:
def backward_part_3(X: np.ndarray, W: np.ndarray, sigma: Array_Function, dW = 'dW') -> np.ndarray:
    """Gradient of L = sum(sigma(X @ W)) with respect to W or X.

    With N = X @ W and S = sigma(N), the chain rule gives
    G = dL/dS * dS/dN, and then

        dL/dW = X^T @ G
        dL/dX = G @ W^T

    Args:
        X: Left operand of the matrix product.
        W: Right operand of the matrix product.
        sigma: Function applied to ``X @ W``.
        dW: Selector. ``'dW'`` (the default) returns dL/dW; any other
            value returns dL/dX.

    Returns:
        The selected gradient. Assuming ``deriv`` returns an array shaped
        like ``X @ W``, the result has the shape of ``W`` for ``'dW'`` and
        the shape of ``X`` otherwise.
    """
    # Shape validation and the derivative of sigma come from helpers that
    # are defined outside this section.
    assert_mulShape(X, W)
    N = X @ W
    S = sigma(N)

    # L is a plain sum over S, so dL/dS is one for every entry.
    dL_dS = np.ones_like(S)
    dS_dN = deriv(sigma, N)
    dL_dN = dL_dS * dS_dN

    if dW == 'dW':
        return np.transpose(X) @ dL_dN
    # Operand order matters: the upstream gradient goes on the left so the
    # product has X's shape; W^T @ G would not.
    return dL_dN @ np.transpose(W)

In [29]:
# Seed NumPy's legacy global generator so the sample matrices are reproducible.
np.random.seed(190204)

# Draw order matters for reproducibility: X is sampled before W.
X = np.random.standard_normal(size=(3, 3))
W = np.random.standard_normal(size=(3, 2))

print('X:\n', X, '\n')
print('W:\n', W)

X:
 [[-1.57752816 -0.6664228   0.63910406]
 [-0.56152218  0.73729959 -1.42307821]
 [-1.44348429 -0.39128029  0.1539322 ]] 

W:
 [[ 0.75510818  0.25562492]
 [-0.56109271 -0.97504841]
 [ 0.98098478 -0.95870776]]


In [30]:
# Forward pass on the sample matrices. `sigmoid` is not defined in this section
# (presumably it comes from the star import of Part_2_functions — confirm).
print('L:', forward_part_3(X, W, sigmoid))

L: 2.375495101411695


In [31]:
# Any selector other than 'dW' makes backward_part_3 return the gradient with respect to X.
print('dLdX:', backward_part_3(X, W, sigmoid, 'dX'), sep='\n')

dLdX:
[[ 0.2488887  -0.37478057  0.01121962]
 [ 0.12604152 -0.27807404 -0.13945837]
 [ 0.22992798 -0.36623443 -0.02252592]]


In [32]:
# 'dW' (also the default selector) requests the gradient with respect to W.
print('dLdW:', backward_part_3(X, W, sigmoid, 'dW'), sep='\n')

dLdW:
[[-0.75783562 -0.87258156]
 [-0.18684539 -0.0853303 ]
 [ 0.06786442 -0.14142917]]
