In [0]:
import time
import math
import numpy as np
import matplotlib.pyplot as plt

## y = x^2 approximation

In [0]:
# 1000 evenly spaced inputs on [0, 1], laid out as one row (features x examples).
x_train = np.linspace(0, 1, num=1000)[np.newaxis, :]
# Targets for the y = x^2 fit.
y_train = x_train * x_train

In [0]:
# Display both shapes; each should be (1, 1000): one row of 1000 examples.
x_train.shape, y_train.shape

In [0]:
# Visual check of the training data: inputs x against targets y = x^2.
plt.scatter(x_train, y_train)

In [0]:
def initialize_parameters(layer_dims):
    """
    Create the initial weights and biases for every layer.
    Arguments:
    layer_dims -- sequence of layer sizes, input layer first
    Returns:
    parameters -- dict with 'W{l}' (small random values, shape (layer_dims[l], layer_dims[l-1]))
                  and 'b{l}' (zeros, shape (layer_dims[l], 1)) for l = 1 .. len(layer_dims) - 1
    """
    parameters = {}

    # Walk over consecutive (fan_in, fan_out) pairs; layer numbering starts at 1.
    size_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for idx, (fan_in, fan_out) in enumerate(size_pairs, start=1):
        weights = np.random.randn(fan_out, fan_in) * 0.01
        biases = np.zeros((fan_out, 1))
        parameters[f'W{idx}'] = weights
        parameters[f'b{idx}'] = biases

    return parameters

In [0]:
def linear_forward(A, W, b):
    """
    Affine step of one layer: Z = W . A + b.
    Arguments:
    A -- activations of the previous layer (or the input data). dim: (previous layer, num of examples)
    W -- weights matrix. dim: (current layer, previous layer)
    b -- bias vector. dim: (current layer, 1)
    Returns:
    Z -- pre-activation values fed to the activation function
    cache -- the tuple (A, W, b), kept for the backward pass
    """
    pre_activation = np.dot(W, A) + b
    return pre_activation, (A, W, b)

In [0]:
def relu(Z):
    """
    ReLU activation.
    Arguments:
    Z -- pre-activation values
    Returns:
    A -- element-wise max(0, Z)
    cache -- Z itself, kept for the backward pass
    """
    return np.maximum(0, Z), Z

In [0]:
def sigmoid(Z):
    """
    Sigmoid activation.
    Arguments:
    Z -- pre-activation values
    Returns:
    A -- element-wise 1 / (1 + exp(-Z))
    cache -- Z itself, kept for the backward pass
    """
    denominator = 1 + np.exp(-Z)
    return 1 / denominator, Z

In [0]:
def activation_forward(A_prev, W, b, activation):
    """
    Forward step for one layer: linear part followed by the chosen activation.
    Arguments:
    A_prev -- activations from the previous layer (or the input data)
    W -- weights matrix of the current layer
    b -- bias vector of the current layer
    activation -- name of the activation to apply: 'relu' or 'sigmoid'
    Returns:
    A -- post-activation values of the current layer
    cache -- tuple (linear_cache, activation_cache) stored for the backward pass
    Raises:
    ValueError -- if activation is neither 'relu' nor 'sigmoid'
    """
    # Reject unknown names up front; previously this surfaced later as an
    # UnboundLocalError on `A`, which hid the real cause.
    if activation not in ('relu', 'sigmoid'):
        raise ValueError(f"Unsupported activation: {activation!r} (expected 'relu' or 'sigmoid')")

    Z, linear_cache = linear_forward(A_prev, W, b)
    if activation == 'relu':
        A, activation_cache = relu(Z)
    else:
        A, activation_cache = sigmoid(Z)
    cache = (linear_cache, activation_cache)
    return A, cache

In [0]:
def model_forward(X, parameters):
    """
    Full forward pass: ReLU on every hidden layer, sigmoid on the output layer.
    X -- input data (input dim, number of examples);
    parameters -- dict holding 'W1', 'b1', ..., 'WL', 'bL' (output of initialize_parameters())

    Returns:
    A_last: activation produced by the final (sigmoid) layer;
    caches -- list with one cache per layer, in forward order
    """
    n_layers = len(parameters) // 2  # two entries (W and b) per layer
    caches = []
    activation = X

    # Hidden layers 1 .. L-1 use ReLU.
    for idx in range(1, n_layers):
        activation, layer_cache = activation_forward(
            activation, parameters[f'W{idx}'], parameters[f'b{idx}'], 'relu'
        )
        caches.append(layer_cache)

    # Output layer L uses sigmoid.
    A_last, layer_cache = activation_forward(
        activation, parameters[f'W{n_layers}'], parameters[f'b{n_layers}'], 'sigmoid'
    )
    caches.append(layer_cache)

    return A_last, caches

In [0]:
def compute_mae(A_last, Y):
    """
    Mean absolute error between predictions and targets.
    Arguments:
    A_last -- predictions, dim: (1, num of examples)
    Y -- true values, same shape as A_last
    Returns:
    mae -- sum of |A_last - Y| divided by the number of examples (Y.shape[1])
    """
    m = Y.shape[1]
    # Take the absolute value per element BEFORE summing; taking abs() of the
    # summed error lets positive and negative errors cancel each other out.
    mae = np.sum(np.abs(A_last - Y)) / m
    return mae

In [0]:
def compute_approx_error(A_last, Y):
    """
    Relative difference between the norms of predictions and targets.
    Arguments:
    A_last -- predictions
    Y -- true values
    Returns:
    approx -- |1 - ||A_last|| / ||Y|||
    """
    # Whole-array norms are compared, presumably so that individual zero
    # targets do not cause a division by zero (an element-wise ratio would).
    # NOTE: an all-zero Y still makes the denominator zero.
    approx = abs(1 - np.linalg.norm(A_last) / np.linalg.norm(Y))
    return approx

In [0]:
# Quick sanity check of compute_mae on five hand-made points: every prediction
# is >= its target and the differences average to 0.25.
compute_mae(np.linspace(0.5, 1, 5).reshape(1,-1), np.linspace(0, 1, 5).reshape(1,-1))

In [0]:
def linear_backward(dZ, cache):
    """
    Backward pass through the linear part of a single layer.
    dZ - gradient of the cost with respect to this layer's linear output
    cache - tuple of (A_prev, W, b) saved by the forward pass of this layer
    Returns:
    dA_prev -- gradient with respect to the previous layer's activations, same shape as A_prev
    dW -- gradient with respect to W, same shape as W (averaged over examples)
    db -- gradient with respect to b, same shape as b (averaged over examples)
    """
    A_prev, W, _ = cache  # the bias itself is not needed for the gradients
    n_examples = A_prev.shape[1]

    grad_prev = np.dot(W.T, dZ)
    grad_W = np.dot(dZ, A_prev.T) / n_examples
    grad_b = np.sum(dZ, axis=1, keepdims=True) / n_examples

    return grad_prev, grad_W, grad_b

In [0]:
def relu_backward(dA, cache):
    """
    Backward pass through ReLU.
    Arguments:
    dA -- gradient with respect to the ReLU output
    cache -- the pre-activation Z saved by relu()
    Returns:
    dZ -- copy of dA with entries zeroed wherever Z <= 0
    """
    inactive = cache <= 0
    grad = np.copy(dA)  # work on a copy so the caller's dA is left untouched
    grad[inactive] = 0
    return grad

In [0]:
def sigmoid_backward(dA, cache):
    """
    Backward pass through the sigmoid.
    Arguments:
    dA -- gradient with respect to the sigmoid output
    cache -- the pre-activation Z saved by sigmoid()
    Returns:
    dZ -- dA * s * (1 - s), where s = sigmoid(Z)
    """
    sig = 1 / (1 + np.exp(-cache))
    return dA * sig * (1 - sig)

In [0]:
def activation_backward(dA, cache, activation):
    """
    Backprop for linear -> activation.
    
    Arguments:
    dA -- post-activation gradient for current layer l 
    cache -- tuple of values (linear_cache, activation_cache) for computing backward propagation
    activation -- activation used in this layer: "relu" selects the ReLU derivative;
                  any other value is treated as "sigmoid"
    
    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache

    if activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
    else:
        # Fallback kept as-is so existing callers that pass something other
        # than the exact string 'sigmoid' keep working.
        dZ = sigmoid_backward(dA, activation_cache)
    
    # The linear step needs dZ (gradient w.r.t. the pre-activation). Passing dA
    # here would skip the activation derivative and produce wrong gradients.
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db

In [0]:
def model_backward(A_last, Y, caches):
    """
    Backward pass through the whole network for the MAE cost.
    Arguments:
    A_last -- output of the forward pass (activation of the last, sigmoid layer)
    Y -- true values, same shape as A_last
    caches -- list of per-layer caches from the forward pass; the last entry
              belongs to the sigmoid output layer, the others to ReLU layers
    Returns:
    grads -- dict with 'dA{l}', 'dW{l+1}', 'db{l+1}' for l = 0 .. L-1
    """
    grads = {}
    L = len(caches)

    # Derivative of |A_last - Y| with respect to A_last: +1 where the
    # prediction exceeds the target, -1 otherwise (ties map to -1, as before).
    # np.where replaces the former `.astype(np.int)`, which fails on
    # NumPy >= 1.24 where the `np.int` alias no longer exists.
    dA_last = np.where(A_last > Y, 1.0, -1.0)

    # Output layer: pass the activation by name, like every other call site.
    current_cache = caches[L-1]
    grads[f'dA{L-1}'], grads[f'dW{L}'], grads[f'db{L}'] = activation_backward(dA_last, current_cache, 'sigmoid')

    # Hidden layers, from the last hidden layer back to the first.
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = activation_backward(grads[f'dA{l+1}'], current_cache, 'relu')
        grads[f'dA{l}'] = dA_prev_temp
        grads[f'dW{l+1}'] = dW_temp
        grads[f'db{l+1}'] = db_temp
    
    return grads

In [0]:
def update_parameters(parameters, grads, learning_rate):
    """
    One gradient-descent step on every weight matrix and bias vector.
    Arguments:
    parameters -- dict with 'W{l}' and 'b{l}' arrays; updated in place
    grads -- dict with the matching 'dW{l}' and 'db{l}' gradients
    learning_rate -- step size
    Returns:
    parameters -- the same dict, after the update
    """
    n_layers = len(parameters) // 2  # two entries (W and b) per layer
    for idx in range(1, n_layers + 1):
        for name in ('W', 'b'):
            parameters[f'{name}{idx}'] -= learning_rate * grads[f'd{name}{idx}']
    return parameters

In [0]:
def model(X, Y, nodes_hidden, num_layers, learning_rate = 0.001, num_epochs = 1000, print_cost = False):
    """
    Train the network with full-batch gradient descent and plot the cost curve.
    Arguments:
    X -- input data, dim: (1, num of examples)
    Y -- true values, dim: (1, num of examples)
    nodes_hidden -- number of units in each hidden layer
    num_layers -- number of hidden layers
    learning_rate -- step size of the gradient-descent update
    num_epochs -- number of passes over the data
    print_cost -- if True, print the cost every 200 epochs
    Returns:
    parameters -- dict with the trained 'W{l}' and 'b{l}' arrays
    """
    np.random.seed(42)  # fixed seed so the random initialization is reproducible
    costs = []

    # One input unit, num_layers hidden layers of equal width, one output unit.
    layer_dims = [1] + [nodes_hidden]*num_layers + [1]

    parameters = initialize_parameters(layer_dims)

    for i in range(1, num_epochs+1):
        A_last, caches = model_forward(X, parameters)
        cost = compute_mae(A_last, Y)  # cost of the parameters before this epoch's update

        grads = model_backward(A_last, Y, caches)

        parameters = update_parameters(parameters, grads, learning_rate)

        if print_cost and i % 200 == 0:
            print ("Cost after iteration %i: %f" %(i, cost))
        costs.append(cost)

    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    # One cost value is recorded per epoch, so the axis is in plain epochs.
    plt.xlabel('epochs')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()
    # With num_epochs == 0 there is nothing to summarize (min() of an empty list would raise).
    if costs:
        best_cost = min(costs)
        print(f'Minimal cost: {best_cost}, at {costs.index(best_cost) + 1} epochs')
    
    return parameters

In [0]:
parameters = model(x_train, y_train, 4, 10, num_epochs = 3000, print_cost = True)  # Original author's note: the minimal cost value was at 2612 epochs (re-check after any code change).