In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from scipy.stats import truncnorm

In [2]:
X = pd.DataFrame(np.linspace(0, 1, num=3).reshape(-1, 1), columns=['Dosage'])
X

Unnamed: 0,Dosage
0,0.0
1,0.5
2,1.0


In [3]:
target = np.array([0, 1, 0]).reshape(-1, 1)
target

array([[0],
       [1],
       [0]])

# 1. Initialize Network

In [43]:
def init_weights(X, n_neurons, random_state=42):
    n_inputs = int(X.shape[1])
    stddev = 2 / np.sqrt(n_inputs + n_neurons)
    b = np.zeros(n_neurons)
    weights = truncnorm.rvs(-1, 1, size=(n_inputs, n_neurons), scale=stddev, random_state=random_state)
    
    return pd.DataFrame(weights), b

In [44]:
weights, biases = init_weights(X, 2)
print(weights)
biases

          0         1
0 -0.249842  1.003948


array([0., 0.])

# 2. Forward Propagate

In [45]:
def softmax(x):
    return np.log(1 + np.exp(x))

In [46]:
def crossenthropy(x):
    return 1. / (1. + np.exp(-x))

In [47]:
X

Unnamed: 0,Dosage
0,0.0
1,0.5
2,1.0


In [48]:
weights

Unnamed: 0,0,1
0,-0.249842,1.003948


In [49]:
np.dot(X, weights)

array([[ 0.        ,  0.        ],
       [-0.12492101,  0.5019739 ],
       [-0.24984201,  1.00394781]])

In [50]:
np.dot(X, weights) + biases

array([[ 0.        ,  0.        ],
       [-0.12492101,  0.5019739 ],
       [-0.24984201,  1.00394781]])

In [51]:
def neuron_layer(X, weights, b, activation=None):
    Z = np.dot(X, weights) + b
    if activation is not None:
        Z = activation(Z)
        pass
    Z = pd.DataFrame(Z)
    return Z

In [52]:
hidden1 = neuron_layer(X, weights, biases, activation=None)
hidden1

Unnamed: 0,0,1
0,0.0,0.0
1,-0.124921,0.501974
2,-0.249842,1.003948


In [53]:
weights_2, biases_2 = init_weights(hidden1, 1, random_state=43)
display(weights_2)
biases_2

Unnamed: 0,0
0,-0.825995
1,0.21678


array([0.])

In [54]:
output = neuron_layer(hidden1, weights_2, biases_2, activation=None)
output

Unnamed: 0,0
0,0.0
1,0.212002
2,0.424004


# 3. Back Propagate Error

## Sum of squared residuals

$$\Large SSR = \sum_{i-1}^n{(Observed_i - Predicted_i)^2}$$

In [55]:
target = pd.DataFrame(target)
target

Unnamed: 0,0
0,0
1,1
2,0


In [56]:
output

Unnamed: 0,0
0,0.0
1,0.212002
2,0.424004


In [57]:
def ssr(target, predicted):
    return np.sum((target - predicted) ** 2)

## Derivative of b3

$$\Large \frac{dSSR}{db_3} = \frac{dSSR}{d Predicted} \cdot \frac{d Predicted}{d b_3}$$


$$\Large \frac{dSSR}{db_3} = \sum_{i=1}^n{-2 \cdot (Observed_i - Predicted_i)} \cdot 1$$

In [217]:
def derivative_of_b3_test(target, output):
    summa = 0
    for i in range(len(target)):
        summa += (-2) * (target.iloc[i, 0] - output.iloc[i, 0])
    return summa

In [218]:
derivative_of_b3_test(target, output)

-0.7279879538356002

In [75]:
def derivative_of_b3(target, output):
    return np.sum((-2) * (target - output))[0]

In [76]:
derivative_of_b3(target, output)

-0.7279879538356002

## Derivative of w3 and w4

$$\Large \frac{dSSR}{dw_3} = \frac{dSSR}{dPredicted} \cdot \frac{dPredicted}{dw_3}$$

$$\Large \frac{dSSR}{dw_4} = \frac{dSSR}{dPredicted} \cdot \frac{dPredicted}{dw_4}$$

$$\Large \frac{dSSR}{dPredicted} = \sum_{i=1}^n{-2 \cdot (Observed_i - Predicted_i)}$$

$$\Large \frac{dPredicted}{dw_3} = \frac{d}{dw_3}(y_{1,i}w_3 + y_{2,i}w_4 + b_3) = y_{1,i}$$


$$\Large \frac{dPredicted}{dw_4} = \frac{d}{dw_4}(y_{1,i}w_3 + y_{2,i}w_4 + b_3) = y_{2,i}$$

In [231]:
##-- запасной вариант: на выходе один и тот же ответ


def derivative_weights_test(target, output, hidden1):
    derivative = []
    for i in range(hidden1.shape[1]):
        summa = 0
        for j in range(len(output)):
            summa += (-2.) * (target.iloc[j, 0] - output.iloc[j, 0]) * hidden1.iloc[j, i]
        derivative.append(summa)
    return pd.DataFrame(derivative)

gradient_weights = derivative_weights_test(target, output, hidden1)
gradient_weights

Unnamed: 0,0
0,-0.014993
1,0.060247


In [197]:
def derivative_weights(target, output, hidden1):
    return np.dot(((-2) * (target - output)).T, hidden1).T

In [198]:
gradient_weights = derivative_weights(target, output, hidden1)
gradient_weights

array([[-0.01499303],
       [ 0.06024695]])

## Derivative of w1 and w2 and b1,b2

$$\Large \frac{dSSR}{dw_1} = \frac{dSSR}{dPredicted} \cdot \frac{dPredicted}{dy_1} \cdot \frac{dy_1}{dx_1} \cdot \frac{dx_1}{dw_1}$$

$$\Large \frac{dSSR}{dw_2} = \frac{dSSR}{dPredicted} \cdot \frac{dPredicted}{dy_2} \cdot \frac{dy_2}{dx_2} \cdot \frac{dx_2}{dw_2}$$

$$\Large \frac{dSSR}{dPredicted} = \sum_{i=1}^n{-2 \cdot (Observed_i - Predicted_i)}$$

$$\Large \frac{dPredicted}{dy_1} = \frac{d}{dy_1}(y_{1,i}w_3 + y_{2,i}w_4 + b_3) = w_3$$


$$\Large \frac{dy_1}{dx_1} = \frac{d}{dx_1}ln(1 + e^x) = \frac{e^x}{e^x + 1}$$

$$\Large \frac{dx_1}{dw_1} = \frac{d}{dw_1}(Input_i \cdot w_1 + b_1) = Input_i$$

In [245]:
def derivative_hidden_test(target, output, weights_2, X, n_neurons=2):
    gradients_w = []
    gradients_b = []
    for i in range(n_neurons):
        summa_w = 0.0
        summa_b = 0.0
        for j in range(len(X)):
            summa_w += (-2.) * (target.iloc[j, 0] - output.iloc[j, 0]) * weights_2.iloc[i, 0] * (np.exp(X.iloc[j, 0]) / (1 + np.exp(X.iloc[j, 0]))) * X.iloc[j, 0]
            summa_b += (-2.) * (target.iloc[j, 0] - output.iloc[j, 0]) * weights_2.iloc[i, 0] * (np.exp(X.iloc[j, 0]) / (1 + np.exp(X.iloc[j, 0])))
        gradients_w.append(summa_w)
        gradients_b.append(summa_b)
    return pd.DataFrame(np.array(gradients_w)).T, np.array(gradients_b).T

In [165]:
def softmax_derivative(x):
    return np.exp(x) / (1 + np.exp(x))

In [176]:
def derivative_hidden(target, output, weights_2, X):
    ssr_pred = (-2.) * (target - output)
    
    expon = softmax_derivative(output)
    weights_deriv = np.dot(X.values.T, ssr_pred * expon) * weights_2
    
    biases_deriv = np.dot(np.dot(ssr_pred, weights_2.T).T, expon)
    return pd.DataFrame(weights_deriv).T, biases_deriv.T 

In [178]:
derivative_hidden(target, output, weights_2, X)[0]

Unnamed: 0,0,1
0,-0.063571,0.016684


In [248]:
def train(X, target, epochs=10, learning_rate = 0.1):
    
    weights_1, biases_1 = init_weights(X, 2)
    
    hidden1 = neuron_layer(X, weights_1, biases_1, activation=None)
    
    weights_2, biases_2 = init_weights(hidden1, 1, random_state=43)
    output = neuron_layer(hidden1, weights_2, biases_2, activation=None)
    for epoch in range(epochs):
        error = ssr(target, output)
        print(epoch, ": Error:", np.array(error))
        
        ## b3
        gradient_b3 = derivative_of_b3_test(target, output)
        step_size_b3 = gradient_b3 * learning_rate
        
        biases_2 = np.array(biases_2 - step_size_b3)
        
        ##-- w3, w4  
        gradient_weights = derivative_weights_test(target, output, hidden1)
        step_size_weights = gradient_weights * learning_rate
        weights_2 = weights_2 - step_size_weights
        
        ##-- w1, w2, b1, b2
        gradient_weights_1_2, gradient_biases_1_2 = derivative_hidden(target, output, weights_2, X)
        step_size_w_1_2 = gradient_weights_1_2 * learning_rate
        step_size_b_1_2 = gradient_biases_1_2 * learning_rate
        weights_1 = weights_1 - step_size_w_1_2
        biases_1 = biases_1 - step_size_b_1_2
        
        
        hidden1 = neuron_layer(X, weights_1, biases_1, activation=crossenthropy)
        output = neuron_layer(hidden1, weights_2, biases_2, activation=None)
    
    return output

In [249]:
train(X, target, epochs=100, learning_rate=0.1)

0 : Error: [0.80072024]
1 : Error: [1.4530628]
2 : Error: [0.67200867]
3 : Error: [0.67175416]
4 : Error: [0.67158617]
5 : Error: [0.67142384]
6 : Error: [0.67126646]
7 : Error: [0.67111385]
8 : Error: [0.67096583]
9 : Error: [0.67082223]
10 : Error: [0.67068289]
11 : Error: [0.67054765]
12 : Error: [0.67041636]
13 : Error: [0.67028887]
14 : Error: [0.67016506]
15 : Error: [0.67004479]
16 : Error: [0.66992794]
17 : Error: [0.66981439]
18 : Error: [0.66970402]
19 : Error: [0.66959672]
20 : Error: [0.66949239]
21 : Error: [0.66939094]
22 : Error: [0.66929225]
23 : Error: [0.66919625]
24 : Error: [0.66910284]
25 : Error: [0.66901193]
26 : Error: [0.66892345]
27 : Error: [0.66883732]
28 : Error: [0.66875346]
29 : Error: [0.6686718]
30 : Error: [0.66859227]
31 : Error: [0.6685148]
32 : Error: [0.66843933]
33 : Error: [0.6683658]
34 : Error: [0.66829414]
35 : Error: [0.66822431]
36 : Error: [0.66815624]
37 : Error: [0.66808988]
38 : Error: [0.66802518]
39 : Error: [0.66796209]
40 : Error: [0

Unnamed: 0,0
0,0.306355
1,0.333877
2,0.358289
