In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from scipy.stats import truncnorm

In [2]:
X = pd.DataFrame(np.linspace(0, 1, num=3).reshape(-1, 1), columns=['Dosage'])
X

Unnamed: 0,Dosage
0,0.0
1,0.5
2,1.0


In [3]:
target = np.array([0, 1, 0]).reshape(-1, 1)
target

array([[0],
       [1],
       [0]])

# 1. Initialize Network

In [186]:
def init_weights(X, n_neurons, random_state=42):
    n_inputs = int(X.shape[1])
    stddev = 2 / np.sqrt(n_inputs + n_neurons)
    b = np.zeros(n_neurons)
    weights = pd.DataFrame(truncnorm.rvs(-1, 1, size=(n_inputs, n_neurons), scale=stddev, random_state=random_state))
    
    return weights, b

In [187]:
weights, biases = init_weights(X, 2)
display(weights)
biases

Unnamed: 0,0,1
0,-0.249842,1.003948


array([0., 0.])

# 2. Forward Propagate

In [188]:
def softmax(x):
    return np.log(1 + np.exp(x))

In [189]:
def crossenthropy(x):
    return 1. / (1. + np.exp(-x))

In [190]:
X

Unnamed: 0,Dosage
0,0.0
1,0.5
2,1.0


In [191]:
weights

Unnamed: 0,0,1
0,-0.249842,1.003948


In [192]:
np.dot(X, weights)

array([[ 0.        ,  0.        ],
       [-0.12492101,  0.5019739 ],
       [-0.24984201,  1.00394781]])

In [193]:
np.dot(X, weights) + biases

array([[ 0.        ,  0.        ],
       [-0.12492101,  0.5019739 ],
       [-0.24984201,  1.00394781]])

In [194]:
def neuron_layer(X, weights, b, activation=None):
    Z = np.dot(X, weights) + b
    if activation is not None:
        Z = activation(Z)
        pass
    Z = pd.DataFrame(Z)
    return Z

In [195]:
hidden1 = neuron_layer(X, weights, biases, activation=None)
hidden1

Unnamed: 0,0,1
0,0.0,0.0
1,-0.124921,0.501974
2,-0.249842,1.003948


In [196]:
weights_2, biases_2 = init_weights(hidden1, 1, random_state=43)
display(weights_2)
biases_2

Unnamed: 0,0
0,-0.825995
1,0.21678


array([0.])

In [197]:
output = neuron_layer(hidden1, weights_2, biases_2, activation=None)
output

Unnamed: 0,0
0,0.0
1,0.212002
2,0.424004


# 3. Back Propagate Error

## Sum of squared residuals

$$\Large SSR = \sum_{i-1}^n{(Observed_i - Predicted_i)^2}$$

In [235]:
target = pd.DataFrame(target)
target

Unnamed: 0,0
0,0
1,1
2,0


In [236]:
output

Unnamed: 0,0
0,0.0
1,0.212002
2,0.424004


In [237]:
def ssr(target, predicted):
    return np.sum((target - predicted) ** 2)

## Derivative of b3

$$\Large \frac{dSSR}{db_3} = \frac{dSSR}{d Predicted} \cdot \frac{d Predicted}{d b_3}$$


$$\Large \frac{dSSR}{db_3} = \sum_{i=1}^n{-2 \cdot (Observed_i - Predicted_i)} \cdot 1$$

In [238]:
def derivative_of_b3(target, output):
    return np.sum((-2) * (target - output))

In [239]:
derivative_of_b3(target, output)

0   -0.727988
dtype: float64

## Derivative of w3 and w4

$$\Large \frac{dSSR}{dw_3} = \frac{dSSR}{dPredicted} \cdot \frac{dPredicted}{dw_3}$$

$$\Large \frac{dSSR}{dw_4} = \frac{dSSR}{dPredicted} \cdot \frac{dPredicted}{dw_4}$$

$$\Large \frac{dSSR}{dPredicted} = \sum_{i=1}^n{-2 \cdot (Observed_i - Predicted_i)}$$

$$\Large \frac{dPredicted}{dw_3} = \frac{d}{dw_3}(y_{1,i}w_3 + y_{2,i}w_4 + b_3) = y_{1,i}$$


$$\Large \frac{dPredicted}{dw_4} = \frac{d}{dw_4}(y_{1,i}w_3 + y_{2,i}w_4 + b_3) = y_{2,i}$$

In [240]:
##-- запасной вариант: на выходе один и тот же ответ


# def derivative_weights_test(target, output, hidden1):
#     derivative = []
#     for i in range(hidden1.shape[1]):
#         summa = 0
#         for j in range(len(output)):
#             summa += (-2.) * (target.iloc[j, 0] - output.iloc[j, 0]) * hidden1.iloc[j, i]
#         derivative.append(summa)
#     return pd.DataFrame(derivative)

# gradient_weights = derivative_weights_test(target, output, hidden1)
# gradient_weights

In [241]:
def derivative_weights(target, output, hidden1):
    derivative = []
    for i in range(hidden1.shape[1]):
        derivative.append(np.dot((-2.) * (target - output).T, hidden1[i]))
    return np.array(derivative)

In [242]:
gradient_weights = derivative_weights(target, output, hidden1)
gradient_weights

array([[-0.01499303],
       [ 0.06024695]])

## Derivative of w1 and w2 and b1,b2

$$\Large \frac{dSSR}{dw_1} = \frac{dSSR}{dPredicted} \cdot \frac{dPredicted}{dy_1} \cdot \frac{dy_1}{dx_1} \cdot \frac{dx_1}{dw_1}$$

$$\Large \frac{dSSR}{dw_2} = \frac{dSSR}{dPredicted} \cdot \frac{dPredicted}{dy_2} \cdot \frac{dy_2}{dx_2} \cdot \frac{dx_2}{dw_2}$$

$$\Large \frac{dSSR}{dPredicted} = \sum_{i=1}^n{-2 \cdot (Observed_i - Predicted_i)}$$

$$\Large \frac{dPredicted}{dy_1} = \frac{d}{dy_1}(y_{1,i}w_3 + y_{2,i}w_4 + b_3) = w_3$$


$$\Large \frac{dy_1}{dx_1} = \frac{d}{dx_1}ln(1 + e^x) = \frac{e^x}{e^x + 1}$$

$$\Large \frac{dx_1}{dw_1} = \frac{d}{dw_1}(Input_i \cdot w_1 + b_1) = Input_i$$

In [243]:
def derivative_hidden(target, output, weights_2, X, n_neurons):
    gradients_w = []
    gradients_b = []
    for i in range(n_neurons):
        summa_w = 0.0
        summa_b = 0.0
        for j in range(len(X)):
            summa_w += (-2.) * (target.iloc[j, 0] - output.iloc[j, 0]) * weights_2.iloc[i, 0] * (np.exp(X.iloc[j, 0]) / (1 + np.exp(X.iloc[j, 0]))) * X.iloc[j, 0]
            summa_b += (-2.) * (target.iloc[j, 0] - output.iloc[j, 0]) * weights_2.iloc[i, 0] * (np.exp(X.iloc[j, 0]) / (1 + np.exp(X.iloc[j, 0])))
        gradients_w.append(summa_w)
        gradients_b.append(summa_b)
    return pd.DataFrame(np.array(gradients_w)), np.array(gradients_b)

In [244]:
def train(X, target, epochs=10, learning_rate = 0.1):
    
    weights_1, biases_1 = init_weights(X, 2)
    
    hidden1 = neuron_layer(X, weights_1, biases_1, activation=None)
    
    weights_2, biases_2 = init_weights(hidden1, 1, random_state=43)
    output = neuron_layer(hidden1, weights_2, biases_2, activation=None)
    for epoch in range(epochs):
        error = ssr(target, output)
        print(epoch, ": Error:", np.array(error))
        
        ## b3
        gradient_b3 = derivative_of_b3(target, output)
        step_size_b3 = gradient * learning_rate
        
        biases_2 = np.array(biases_2 - step_size_b3)
        
        ##-- w3, w4  
        gradient_weights = derivative_weights(target, output, hidden1)
        step_size_weights = gradient_weights * learning_rate
        weights_2 = weights_2 - step_size_weights
        
        ##-- w1, w2, b1, b2
        gradient_weights_1_2, gradient_biases_1_2 = derivative_hidden(target, output, weights_2, X, 2)
        step_size_w_1_2 = gradient_weights_1_2 * learning_rate
        step_size_b_1_2 = gradient_biases_1_2 * learning_rate
        weights_1 = weights_1 - step_size_w_1_2.T
        biases_1 = biases_1 - step_size_b_1_2.T
        
        
        hidden1 = neuron_layer(X, weights_1, biases_1, activation=crossenthropy)
        output = neuron_layer(hidden1, weights_2, biases_2, activation=crossenthropy)
    
    return output

In [245]:
train(X, target, epochs=30, learning_rate=0.1)

0 : Error: [0.80072024]
1 : Error: [0.71130634]
2 : Error: [0.71457612]
3 : Error: [0.71729029]
4 : Error: [0.71939698]
5 : Error: [0.720862]
6 : Error: [0.72167312]
7 : Error: [0.72184371]
8 : Error: [0.7214147]
9 : Error: [0.72045393]
10 : Error: [0.71905247]
11 : Error: [0.71731805]
12 : Error: [0.71536636]
13 : Error: [0.7133117]
14 : Error: [0.71125842]
15 : Error: [0.70929437]
16 : Error: [0.70748716]
17 : Error: [0.70588325]
18 : Error: [0.70450933]
19 : Error: [0.70337528]
20 : Error: [0.70247784]
21 : Error: [0.70180435]
22 : Error: [0.70133609]
23 : Error: [0.70105104]
24 : Error: [0.70092588]
25 : Error: [0.70093752]
26 : Error: [0.70106395]
27 : Error: [0.70128488]
28 : Error: [0.70158192]
29 : Error: [0.7019387]


Unnamed: 0,0
0,0.465814
1,0.405538
2,0.363281
