![image](images\YAH.png)

In [281]:
import random, math
import numpy as np

def sigmoid(x):
    return 1/(1+np.exp(-x))

def swish(x):
    return x*sigmoid(x)

def relu(x):
    return np.maximum(0,x)

def softmax(x):
    p = np.exp(x - np.max(x))
    return p/np.sum(p)

def MSE(target, x):
    return np.power(target - x, 2)

def activation_function(z, act, derivative=False, activated_value=None):
    if derivative == False:
        if act=="sigmoid":
            return sigmoid(z)
        elif act == "swish":
            return swish(z)
        elif act == "relu":
            return relu(z)
        elif act=="tanh":
            return np.tanh(z)
        elif act=="softmax":
            return softmax(z)
    else:
        if act=="sigmoid":
            return sigmoid(z)
        elif act == "swish":
            return swish(z)
        elif act == "relu":
            return relu(z)
        elif act=="tanh":
            return 1 - np.power(activated_value, 2)
        elif act=="softmax":
            return softmax(z)
    
    

class RNN():
    
    def __init__(self, l_rate):
        self.layers = []
        self.learning_rate = l_rate
        
    def add_layer(self, input_size, output_size, activation=None, last_activation=None):
        new_layer = Layer(input_size, output_size, activation)
        self.layers.append(new_layer)
        
    def forward_pass(self, input_data):
        self.layers[0].forward_pass(input_data)
        for i in range(1, len(self.layers)):
            prev_layer_out = self.layers[i-1].Y[-1]
            self.layers[i].forward_pass(prev_layer_out)
            
        return self.layers[-1].Y[-1]
            
    def clear_memory(self):
        for i in range(0, len(self.layers)):
            self.layers[i].clear_memory()
            
    def backpropagation_through_time(self, input_data, target):
        gradient = self.layers[-1].der_MSE(target)
        for i in range(1, len(self.layers)):
            inputs = self.layers[-i-1].Y
            gradient = self.layers[-i].descent(gradient, inputs, self.learning_rate)
        self.layers[0].descent(gradient, input_data, self.learning_rate)
            
class Layer():
    
    def __init__(self, input_size, output_size, activation=None, last_activation=None):
        self.input_size = input_size
        self.output_size = output_size
        self.activation = activation
        self.last_activation = last_activation
        
        self.weights_X = self.weights_init(output_size, input_size)
        self.weights_H = self.weights_init(output_size, output_size)
        self.weights_Y = self.weights_init(output_size, output_size)
        self.bias      = self.weights_init(output_size, 1)
        
        self.A = np.empty((0,output_size,1))
        self.Y = np.empty((0,output_size,1))
        self.H = np.empty((0,output_size,1))
        
        
    def weights_init(self, rows, cols):
        return np.random.uniform(-1,1,(rows, cols))
    
    
    def forward_pass(self, input_data):
        new_H = self.weights_X.dot(input_data) + self.bias
        if len(self.A) > 0:
             new_H += self.weights_H.dot(self.A[-1]) 
        self.H = np.append(self.H, np.array([new_H]), axis=0)
        
        
        new_A = activation_function(self.H[-1], self.activation)
        self.A = np.append(self.A, np.array([new_A]), axis=0)
        
        
        new_Y = self.weights_Y.dot(self.A[-1])
        if self.last_activation != None:
            new_Y = activation_function(new_Y, self.last_activation)
        self.Y = np.append(self.Y, np.array([new_Y]), axis=0)
        

    def clear_memory(self):
        self.A = np.empty((0, self.output_size, 1))
        self.Y = np.empty((0, self.output_size, 1))
        self.H = np.empty((0, self.output_size, 1))
        
        
    def der_MSE(self, target):
        return 2*(self.Y[-1] - target)
     
    
    def descent(self, gradient, input_data, learning_rate):
        input_matrix = np.array(input_data)
        
        
        dC_Wy = 0
        dC_Wh = 0
        dC_Wx = 0
        dC_X = 0
        
        dY_Wy = np.tile(self.A[-1].T, (self.weights_Y.shape[0], 1))
        dC_Wy = np.multiply(np.tile(gradient, (1, self.weights_Y.shape[1])), dY_Wy)
        
        dY_A = np.sum(self.weights_Y, axis=0, keepdims=True).T
        dA_H = activation_function(self.H[-1], self.activation, derivative=True, activated_value=self.A[-1])
        dC_H = np.multiply(gradient, dA_H)
        dC_H_transformed = np.tile(dC_H, (1, self.weights_X.shape[1]))
        dC_X = np.sum(np.multiply(self.weights_X, dC_H_transformed), axis=0, keepdims=True).T
        
        self.weights_Y -= learning_rate * dC_Wy
        
        return dC_X
    

In [282]:
network = RNN(0.01)
network.add_layer(1, 3, "tanh")
network.add_layer(3, 2, "tanh")
network.add_layer(2, 1, "tanh")

In [283]:
input_data = [
    [[0.0]],
    [[0.1]],
    [[0.2]],
    [[0.3]],
    [[0.4]],
    [[0.5]],
    [[0.6]],
    [[0.7]]
]

In [284]:
epochs = 5000
network.clear_memory()
for e in range(0, epochs):
    for i in range(0, len(input_data)-1):
        output = network.forward_pass(input_data[i])
        network.backpropagation_through_time(input_data, input_data[i+1])
        if e%100 == 0:
            print("---------------")
            print("epoch " + str(e))
            print("sample " + str(i))
            print("output: \n" + str(output))
            print("target: \n" + str(input_data[i+1]))
        
        
        

---------------
epoch 0
sample 0
output: 
[[-0.01785206]]
target: 
[[0.1]]
---------------
epoch 0
sample 1
output: 
[[0.05140849]]
target: 
[[0.2]]
---------------
epoch 0
sample 2
output: 
[[0.00779089]]
target: 
[[0.3]]
---------------
epoch 0
sample 3
output: 
[[0.00634317]]
target: 
[[0.4]]
---------------
epoch 0
sample 4
output: 
[[-0.06363329]]
target: 
[[0.5]]
---------------
epoch 0
sample 5
output: 
[[-0.11159102]]
target: 
[[0.6]]
---------------
epoch 0
sample 6
output: 
[[-0.12858674]]
target: 
[[0.7]]
---------------
epoch 100
sample 0
output: 
[[0.34352591]]
target: 
[[0.1]]
---------------
epoch 100
sample 1
output: 
[[0.34422814]]
target: 
[[0.2]]
---------------
epoch 100
sample 2
output: 
[[0.34750335]]
target: 
[[0.3]]
---------------
epoch 100
sample 3
output: 
[[0.35181452]]
target: 
[[0.4]]
---------------
epoch 100
sample 4
output: 
[[0.35338519]]
target: 
[[0.5]]
---------------
epoch 100
sample 5
output: 
[[0.35589846]]
target: 
[[0.6]]
---------------
epoch 

---------------
epoch 1600
sample 0
output: 
[[0.21496946]]
target: 
[[0.1]]
---------------
epoch 1600
sample 1
output: 
[[0.28186971]]
target: 
[[0.2]]
---------------
epoch 1600
sample 2
output: 
[[0.37055472]]
target: 
[[0.3]]
---------------
epoch 1600
sample 3
output: 
[[0.50160311]]
target: 
[[0.4]]
---------------
epoch 1600
sample 4
output: 
[[0.46923991]]
target: 
[[0.5]]
---------------
epoch 1600
sample 5
output: 
[[0.48535579]]
target: 
[[0.6]]
---------------
epoch 1600
sample 6
output: 
[[0.50351284]]
target: 
[[0.7]]
---------------
epoch 1700
sample 0
output: 
[[0.18198331]]
target: 
[[0.1]]
---------------
epoch 1700
sample 1
output: 
[[0.25781036]]
target: 
[[0.2]]
---------------
epoch 1700
sample 2
output: 
[[0.36928749]]
target: 
[[0.3]]
---------------
epoch 1700
sample 3
output: 
[[0.52543134]]
target: 
[[0.4]]
---------------
epoch 1700
sample 4
output: 
[[0.48025229]]
target: 
[[0.5]]
---------------
epoch 1700
sample 5
output: 
[[0.49111116]]
target: 
[[0.6]]

---------------
epoch 3200
sample 0
output: 
[[0.12291406]]
target: 
[[0.1]]
---------------
epoch 3200
sample 1
output: 
[[0.15584862]]
target: 
[[0.2]]
---------------
epoch 3200
sample 2
output: 
[[0.34078348]]
target: 
[[0.3]]
---------------
epoch 3200
sample 3
output: 
[[0.57932145]]
target: 
[[0.4]]
---------------
epoch 3200
sample 4
output: 
[[0.5312931]]
target: 
[[0.5]]
---------------
epoch 3200
sample 5
output: 
[[0.52506737]]
target: 
[[0.6]]
---------------
epoch 3200
sample 6
output: 
[[0.55142454]]
target: 
[[0.7]]
---------------
epoch 3300
sample 0
output: 
[[0.12493742]]
target: 
[[0.1]]
---------------
epoch 3300
sample 1
output: 
[[0.15465322]]
target: 
[[0.2]]
---------------
epoch 3300
sample 2
output: 
[[0.33982858]]
target: 
[[0.3]]
---------------
epoch 3300
sample 3
output: 
[[0.57797052]]
target: 
[[0.4]]
---------------
epoch 3300
sample 4
output: 
[[0.53122152]]
target: 
[[0.5]]
---------------
epoch 3300
sample 5
output: 
[[0.52605756]]
target: 
[[0.6]]


---------------
epoch 4800
sample 0
output: 
[[0.13562755]]
target: 
[[0.1]]
---------------
epoch 4800
sample 1
output: 
[[0.14561535]]
target: 
[[0.2]]
---------------
epoch 4800
sample 2
output: 
[[0.33573827]]
target: 
[[0.3]]
---------------
epoch 4800
sample 3
output: 
[[0.56435571]]
target: 
[[0.4]]
---------------
epoch 4800
sample 4
output: 
[[0.52760806]]
target: 
[[0.5]]
---------------
epoch 4800
sample 5
output: 
[[0.53676412]]
target: 
[[0.6]]
---------------
epoch 4800
sample 6
output: 
[[0.55942994]]
target: 
[[0.7]]
---------------
epoch 4900
sample 0
output: 
[[0.13545376]]
target: 
[[0.1]]
---------------
epoch 4900
sample 1
output: 
[[0.14533681]]
target: 
[[0.2]]
---------------
epoch 4900
sample 2
output: 
[[0.33588901]]
target: 
[[0.3]]
---------------
epoch 4900
sample 3
output: 
[[0.56375777]]
target: 
[[0.4]]
---------------
epoch 4900
sample 4
output: 
[[0.52737338]]
target: 
[[0.5]]
---------------
epoch 4900
sample 5
output: 
[[0.53729924]]
target: 
[[0.6]]