In [2]:
# import numpy as np
import cupy as np
# cupy mirrors the numpy API with GPU acceleration. If you have a GPU, cuDNN and a supported engine, keep `import cupy as np`; otherwise remove it and use `import numpy as np` instead.
import matplotlib.pyplot as plt

In [3]:
# Activation functions

def sigmoid(Z):
    """Logistic sigmoid ``1 / (1 + exp(-Z))``, evaluated element-wise on ``Z``."""
    denominator = 1 + np.exp(-Z)
    return 1 / denominator

def sigmoid_backward(dA, Z):
    """Propagate ``dA`` back through a sigmoid activation.

    Multiplies the incoming gradient ``dA`` by the sigmoid derivative
    ``s * (1 - s)``, where ``s = sigmoid(Z)``, element-wise.
    """
    s = sigmoid(Z)
    scaled = dA * s
    return scaled * (1 - s)

def relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and ``Z``."""
    floor = 0
    return np.maximum(floor, Z)

def relu_backward(dA, Z):
    """Propagate ``dA`` back through a ReLU activation.

    Returns a copy of ``dA`` in which every position where ``Z <= 0`` has been
    set to 0; ``dA`` itself is left untouched.
    """
    grad = np.array(dA, copy=True)
    blocked = Z <= 0
    grad[blocked] = 0
    return grad

def same(Z):
    """Identity ("linear") activation: hands ``Z`` back unchanged."""
    return Z

def same_backward(dA, Z):
    """Backward pass of the identity activation.

    The derivative of the identity is 1, so the incoming gradient ``dA`` is
    returned as-is; ``Z`` is accepted only to match the other backward
    functions' signature.
    """
    return dA

def tanh(Z):
    """Hyperbolic tangent, evaluated element-wise on ``Z``.

    Delegates to the array library's built-in ``tanh`` instead of forming
    ``(e^Z - e^-Z) / (e^Z + e^-Z)`` by hand. The hand-written quotient
    overflows to ``inf / inf = nan`` as soon as ``exp(|Z|)`` exceeds the float
    range, whereas the built-in saturates cleanly to +/-1.
    """
    return np.tanh(Z)

def tanh_backward(dA, Z):
    """Propagate ``dA`` back through a tanh activation.

    Multiplies the incoming gradient ``dA`` element-wise by the tanh
    derivative ``1 - tanh(Z)**2``.

    The activation value is taken from the array library's built-in ``tanh``,
    which saturates to +/-1 for large ``|Z|``; an exponential-quotient
    formulation would yield nan there and poison every gradient downstream.
    """
    activation = np.tanh(Z)
    return dA * (1 - np.power(activation, 2))



In [4]:
# Loss and error Functions.

def return_loss(yhat, y):
    """Absolute-error loss reported as "MAE" by the training code.

    Sums ``|yhat - y|`` over every element and divides by ``yhat.shape[1]``
    (the number of columns). When ``yhat`` has more than one row the result is
    therefore the per-column total absolute error averaged over columns, not
    the mean over all elements.
    """
    total_abs_error = np.sum(np.abs(yhat - y))
    n_columns = yhat.shape[1]
    return total_abs_error / n_columns

def error(yhat, y):
    """Element-wise residual ``yhat - y`` (prediction minus target)."""
    return np.subtract(yhat, y)

In [5]:
# Converts a list of [input_size, output_size, activation] entries into the list-of-dicts architecture description used to initialize the network

def dict_layers(encoded_list):
    """Expand a compact architecture description into a list of layer dicts.

    Args:
        encoded_list: iterable of indexable layer specs, each laid out as
            ``[input_size, output_size, activation]`` (only positions 0, 1
            and 2 are read).

    Returns:
        A list with one dict per layer, keyed by ``"input_size"``,
        ``"output_size"`` and ``"activation_func"``, in the same order as
        ``encoded_list``.
    """
    return [
        {"input_size": spec[0], "output_size": spec[1], "activation_func": spec[2]}
        for spec in encoded_list
    ]

# Takes the neural network architecture and initializes weights and biases

def initialize_nn_layers(nn_architecture, weight_scaling=0.15):
    """Create randomly initialised weights and biases for every layer.

    Args:
        nn_architecture: sequence of layer dicts providing ``"input_size"``
            and ``"output_size"``.
        weight_scaling: factor applied to the ``randn`` draws used for the
            weight matrices. Biases are plain ``randn`` draws and are NOT
            scaled.

    Returns:
        Dict mapping ``"Weights<k>"`` to an ``(output_size, input_size)`` array
        and ``"bias<k>"`` to an ``(output_size, 1)`` array, with layers
        numbered from 1.

    Note:
        The global RNG is re-seeded with 2 on every call, so the same
        architecture always gets the same parameters and any random state the
        caller had set up is overwritten.
    """
    np.random.seed(2)
    params_values = {}

    for layer_number, layer in enumerate(nn_architecture, start=1):
        n_in, n_out = layer["input_size"], layer["output_size"]
        suffix = str(layer_number)

        # Weights are drawn before the bias for each layer; the draw order
        # determines the values obtained from the seeded generator.
        params_values["Weights" + suffix] = np.random.randn(n_out, n_in) * weight_scaling
        params_values["bias" + suffix] = np.random.randn(n_out, 1)

    return params_values

In [6]:
# Performs forward propagation on one layer only

def one_layer_forward(input, weight, bias, activation_func):
    """Run a single layer forward: affine transform followed by an activation.

    Args:
        input: activations feeding the layer.
        weight: weight matrix; multiplied on the left (``weight @ input``).
        bias: bias added to the product.
        activation_func: one of ``"same"``, ``"sigmoid"``, ``"relu"``,
            ``"tanh"``.

    Returns:
        Tuple ``(A, Z)``: the activated output and the pre-activation value.

    Raises:
        Exception: if ``activation_func`` is not one of the supported names.
            The affine transform is computed before the name is checked.
    """
    pre_activation = np.dot(weight, input) + bias

    if activation_func == "same":
        return same(pre_activation), pre_activation
    if activation_func == "sigmoid":
        return sigmoid(pre_activation), pre_activation
    if activation_func == "relu":
        return relu(pre_activation), pre_activation
    if activation_func == "tanh":
        return tanh(pre_activation), pre_activation

    raise Exception(' Function not valid')


# Performs full forward propagation layer by layer.

def full_forward(X, params_values, nn_architecture):
    """Run the whole network forward, layer by layer.

    Args:
        X: network input.
        params_values: dict holding ``"Weights<k>"`` and ``"bias<k>"`` for
            each layer ``k`` (1-based).
        nn_architecture: sequence of layer dicts; ``"activation_func"`` is
            read from each.

    Returns:
        Tuple ``(A_last, memory)``. ``memory["A<k-1>"]`` is the input that was
        fed to layer ``k`` (so ``"A0"`` is ``X``) and ``memory["Z<k>"]`` is
        layer ``k``'s pre-activation; both are needed by the backward pass.
    """
    memory = {}
    activation = X

    for layer_number, layer in enumerate(nn_architecture, start=1):
        layer_input = activation

        activation_name = layer["activation_func"]
        weights = params_values["Weights" + str(layer_number)]
        biases = params_values["bias" + str(layer_number)]
        activation, pre_activation = one_layer_forward(
            layer_input, weights, biases, activation_name)

        # The input is stored under the previous layer's index, the
        # pre-activation under the current one.
        memory["A" + str(layer_number - 1)] = layer_input
        memory["Z" + str(layer_number)] = pre_activation

    return activation, memory

In [7]:
# Performs backward propagation on one layer only

def single_layer_backward_propagation(dA_curr, W_curr, b_curr, Z_curr, A_prev, activation):
    """Back-propagate through one layer.

    Args:
        dA_curr: gradient of the objective w.r.t. this layer's activation.
        W_curr: this layer's weight matrix.
        b_curr: this layer's bias; not used in the computation, accepted for
            signature symmetry.
        Z_curr: this layer's pre-activation saved during the forward pass.
        A_prev: the input this layer received during the forward pass. Its
            second dimension (``A_prev.shape[1]``) is used as the divisor when
            averaging the gradients.
        activation: ``"relu"``, ``"sigmoid"``, ``"same"`` or ``"tanh"``.

    Returns:
        Tuple ``(dA_prev, dW_curr, db_curr)``: gradient to pass to the
        previous layer, and the averaged weight and bias gradients.

    Raises:
        Exception: if ``activation`` is not a supported name.
    """
    n_columns = A_prev.shape[1]

    # Gradient w.r.t. the pre-activation, via the matching derivative.
    if activation == "relu":
        dZ = relu_backward(dA_curr, Z_curr)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA_curr, Z_curr)
    elif activation == "same":
        dZ = same_backward(dA_curr, Z_curr)
    elif activation == "tanh":
        dZ = tanh_backward(dA_curr, Z_curr)
    else:
        raise Exception('{} activation is not supported'.format(activation))

    weight_grad = np.dot(dZ, A_prev.T) / n_columns
    bias_grad = np.sum(dZ, axis=1, keepdims=True) / n_columns
    input_grad = np.dot(W_curr.T, dZ)

    return input_grad, weight_grad, bias_grad

def full_backward_propagation(Y_hat, Y, memory, params_values, nn_architecture):
    """Back-propagate through the whole network, last layer first.

    Args:
        Y_hat: network output from the forward pass.
        Y: targets; reshaped to ``Y_hat.shape`` before use, so any array with
            the same number of elements is accepted.
        memory: the cache returned by the forward pass, holding ``"A<k-1>"``
            (input to layer ``k``) and ``"Z<k>"`` (pre-activation of layer
            ``k``).
        params_values: dict with ``"Weights<k>"`` and ``"bias<k>"`` per layer.
        nn_architecture: sequence of layer dicts; ``"activation_func"`` is
            read from each.

    Returns:
        Dict mapping ``"dW<k>"`` and ``"db<k>"`` to the gradients of layer
        ``k`` (1-based).
    """
    grads_values = {}
    Y = Y.reshape(Y_hat.shape)

    # The gradient fed into the last layer is the raw residual Y_hat - Y.
    # NOTE(review): that is the derivative of a squared-error objective, while
    # the loss reported during training is an absolute error — confirm this
    # mismatch is intended.
    dA_prev = error(Y_hat, Y)

    for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):
        layer_idx_curr = layer_idx_prev + 1
        activ_function_curr = layer["activation_func"]

        dA_curr = dA_prev

        A_prev = memory["A" + str(layer_idx_prev)]
        Z_curr = memory["Z" + str(layer_idx_curr)]
        W_curr = params_values["Weights" + str(layer_idx_curr)]
        b_curr = params_values["bias" + str(layer_idx_curr)]

        dA_prev, dW_curr, db_curr = single_layer_backward_propagation(
            dA_curr, W_curr, b_curr, Z_curr, A_prev, activ_function_curr)

        grads_values["dW" + str(layer_idx_curr)] = dW_curr
        grads_values["db" + str(layer_idx_curr)] = db_curr

    return grads_values

In [8]:
def update(params_values, grads_values, nn_architecture, learning_rate):
    """Apply one gradient-descent step to every layer's parameters.

    For each layer ``k`` (1-based), subtracts ``learning_rate * dW<k>`` from
    ``Weights<k>`` and ``learning_rate * db<k>`` from ``bias<k>``. The
    subtraction uses ``-=``, so array-valued parameters are modified in place.

    Returns:
        The same ``params_values`` dict that was passed in.
    """
    for layer_number, _ in enumerate(nn_architecture, start=1):
        suffix = str(layer_number)
        params_values["Weights" + suffix] -= learning_rate * grads_values["dW" + suffix]
        params_values["bias" + suffix] -= learning_rate * grads_values["db" + suffix]

    return params_values

In [9]:
def train(X, Y, nn_architecture, epochs, learning_rate):
    """Train a network with full-batch gradient descent.

    Parameters are initialised with a weight scaling of 0.1. Every epoch runs
    one forward pass over all of ``X``, records the loss, back-propagates and
    applies one update step.

    Args:
        X: network inputs.
        Y: targets.
        nn_architecture: sequence of layer dicts describing the network.
        epochs: number of forward/backward/update iterations.
        learning_rate: step size passed to the update rule.

    Returns:
        Tuple ``(params_values, cost_history)``. ``cost_history[i]`` is the
        loss measured in epoch ``i`` BEFORE that epoch's update is applied.
    """
    params_values = initialize_nn_layers(nn_architecture, 0.1)
    cost_history = []

    for _ in range(epochs):
        Y_hat, cache = full_forward(X, params_values, nn_architecture)
        cost_history.append(float(return_loss(Y_hat, Y)))

        grads_values = full_backward_propagation(Y_hat, Y, cache, params_values, nn_architecture)
        params_values = update(params_values, grads_values, nn_architecture, learning_rate)

    return params_values, cost_history

In [18]:
# a-	The direct problem of kinematic data initialisation
# Coefficients of the two terms in the position formulas below
# (presumably the two link lengths — confirm).
a1 = 2
a2 = 3

# Each angle takes 10000 evenly spaced values over 0..180 degrees, converted to
# radians. The two grids are shuffled independently, so the angles are paired
# at random.
# NOTE(review): the shuffles use the global RNG and no seed is set in this
# cell — confirm whether the sampled pairs need to be reproducible.
teta1_vect = np.deg2rad(np.linspace(0, 180, num=10000))
np.random.shuffle(teta1_vect)
teta2_vect = np.deg2rad(np.linspace(0, 180, num=10000))
np.random.shuffle(teta2_vect)

# Position (x, y) reached for every (teta1, teta2) pair.
x = a1 * np.cos(teta1_vect) + a2 * np.cos(teta1_vect + teta2_vect)
y = a1 * np.sin(teta1_vect) + a2 * np.sin(teta1_vect + teta2_vect)

def rescale(array):
    """Min-max scale ``array`` so its values span 0..1.

    Computes ``(array - min) / (max - min)`` using the global minimum and
    maximum of ``array``.

    A constant array has ``max == min``; dividing by that zero span would turn
    every element into nan, so the span is replaced by 1 and the result is all
    zeros instead.
    """
    lowest = array.min()
    span = np.subtract(array.max(), lowest)
    if span == 0:
        # Degenerate case: every element equals the minimum.
        span = 1.0
    return np.divide(np.subtract(array, lowest), span)


# Direct problem: the network maps the rescaled angles to the rescaled
# position. Stacking along axis 1 and transposing yields one row per quantity
# and one column per sample.
inputs = np.stack([rescale(teta1_vect), rescale(teta2_vect)], axis=1).T
outputs = np.stack([rescale(x), rescale(y)], axis=1).T

In [19]:
# Candidate architectures to compare. Each candidate is a list of layers and
# each layer is [input_size, output_size, activation].
# NOTE(review): the 2nd and 6th candidates are identical, so that
# configuration is trained twice.
list_of_networks = [
    [[2, 150, 'tanh'], [150, 75, 'tanh'], [75, 50, 'tanh'], [50, 25, 'tanh'], [25, 10, 'tanh'], [10, 2, 'same']],
    [[2, 8, 'tanh'], [8, 16, 'tanh'], [16, 2, 'same']],
    [[2, 8, 'same'], [8, 16, 'tanh'], [16, 2, 'same']],
    [[2, 8, 'same'], [8, 16, 'same'], [16, 2, 'same']],
    [[2, 8, 'tanh'], [8, 16, 'relu'], [16, 2, 'same']],
    [[2, 8, 'tanh'], [8, 16, 'tanh'], [16, 2, 'same']],
    [[2, 25, 'same'], [25, 50, 'tanh'], [50, 10, 'tanh'], [10, 2, 'same']],
    [[2, 25, 'tanh'], [25, 50, 'tanh'], [50, 75, 'tanh'], [75, 50, 'tanh'], [50, 25, 'tanh'], [25, 2, 'same']],
    [[2, 8, 'tanh'], [8, 2, 'same']],
    [[2, 128, 'tanh'], [128, 64, 'tanh'], [64, 32, 'tanh'], [32, 16, 'tanh'], [16, 2, 'same']],
]

# Train each candidate for 10000 epochs at learning rate 0.1 and report the
# lowest loss seen at any epoch.
for item in list_of_networks:
    prototype_network = dict_layers(item)
    _, cost_history = train(inputs, outputs, prototype_network, 10000, 0.1)
    print("MAE is {} for {}".format(np.array(cost_history).min(), item))

MAE is 0.03052617258294965 for [[2, 150, 'tanh'], [150, 75, 'tanh'], [75, 50, 'tanh'], [50, 25, 'tanh'], [25, 10, 'tanh'], [10, 2, 'same']]
MAE is 0.09092225493261608 for [[2, 8, 'tanh'], [8, 16, 'tanh'], [16, 2, 'same']]
MAE is 0.05448388974439323 for [[2, 8, 'same'], [8, 16, 'tanh'], [16, 2, 'same']]
MAE is 0.2563429885212123 for [[2, 8, 'same'], [8, 16, 'same'], [16, 2, 'same']]
MAE is 0.10305107044387089 for [[2, 8, 'tanh'], [8, 16, 'relu'], [16, 2, 'same']]
MAE is 0.09092225493261608 for [[2, 8, 'tanh'], [8, 16, 'tanh'], [16, 2, 'same']]
MAE is 0.0575664592546865 for [[2, 25, 'same'], [25, 50, 'tanh'], [50, 10, 'tanh'], [10, 2, 'same']]
MAE is 0.0400052980753999 for [[2, 25, 'tanh'], [25, 50, 'tanh'], [50, 75, 'tanh'], [75, 50, 'tanh'], [50, 25, 'tanh'], [25, 2, 'same']]
MAE is 0.13387466462087216 for [[2, 8, 'tanh'], [8, 2, 'same']]
MAE is 0.036067920007514305 for [[2, 128, 'tanh'], [128, 64, 'tanh'], [64, 32, 'tanh'], [32, 16, 'tanh'], [16, 2, 'same']]


In [24]:
# b-	The inverse problem of kinematic data initialisation
# Coefficients of the two terms in the position formulas below
# (presumably the two link lengths — confirm).
a1 = 2
a2 = 3

# Each angle takes 40000 evenly spaced values over 0..180 degrees, converted to
# radians. The two grids are shuffled independently, so the angles are paired
# at random.
# NOTE(review): the shuffles use the global RNG and no seed is set in this
# cell — the result depends on whatever RNG state earlier cells left behind.
teta1_vect = np.deg2rad(np.linspace(0, 180, num=40000))
np.random.shuffle(teta1_vect)
teta2_vect = np.deg2rad(np.linspace(0, 180, num=40000))
np.random.shuffle(teta2_vect)

# Position (x, y) reached for every (teta1, teta2) pair.
x = a1 * np.cos(teta1_vect) + a2 * np.cos(teta1_vect + teta2_vect)
y = a1 * np.sin(teta1_vect) + a2 * np.sin(teta1_vect + teta2_vect)

# NOTE: `rescale` is also defined in the direct-problem data cell; when both
# cells run, this later definition replaces the earlier one.
def rescale(array):
    """Min-max scale ``array`` so its values span 0..1.

    Computes ``(array - min) / (max - min)`` using the global minimum and
    maximum of ``array``.

    A constant array has ``max == min``; dividing by that zero span would turn
    every element into nan, so the span is replaced by 1 and the result is all
    zeros instead.
    """
    lowest = array.min()
    span = np.subtract(array.max(), lowest)
    if span == 0:
        # Degenerate case: every element equals the minimum.
        span = 1.0
    return np.divide(np.subtract(array, lowest), span)


# Inverse problem: the roles are swapped — the network maps the rescaled
# position to the rescaled angles. Stacking along axis 1 and transposing
# yields one row per quantity and one column per sample.
outputs = np.stack([rescale(teta1_vect), rescale(teta2_vect)], axis=1).T
inputs = np.stack([rescale(x), rescale(y)], axis=1).T

In [25]:
# Candidate architectures to compare. Each candidate is a list of layers and
# each layer is [input_size, output_size, activation].
# NOTE(review): the 2nd and 6th candidates are identical, so that
# configuration is trained twice.
list_of_networks = [
    [[2, 150, 'tanh'], [150, 75, 'tanh'], [75, 50, 'tanh'], [50, 25, 'tanh'], [25, 10, 'tanh'], [10, 2, 'same']],
    [[2, 8, 'tanh'], [8, 16, 'tanh'], [16, 2, 'same']],
    [[2, 8, 'same'], [8, 16, 'tanh'], [16, 2, 'same']],
    [[2, 8, 'same'], [8, 16, 'same'], [16, 2, 'same']],
    [[2, 8, 'tanh'], [8, 16, 'relu'], [16, 2, 'same']],
    [[2, 8, 'tanh'], [8, 16, 'tanh'], [16, 2, 'same']],
    [[2, 25, 'same'], [25, 50, 'tanh'], [50, 10, 'tanh'], [10, 2, 'same']],
    [[2, 25, 'tanh'], [25, 50, 'tanh'], [50, 75, 'tanh'], [75, 50, 'tanh'], [50, 25, 'tanh'], [25, 2, 'same']],
    [[2, 8, 'tanh'], [8, 2, 'same']],
    [[2, 128, 'tanh'], [128, 64, 'tanh'], [64, 32, 'tanh'], [32, 16, 'tanh'], [16, 2, 'same']],
]

# Train each candidate for 10000 epochs at learning rate 0.1 and report the
# lowest loss seen at any epoch.
for item in list_of_networks:
    prototype_network = dict_layers(item)
    _, cost_history = train(inputs, outputs, prototype_network, 10000, 0.1)
    print("MAE is {} for {}".format(np.array(cost_history).min(), item))

MAE is 0.07868767486199724 for [[2, 150, 'tanh'], [150, 75, 'tanh'], [75, 50, 'tanh'], [50, 25, 'tanh'], [25, 10, 'tanh'], [10, 2, 'same']]
MAE is 0.13620612901051468 for [[2, 8, 'tanh'], [8, 16, 'tanh'], [16, 2, 'same']]
MAE is 0.0888599667592713 for [[2, 8, 'same'], [8, 16, 'tanh'], [16, 2, 'same']]
MAE is 0.3337954569898536 for [[2, 8, 'same'], [8, 16, 'same'], [16, 2, 'same']]
MAE is 0.1746188211483029 for [[2, 8, 'tanh'], [8, 16, 'relu'], [16, 2, 'same']]
MAE is 0.13620612901051468 for [[2, 8, 'tanh'], [8, 16, 'tanh'], [16, 2, 'same']]
MAE is 0.09319061872545518 for [[2, 25, 'same'], [25, 50, 'tanh'], [50, 10, 'tanh'], [10, 2, 'same']]
MAE is 0.09066523030735546 for [[2, 25, 'tanh'], [25, 50, 'tanh'], [50, 75, 'tanh'], [75, 50, 'tanh'], [50, 25, 'tanh'], [25, 2, 'same']]
MAE is 0.2016698071792191 for [[2, 8, 'tanh'], [8, 2, 'same']]
MAE is 0.07415273912200762 for [[2, 128, 'tanh'], [128, 64, 'tanh'], [64, 32, 'tanh'], [32, 16, 'tanh'], [16, 2, 'same']]
