In [1]:
import numpy as np
from numpy.random import seed
import matplotlib.pyplot as plt
import warnings
import pandas as pd
import seaborn as sns
warnings.filterwarnings("ignore")
from matplotlib.ticker import MaxNLocator
import matplotlib.ticker as mtick
import matplotlib
from matplotlib import rc
rc('axes', linewidth=1.2)
matplotlib.rcParams['mathtext.fontset'] = 'stix'
matplotlib.rcParams['font.family'] = 'STIXGeneral'

In [2]:
"""
Setting up the parameters of the RFMEONP
"""
# Number of sites in each chain; len(sites) is the number of chains.
sites = [3,2]
# Rate constants per chain. A chain with s sites gets s+1 rates here
# (4 values for the 3-site chain, 3 values for the 2-site chain); funct
# indexes them as lamda[i][0] .. lamda[i][s].
lamda1 = [1+i/4 for i in range(1,5)]
lamda2= [1.0+i/3 for i in range(1,4)]
lamda = [lamda1, lamda2]
# State dimension: one entry per site plus one extra entry stored at index
# sum(sites) (the entry funct reads as y[n]).
p = sum(sites)+1
# Passed to funct / backward_propagation as their H argument.
poolsize = 1

In [3]:
"""
Generating the Training and Test Sets
"""

"""
Training Set Generation
"""
# Training grid: points 0, h, 2h, ..., 1 stored as a (1, ntrain) row vector.
# The number of points is stated explicitly via np.linspace: np.arange with a
# float step and a "+h" end point can gain or lose a sample through rounding.
h = 0.1
t = np.linspace(0.0, 1.0, int(round(1.0/h))+1).reshape(1, -1)
ntrain = t.shape[1]
# funct multiplies its result by `scale`; backward_propagation divides its
# finite differences by the same constant.
scale = 10000


"""
Testing Set Generation
"""
# Test grid: points 0, h, 2h, ..., 0.9 stored as a (1, n0) row vector.
# NOTE: h is deliberately re-bound here, so after this cell h == 0.0001.
h = 0.0001
t0 = np.linspace(0.0, 0.9, int(round(0.9/h))+1).reshape(1, -1)
n0 = t0.shape[1]

In [7]:
def G(Z):
    """Return Z unchanged (identity).

    funct and backward_propagation apply it to 2*H*y[n] in the entry term of
    each chain.
    """
    return Z

def Gprime(Z):
    """Return the constant 1.0, i.e. the derivative of the identity G.

    The argument is ignored; the scalar result broadcasts where
    backward_propagation multiplies it in as Gprime(Y).
    """
    return 1.0

def funct(y, lamda, sites, H):
    """Evaluate the scaled right-hand side of the chain/pool system at state y.

    Parameters
    ----------
    y : array whose first sum(sites) entries (rows) are the site variables,
        chain after chain, followed by one extra entry at index sum(sites).
    lamda : per-chain rate lists; chain i uses lamda[i][0] .. lamda[i][sites[i]].
    sites : number of sites in each chain.
    H : factor applied as 2*H*y[n] in the entry term and as 1/(2*H) in the
        equation of the last entry.

    Returns
    -------
    A new array shaped like y holding the right-hand side multiplied by the
    module-level constant `scale`. y itself is not modified.
    """
    rhs = y.copy()
    pool = sum(sites)
    pool_signal = 2*H*y[pool]
    entered = 0   # summed entry terms of all chains
    left = 0      # summed exit terms of all chains
    for chain, length in enumerate(sites):
        first = sum(sites[:chain])
        last = first+length-1
        rates = lamda[chain]
        r = 0  # running position inside this chain's rate list

        # First site: fed through G of the last state entry.
        entering = rates[r]*G(pool_signal)*(1-y[first])
        rhs[first] = entering-rates[r+1]*y[first]*(1-y[first+1])
        entered += entering
        r += 1

        # Interior sites: inflow from the previous site, outflow to the next.
        for j in range(first+1, last):
            rhs[j] = rates[r]*y[j-1]*(1-y[j])-rates[r+1]*y[j]*(1-y[j+1])
            r += 1

        # Last site: its outflow is the chain's exit term.
        leaving = rates[r+1]*y[last]
        rhs[last] = rates[r]*y[last-1]*(1-y[last])-leaving
        left += leaving
    rhs[pool] = 1/(2*H)*(left-entered)
    return scale*rhs

def init_weights_biases(initialiser, N0, N1):
    """Draw a random (N1, N0) weight matrix and (N1, 1) bias column.

    `initialiser` is matched case-insensitively:
      'normal'  - weights and biases ~ Normal(0, sqrt(2/N1))
      'uniform' - weights and biases ~ Uniform(0, 0.05)
      'xavier'  - weights ~ Uniform(0, 1/sqrt(N1)), biases ~ Normal(0, 1/sqrt(N1))

    Returns [weights, biases], or -1 when the name is not recognised.
    Weights are always drawn before biases.
    """
    scheme = initialiser.upper()
    weight_shape = (N1, N0)
    bias_shape = (N1, 1)
    if scheme == 'NORMAL':
        spread = np.sqrt(2/N1)
        weights = np.random.normal(0, spread, weight_shape)
        biases = np.random.normal(0, spread, bias_shape)
    elif scheme == 'UNIFORM':
        weights = np.random.uniform(0, 0.05, weight_shape)
        biases = np.random.uniform(0, 0.05, bias_shape)
    elif scheme == 'XAVIER':
        bound = 1/np.sqrt(N1)
        weights = np.random.uniform(0, bound, weight_shape)
        biases = np.random.normal(0, bound, bias_shape)
    else:
        return -1
    return [weights, biases]
    
def add_layer(input_shape, hidden_units , activation = 'sigmoid', initialiser = 'normal'): 
    """Append one layer to the notebook-global network description.

    Draws parameters with init_weights_biases(initialiser, input_shape,
    hidden_units) and appends the activation name to NA, the weight matrix to
    NW and the bias column to NB. Returns None.
    """
    params = init_weights_biases(initialiser, input_shape, hidden_units)
    # The activation name is recorded first, then the two parameter arrays.
    NA.append(activation)
    NW.append(params[0])
    NB.append(params[1])

def estimated_derivative(A, Z, NA, NW, NB, t):
    """Return A[2] + t * (NW[1] @ (act'(Z[0]) * (NW[0] @ ones_like(t)))).

    Only the first two layers are referenced (Z[0], NA[0], NW[0], NW[1], A[2]);
    NB is accepted but not used.
    """
    second_activation = A[2]
    first_layer_slope = activation_derivative(Z[0], NA[0])
    input_sensitivity = NW[0]@np.ones_like(t)
    chained = NW[1]@(first_layer_slope*input_sensitivity)
    dydt = second_activation+t*chained
    return dydt

def activation_function(x, string, alpha = 0.01):
    """Apply the activation named by `string` (case-insensitive) element-wise.

    Supported names: 'sigmoid', 'bps' (1 - 2*sigmoid), 'trig'/'trignometric'
    (cosine), 'custom' (0.9*tanh - 0.5*sigmoid), 'tanh', 'relu',
    'leakyrelu'/'lr' (negative slope `alpha`), 'linear',
    'exponential'/'exp' (exp(x)) and 'elu' (fixed factor 0.01 on the
    negative side).

    The input array is never modified. Returns None for an unknown name.
    """
    name = string.upper()
    if name == 'SIGMOID':
        return (1/(1+np.exp(-x)))
    if name == 'BPS':
        return 1-2*(1/(1+np.exp(-x)))
    if name in ('TRIG', 'TRIGNOMETRIC'):
        return np.cos(x)
    if name == 'CUSTOM':
        return 0.9*np.tanh(x)-0.5*(1/(1+np.exp(-x)))
    if name == 'TANH':
        return np.tanh(x)
    if name == 'RELU':
        # max(0, x) == (x + |x|)/2; without the division the output is doubled
        # and disagrees with the 0/1 derivative used in backpropagation.
        return (x+np.abs(x))/2
    if name in ('LEAKYRELU', 'LR'):
        return (x+alpha*x+np.abs(x-alpha*x))/2
    if name == 'LINEAR':
        return x
    if name in ('EXPONENTIAL', 'EXP'):
        # A second 'EXP' branch (exp(-x)) used to follow further down but could
        # never be reached; 'EXP' has always meant exp(x) here.
        return np.exp(x)
    if name == 'ELU':
        # Build a new array instead of writing into the caller's x.
        x = np.asarray(x, dtype=float)
        # Clamp the exponent so large positive entries cannot overflow in the
        # branch np.where discards.
        return np.where(x < 0, 0.01*(np.exp(np.minimum(x, 0.0))-1), x)
    return None
    
def activation_derivative(x, string, alpha = 0.01):
    """Element-wise derivative of the activation named by `string`.

    Accepts the same (case-insensitive) names as activation_function. The
    input array is never modified, which matters because backpropagation
    passes the stored pre-activations Z[i] directly. 'linear' returns the
    scalar 1, which broadcasts. Returns None for an unknown name.
    """
    name = string.upper()
    if name == 'SIGMOID':
        s = 1/(1+np.exp(-x))
        return s*(1-s)
    if name == 'BPS':
        s = 1/(1+np.exp(-x))
        return -2*s*(1-s)
    if name in ('TRIG', 'TRIGNOMETRIC'):
        return -np.sin(x)
    if name == 'TANH':
        return (1-np.tanh(x)**2)
    if name == 'RELU':
        # Decide from the original values in one step. Zeroing the negatives
        # first and then setting everything >= 0 to 1 turns every entry into 1.
        return np.where(np.asarray(x) < 0, 0.0, 1.0)
    if name == 'CUSTOM':
        # d/dx [0.9*tanh(x) - 0.5*sigmoid(x)]; the tanh argument is x, not 2*x.
        s = 1/(1+np.exp(-x))
        return 0.9*(1-np.tanh(x)**2)-0.5*s*(1-s)
    if name in ('LEAKYRELU', 'LR'):
        return np.where(np.asarray(x) < 0, alpha, 1.0)
    if name == 'LINEAR':
        return 1
    if name in ('EXPONENTIAL', 'EXP'):
        # A second, unreachable 'EXP' branch (-exp(-x)) was dropped; 'EXP' has
        # always resolved to exp(x) here.
        return np.exp(x)
    if name == 'ELU':
        x = np.asarray(x, dtype=float)
        # Clamp the exponent so large positive entries cannot overflow in the
        # branch np.where discards.
        return np.where(x < 0, 0.01*np.exp(np.minimum(x, 0.0)), 1.0)
    return None
    
def forward_propagation(X,NA,NW,NB):
    """Run the input X through every layer and record the intermediates.

    Layer i computes NW[i] @ previous + NB[i] and applies the activation
    named NA[i].

    Returns [Z, A]: Z holds the float pre-activations of each layer, and A
    holds X followed by each layer's output, so A[-1] is the network output.
    """
    activations = [X]
    pre_activations = []
    for layer, act_name in enumerate(NA):
        pre = NW[layer]@activations[-1]+NB[layer]
        # The activation gets its own float copy, so the copy stored in Z stays
        # untouched even for activations that write into their argument.
        layer_out = activation_function(pre.astype(float), act_name)
        pre_activations.append(pre.astype(float))
        activations.append(layer_out)
    return [pre_activations, activations]

def backward_propagation(NA, NW, Z, A, dZ, dW, dB, y, f, H, sites, lamda, alpha):
    """Compute the layer gradients dZ, dW, dB for one training step.

    Parameters
    ----------
    NA, NW : activation names and weight matrices of the layers.
    Z, A : pre-activations and activations from forward_propagation.
    dZ, dW, dB : lists whose entries are overwritten and returned.
    y : trial solution (train_model passes init + t*A[L]); not modified.
    f : scaled right-hand side evaluated at y (output of funct).
    H : the same factor funct receives as H.
    sites, lamda : chain lengths and per-chain rate lists, as in funct.
    alpha : weight of the penalty term on (0.5 - total), where
        total = y[n] + sum_j y[j]/(2*H).

    Reads the notebook globals L, t, scale and ntrain.

    Returns
    -------
    [dZ, dW, dB]
    """
    # NOTE(review): np.gradient returns one array per axis for a 2-D input, so
    # [0] selects the difference along axis 0 of A[L] (presumably the
    # output-unit axis, not the time axis). Confirm this is intended.
    Adot = (np.gradient(A[L])[0]/scale)-10**-10
    Adotdot = (np.gradient(Adot)[0]/scale)
    dAL = [0 for i in range(sum(sites)+1)]
    n = sum(sites)
    Y = 2*H*y[n]
    # Copy before accumulating: y[n] is a view into the caller's array and the
    # in-place += below would otherwise overwrite that row of y.
    total = y[n].copy()
    penalty = alpha
    penalty2 = 1
    for j in range(n):
        total += y[j]/(2*H)
    for i in range(len(sites)):
        k = sum(sites[:i])   # global index of the chain's first site
        p = sites[i]         # number of sites in this chain
        ii = 0               # running position in this chain's rate list

        # First site of the chain: derivatives of f[k], f[k+1] and f[n] with
        # respect to output k (y depends on the output through the factor t).
        df0a0 = -lamda[i][ii]*G(Y)*t-lamda[i][ii+1]*t*(1-y[k+1])
        df1a0 = lamda[i][ii+1]*t*(1-y[k+1])
        dfza0 = +1/(2*H)*lamda[i][0]*G(Y)*t
        dAL[k] = (A[L][k]+t*Adot[k]-f[k])*(1+t*Adotdot[k]/Adot[k]-df0a0)-(A[L][k+1]+t*Adot[k+1]-f[k+1])*df1a0-(A[L][n]+t*Adot[n]-f[n])*dfza0-penalty*(0.5-total)*t
        ii += 1

        # Interior sites: coupled to the previous and the next site.
        for j in range(k+1, k+p-1):
            df0a1 = lamda[i][ii]*y[j-1]*t
            df1a1 = -lamda[i][ii]*y[j-1]*t-lamda[i][ii+1]*t*(1-y[j+1])
            df2a1 = lamda[i][ii+1]*t*(1-y[j+1])
            dAL[j] = -(A[L][j-1]+t*Adot[j-1]-f[j-1])*df0a1+(A[L][j]+t*Adot[j]-f[j])*(1+t*Adotdot[j]/Adot[j]-df1a1)-(A[L][j+1]+t*Adot[j+1]-f[j+1])*df2a1-penalty*(0.5-total)*t
            ii += 1

        # Last site of the chain. Its predecessor is y[k+p-2] (global index).
        # Indexing with the chain-local counter, y[ii-1], only matched for the
        # first chain (k == 0) and read the wrong site for every later chain.
        df4a5 = lamda[i][ii]*y[k+p-2]*t
        df5a5 = -lamda[i][ii]*y[k+p-2]*t-lamda[i][ii+1]*t
        dfza5 = 1/(2*H)*lamda[i][ii+1]*t
        dAL[k+p-1] = penalty2*(-(A[L][k+p-2]+t*Adot[k+p-2]-f[k+p-2])*df4a5+(A[L][k+p-1]+t*Adot[k+p-1]-f[k+p-1])*(1+t*Adotdot[k+p-1]/Adot[k+p-1]-df5a5)-(A[L][n]+t*Adot[n]-f[n])*dfza5)-penalty*(0.5-total)*t

    # Last state entry (index n): it enters every chain's first-site equation
    # through G(Y).
    s = 0
    for i in range(len(sites)):
        k = sum(sites[:i])
        df0az = lamda[i][0]*(1-y[k])
        s += -df0az
        dAL[n] += -penalty2*(A[L][k]+t*Adot[k]-f[k])*Gprime(Y)*t*df0az
    dAL[n] += penalty2*((A[L][n]+t*Adot[n]-f[n])*(1+t*Adotdot[n]/Adot[n]-Gprime(Y)*2*H*t*s))-2*H*penalty*(0.5-total)*t

    # Push the output-layer signal back through the layers, last to first.
    dtAL = np.array(dAL).reshape((sum(sites)+1,ntrain))/ntrain
    for i in range(L-1,-1,-1):
        dZ[i] = dtAL*activation_derivative(Z[i],NA[i])
        dW[i] = (dZ[i]@A[i].T)/ntrain
        dB[i] = np.sum(dZ[i], axis = 1, keepdims = True)/ntrain
        dtAL = (NW[i].T@dZ[i])/ntrain
    return [dZ, dW, dB]

def rmsprop(NW, NB, dW, dB, SW, SB, epsilon, lr, beta):
    """Apply one RMSprop step to every layer.

    Parameters
    ----------
    NW, NB : per-layer weights and biases (list entries are replaced).
    dW, dB : per-layer gradients.
    SW, SB : running averages of the squared gradients (list entries replaced).
    epsilon : added to the root of the running average in the denominator.
    lr : learning rate.
    beta : decay factor of the running average.

    Returns
    -------
    [NW, NB, SW, SB]
    """
    # Take the layer count from the arguments rather than from the notebook
    # global L, which is only defined in a later cell.
    for i in range(len(NW)):
        SW[i] = (beta*SW[i]+(1-beta)*dW[i]**2)
        SB[i] = (beta*SB[i]+(1-beta)*dB[i]**2)
        NW[i] = NW[i]-lr*dW[i]/(SW[i]**0.5+epsilon)
        NB[i] = NB[i]-lr*dB[i]/(SB[i]**0.5+epsilon)
    return [NW, NB, SW, SB]

def adam(i, NW, NB, dW, dB, VW, VB, SW, SB, epsilon, lr, momentum, beta):
    """Apply one Adam step with bias correction to every layer.

    Parameters
    ----------
    i : 1-based step counter (i == 0 would make the bias corrections divide
        by zero).
    NW, NB : per-layer weights and biases (list entries are replaced).
    dW, dB : per-layer gradients.
    VW, VB : first-moment running averages (list entries replaced).
    SW, SB : second-moment running averages (list entries replaced).
    epsilon : added to the bias-corrected second moment inside the square root.
    lr : learning rate.
    momentum, beta : decay factors of the first and second moments.

    Returns
    -------
    [NW, NB, VW, VB, SW, SB]
    """
    # NOTE(review): epsilon sits inside the square root here, whereas rmsprop
    # in this file adds it outside. Kept as-is so training results do not change.
    # The layer count comes from the arguments rather than from the notebook
    # global L, which is only defined in a later cell.
    for j in range(len(NW)):
        VW[j] = momentum*VW[j]+(1-momentum)*dW[j]
        VB[j] = momentum*VB[j]+(1-momentum)*dB[j]
        SW[j] = beta*SW[j]+(1-beta)*(dW[j]**2)
        SB[j] = beta*SB[j]+(1-beta)*(dB[j]**2)
        # Bias-corrected moments are only needed for this update, so they are
        # plain locals instead of copies of the state lists.
        VWhat = VW[j]/(1-momentum**i)
        VBhat = VB[j]/(1-momentum**i)
        SWhat = SW[j]/(1-beta**i)
        SBhat = SB[j]/(1-beta**i)
        NW[j] = NW[j]-lr*(VWhat/np.sqrt(SWhat+epsilon))
        NB[j] = NB[j]-lr*(VBhat/np.sqrt(SBhat+epsilon))
    return [NW, NB, VW, VB, SW, SB]

def train_model(X, mean, epochs, NA, NW, NB, init, optimiser = 'sgd', learning_rate = 0.001,
                momentum = 0.9, epsilon = 10**-8, beta = 0.999, poolsize = 1, alpha = 1):
    """Train the network for `epochs` steps and return the updated parameters.

    Each step runs forward_propagation on X, forms the trial solution
    init + t*A[-1], evaluates funct on it, calls backward_propagation and then
    applies one adam update.

    Parameters
    ----------
    X : network input (the notebook passes the training grid t).
    mean : accepted for compatibility; not used.
    epochs : number of update steps.
    NA, NW, NB : activation names, weights and biases of the layers.
    init : initial state added to t * network output.
    optimiser : accepted for compatibility; not used - adam is always applied.
    learning_rate, momentum, epsilon, beta : forwarded to adam.
    poolsize : forwarded as H to funct and backward_propagation.
    alpha : penalty weight forwarded to backward_propagation.

    Reads the notebook globals t, lamda and sites. Prints one block character
    per percent of progress.

    Returns
    -------
    [NW, NB]
    """
    n_layers = len(NW)
    dZ = [0 for _ in range(n_layers)]
    dW = [0 for _ in range(n_layers)]
    dB = [0 for _ in range(n_layers)]
    VW = [np.zeros(w.shape) for w in NW]
    VB = [np.zeros(b.shape) for b in NB]
    SW = [np.zeros(w.shape) for w in NW]
    SB = [np.zeros(b.shape) for b in NB]
    for i in range(epochs):
        [Z, A] = forward_propagation(X, NA, NW, NB)
        yhat = init+t*(A[-1])
        # estimated_derivative used to be called here every epoch, but its
        # result was never read, so the call has been dropped.
        f = funct(yhat, lamda, sites, poolsize)
        [dZ, dW, dB] = backward_propagation(NA, NW, Z, A, dZ, dW, dB, yhat, f, poolsize, sites, lamda, alpha)
        [NW, NB, VW, VB, SW, SB] =  adam(i+1, NW, NB, dW, dB, VW, VB, SW, SB, epsilon, learning_rate, momentum, beta)
        # Integer test for "i is a whole percent of epochs". The float form
        # (i/epochs)*100 is not always exactly integral (0.07*100 gives
        # 7.000000000000001), so some ticks were silently skipped.
        if (i*100) % epochs == 0:
            print('█', end = '')
    return [NW, NB]

In [9]:
# State dimension (sites plus one extra entry); same value as in the parameter cell.
p = sum(sites)+1
# Penalty weight handed to train_model and on to backward_propagation.
alpha = 1000
# NOTE: errors, rn and zs are created here but not filled anywhere in this cell.
errors = []
rn = [[] for j in range(p)]
zs = []
# Number of layers. backward_propagation, rmsprop, adam and train_model read
# L as a global, so this cell must run before any of them is called.
L = 3
# Number of independent runs (random initial state + freshly initialised network).
k = 10
# NOTE(review): no random seed is set in the visible cells, so every run of
# this cell draws different initial states and weights.
for ii in range(k):
    print(ii+1, end = '.')
    # Upper bound for the uniformly drawn initial site values.
    b = min(1,poolsize*(1/sum(sites)))
    init = np.random.uniform(0, b, (p,1))
    #         init = np.zeros((p,1))
    # Choose the last entry so that init[-1] + sum(init[:-1])/(2*poolsize) == 0.5,
    # the quantity backward_propagation penalises as (0.5 - total).
    init[-1] = 0.5-1/(2*poolsize)*(sum(init[:-1]))
    # Fresh network description; add_layer appends to these globals.
    [NA, NW, NB] = [[],[],[]]
    add_layer(input_shape = 1, hidden_units = p, activation = 'tanh', initialiser = 'xavier')
    for i in range(L-2):
        add_layer(input_shape = p, hidden_units = p, activation = 'sigmoid', initialiser = 'xavier')
    add_layer(input_shape = p, hidden_units = p, activation = 'bps', initialiser = 'xavier')
    """
    Training the CDNN on the train set
    """   
    # NOTE: optimiser = 'rmsprop' is passed, but train_model ignores the
    # argument and always applies adam.
    [NW, NB] = train_model(t, np.mean(init.T), 15000, NA, NW, NB, init, optimiser = 'rmsprop',
                                     learning_rate = 10**-2, momentum = 0.9, beta = 0.999, poolsize = poolsize, alpha = alpha)
    """
    Obtaining the solutions on the test set
    """ 
    [Z,A] = forward_propagation(t0, NA, NW, NB)
    # Trial solution on the test grid. NOTE: v_hat is overwritten on every
    # iteration and is not stored in errors, rn or zs in this cell.
    v_hat = (init+t0*(A[L]))

1.████████████████████████████████████████████████████████████████████████████████████████████2.█████

KeyboardInterrupt: 