In [1]:
import numpy as np
import matplotlib.pyplot as plt
import warnings
# Silences every warning category for the rest of the session, so NumPy
# deprecation/runtime warnings raised by later cells will not be shown.
warnings.filterwarnings("ignore")
# NOTE(review): `rc` below is not imported by name, so it presumably comes from
# this star import. The star import also pulls many other names into the
# notebook namespace and may shadow builtins such as `sum` -- verify.
from pylab import *
# Set the `linewidth` entry of the `axes` rc group to 3 for all later figures.
rc('axes', linewidth=3)

In [2]:
"""
Setting up the parameters of the RFMEONP
"""
sites = [7,5]#Number of sites in each RFMEO
# Each RFMEO carries one more rate than it has sites (entry rate, one rate per
# hop between sites, exit rate), so the lengths are derived from `sites`.
lamda1 = np.ones(sites[0]+1)#Parameters of RFMEO 1
lamda2 = np.ones(sites[1]+1)#Parameters of RFMEO 2
# The two rate vectors have different lengths. Building a plain ndarray from a
# ragged sequence is an error on recent NumPy releases (and only a warning,
# hidden by the global warning filter, on older ones), so the container is
# created explicitly as a 1-D object array. Indexing as lamda[i][j] is unchanged.
lamda = np.array([lamda1,lamda2], dtype=object)
l = 1 #site size
p = sum(sites)+1#Total Number of sites in each rfmeo in the network plus the pooling function

In [3]:
"""
Generating the Training and Test Sets
"""

"""
Training Set Generation
"""
h = 0.1#Training Step Size
# np.arange with a fractional step and a `stop + h` endpoint has a
# rounding-dependent length (it can gain or lose the last sample). linspace
# with an explicit sample count fixes both the endpoint and the length.
t = np.linspace(0, 1, int(round(1/h))+1)
Xtr = t.reshape(1,len(t))
ntrain = Xtr.shape[1]# Number of training samples

"""
Testing Set Generation
"""
h = 0.0001#Testing Step Size
t0 = np.linspace(0, 0.9, int(round(0.9/h))+1)
Xte = t0.reshape(1, len(t0))
ntest = Xte.shape[1]#Number of testing samples

In [4]:
"""
Generating the Training and Test Sets
"""

"""
Training Set Generation
"""
# NOTE(review): this cell rebuilds the same t / Xtr / t0 / Xte grids as the
# previous cell and only adds the sample counts under the names n / nprime;
# consider merging the two cells.
h = 0.1#Training Step Size
# linspace with an explicit sample count avoids the rounding-dependent length
# of np.arange when the step is fractional.
t = np.linspace(0, 1, int(round(1/h))+1)
Xtr = t.reshape(1,len(t))
n = Xtr.shape[1]#Number of training samples

"""
Testing Set Generation
"""
h = 0.0001#Testing Step Size
t0 = np.linspace(0, 0.9, int(round(0.9/h))+1)
Xte = t0.reshape(1, len(t0))
nprime = Xte.shape[1]#Number of testing samples

In [5]:
"""
Governing system of ODE
"""
# Global factor applied to the ODE right-hand side in funct() and divided out
# of the finite-difference derivatives in backward_propagation().
scale = 10**4

def G(Z):
    """Pool output function: the identity, returning its argument unchanged."""
    pool_output = Z
    return pool_output

def Gprime(Z):
    """Derivative of G: the constant 1.0, whatever the value of Z."""
    slope = 1.0
    return slope

def funct(y, lamda, sites, l, H):
    """
    Evaluate the right-hand side of the governing ODE system, multiplied by
    the module-level `scale`.

    Parameters
    ----------
    y : array with a .copy() method, indexed along its first axis.
        Entries 0 .. sum(sites)-1 are the site variables of the chains laid
        out one after another; entry sum(sites) is the pool variable.
    lamda : sequence of per-chain rate sequences; chain i uses
        lamda[i][0] .. lamda[i][sites[i]].
    sites : sequence with the number of sites of each chain.
    l : site size. l > 1 selects the extended-object branch; any other value
        uses the nearest-neighbour branch.
    H : pool size factor; the pool enters the chains as G(2*H*y[n]).

    Returns
    -------
    scale * f, where f has the same shape as y.
    """
    # f starts as a copy of y; every entry 0..n is overwritten below.
    f = y.copy()
    n = sum(sites)  # index of the pool variable
    Y = 2*H*y[n]
    if l>1:
        for i in range(len(sites)):
            k = int(sum(sites[:i]))  # offset of chain i inside y
            p = sites[i]  # number of sites of chain i (local; shadows the global p)
            ii = 0  # running index into lamda[i]
            z1 = 0
            z2 = 0
            # z1 sums the l entries starting at k, z2 the l entries starting at k+1.
            for j in range(k, k+l):
                z1 += y[j]
                z2 += y[j+1]
            # First site: inflow from the pool, outflow to the next site.
            f[k] = lamda[i][ii]*G(Y)*(1-z1)-lamda[i][ii+1]*y[k]*(1-z2)

            ii += 1

            # Sites k+1 .. k+p-l-1: both inflow and outflow carry an exclusion factor.
            for tt in range(k+1, k+p-l):
                z1 = 0
                z2 = 0
                for j in range(tt, tt+l):
                    z1 += y[j]
                    z2 += y[j+1]
                f[tt] = lamda[i][ii]*y[tt-1]*(1-z1)-lamda[i][ii+1]*y[tt]*(1-z2)#verify
                ii += 1
    #         print(z2)
            # NOTE(review): `tt` and `z2` are the values left over from the last
            # iteration of the loop above; if that loop is empty, z2 is the one
            # computed for the first site and `tt` is unbound (first chain) or
            # stale (later chains) -- confirm this cannot happen for the
            # sites / l combinations in use.
            f[k+p-l] = lamda[i][ii]*y[tt]*(1-z2)-lamda[i][ii+1]*y[k+p-l]
            ii += 1

            # Last l-1 sites: plain linear inflow/outflow, no exclusion factor.
            for kk in range(k+p-l+1, k+p):
                f[kk] = lamda[i][ii]*y[kk-1]-lamda[i][ii+1]*y[kk]
                ii += 1

    #         sitesn += lamda[i][ii]*y[k+p-1]
        # Pool equation: total exit flow minus total entry flow over all chains.
        sites0 = 0
        sitesn = 0
        for i in range(len(sites)):
            k = int(sum(sites[:i]))
            p = sites[i]
            z1 = 0
            for j in range(k,k+l):
                z1 += y[j]
            sites0 += lamda[i][0]*G(Y)*(1-z1)
            sitesn += lamda[i][p]*y[k+p-1]
        f[n] = 1/(2*H)*(sitesn-sites0)
    else:
        sites0 = 0  # accumulated entry flow (pool -> first sites)
        sitesn = 0  # accumulated exit flow (last sites -> pool)
        ii = 0
        z = 2*H*y[n]  # same quantity as Y above
        for i in range(len(sites)):
            k = int(sum(sites[:i]))  # offset of chain i inside y
            p = sites[i]
            ii = 0
            # First site of the chain.
            f[k] = lamda[i][ii]*G(z)*(1-y[k])-lamda[i][ii+1]*y[k]*(1-y[k+1])
            sites0 += lamda[i][ii]*G(z)*(1-y[k])
            ii += 1
            # Interior sites.
            for j in range(k+1, k+p-1):
                f[j] = lamda[i][ii]*y[j-1]*(1-y[j])-lamda[i][ii+1]*y[j]*(1-y[j+1])
                ii += 1
            # Last site of the chain: unhindered exit at rate lamda[i][ii+1].
            f[k+p-1] = lamda[i][ii]*y[k+p-2]*(1-y[k+p-1])-lamda[i][ii+1]*y[k+p-1]
            sitesn += lamda[i][ii+1]*y[k+p-1]
        f[n] = 1/(2*H)*(sitesn-sites0)
    return scale*f

In [12]:
"""
Setting Up the Neural Network for ODE
"""

def init_weights_biases(initialiser, N0, N1):
    """
    Draw the initial weight matrix and bias vector of one dense layer.

    initialiser : 'normal', 'uniform' or 'xavier' (case-insensitive).
    N0, N1      : number of inputs and number of units of the layer.

    Returns [weights, biases] with shapes (N1, N0) and (N1, 1), or -1 when
    the initialiser name is not recognised.
    """
    scheme = initialiser.upper()
    weight_shape = (N1, N0)
    bias_shape = (N1, 1)

    if scheme == 'NORMAL':
        weights = np.random.normal(0, np.sqrt(2/N1), weight_shape)
        biases = np.random.normal(0, np.sqrt(2/N1), bias_shape)
    elif scheme == 'UNIFORM':
        weights = np.random.uniform(0, np.sqrt(2/N1), weight_shape)
        biases = np.random.uniform(0, 0.05, bias_shape)
    elif scheme == 'XAVIER':
        weights = np.random.uniform(0, 1/np.sqrt(N1), weight_shape)
        biases = np.random.normal(0, 1/np.sqrt(N1), bias_shape)
    else:
        # Unknown scheme: signalled with the sentinel -1.
        return -1
    return [weights, biases]
    
def add_layer(input_shape, hidden_units , activation = 'sigmoid', initialiser = 'normal'): 
    """
    Append one dense layer to the network described by the global lists
    NA (activation names), NW (weight matrices) and NB (bias vectors).

    input_shape  : number of inputs of the layer.
    hidden_units : number of units of the layer.
    activation   : activation name stored in NA.
    initialiser  : scheme forwarded to init_weights_biases.
    """
    layer_params = init_weights_biases(initialiser, input_shape, hidden_units)
    NA.append(activation)
    weight_matrix = layer_params[0]
    NW.append(weight_matrix)
    bias_vector = layer_params[1]
    NB.append(bias_vector)

def activation_function(x, string, alpha = 0.01):
    """
    Apply the activation named `string` (case-insensitive) element-wise to x.

    Supported names: 'sigmoid', 'bps' (sigmoid rescaled to (-1, 1)),
    'trig'/'trignometric' (cosine), 'custom' (0.9*tanh - 0.5*sigmoid), 'tanh',
    'relu', 'leakyrelu'/'lr', 'linear', 'exponential'/'exp' and 'elu'.

    x      : NumPy array of pre-activations; it is never modified.
    alpha  : negative-side slope, used by the leaky ReLU only.

    Returns the activated values, or None when the name is not recognised.
    """
    name = string.upper()
    if name == 'SIGMOID':
        return (1/(1+np.exp(-x)))
    if name == 'BPS':
        return  2*(1/(1+np.exp(-x)))-1
    if name == 'TRIG' or name == 'TRIGNOMETRIC':
        return np.cos(x)
    if name == 'CUSTOM':
        return 0.9*np.tanh(x)-0.5*(1/(1+np.exp(-x)))
    if name == 'TANH':
        return np.tanh(x)
    if name == 'RELU':
        return (x+np.abs(x))/2
    if name == 'LEAKYRELU' or name == 'LR':
        return (x+alpha*x+np.abs(x-alpha*x))/2
    if name == 'LINEAR':
        return x
    if name == 'EXPONENTIAL' or name == 'EXP':
        # 'EXP' has always resolved to exp(x) here; a later duplicate 'EXP'
        # branch returning exp(-x) could never be reached and was removed.
        return np.exp(x)
    if name == 'ELU':
        # Work on a float copy so the caller's array is left untouched.
        # The negative-side factor is the fixed 0.01, independent of `alpha`.
        out = np.array(x, dtype=float)
        negative = out < 0
        out[negative] = 0.01*(np.exp(out[negative])-1)
        return out
    return None
    
def activation_derivative(x, string, alpha = 0.01):
    """
    Element-wise derivative of the activation named `string` (case-insensitive),
    evaluated at the pre-activations x.

    Supports the same names as activation_function. x is never modified.

    alpha : negative-side slope, used by the leaky ReLU only.

    Returns the derivative values (the scalar 1 for 'linear'), or None when
    the name is not recognised.
    """
    name = string.upper()
    if name == 'SIGMOID':
        return (1/(1+np.exp(-x)))*(1-(1/(1+np.exp(-x))))
    if name == 'BPS':
        return 2*(1/(1+np.exp(-x)))*(1-(1/(1+np.exp(-x))))
    if name == 'TRIG' or name == 'TRIGNOMETRIC':
        return -np.sin(x)
    if name == 'TANH':
        return (1-np.tanh(x)**2)
    if name == "RELU":
        # Build the result in a fresh array. Overwriting x in two steps turned
        # every entry into 1, because the zeros written for the negative
        # entries then matched the `x >= 0` mask, and it also clobbered the
        # caller's stored pre-activations.
        values = np.asarray(x, dtype=float)
        grad = np.ones_like(values)
        grad[values < 0] = 0.0
        return grad
    if name == 'CUSTOM':
        # d/dx [0.9*tanh(x) - 0.5*sigmoid(x)]; the tanh argument must be x
        # to match activation_function.
        sig = 1/(1+np.exp(-x))
        return 0.9*(1-np.tanh(x)**2)-0.5*sig*(1-sig)
    if name == 'LEAKYRELU' or name == 'LR':
        dx = np.ones(x.shape)
        dx[x < 0] = alpha
        return dx
    if name == 'LINEAR':
        return 1
    if name == 'EXPONENTIAL' or name == 'EXP':
        # 'EXP' has always resolved to exp(x) here; a later duplicate 'EXP'
        # branch returning -exp(-x) could never be reached and was removed.
        return np.exp(x)
    if name == 'ELU':
        # Fresh array again, so the caller's x is preserved. The negative-side
        # factor is the fixed 0.01 used by activation_function's ELU.
        values = np.asarray(x, dtype=float)
        grad = np.ones_like(values)
        negative = values < 0
        grad[negative] = 0.01*np.exp(values[negative])
        return grad
    return None
    
def forward_propagation(X,NA,NW,NB):
    """
    Run the network forward on X.

    X          : input array fed to the first layer.
    NA, NW, NB : per-layer activation names, weight matrices and bias vectors.

    Returns [Z, A]: Z[i] is the float pre-activation of layer i and A[i+1] its
    activation, with A[0] being X itself.
    """
    layer_outputs = [X]
    pre_activations = []
    for idx, act_name in enumerate(NA):
        affine = NW[idx]@layer_outputs[idx]+NB[idx]
        # Two independent float copies: one is stored, the other is handed to
        # the activation, so an activation that writes into its argument
        # cannot alter the stored pre-activation.
        pre_activations.append(affine.astype(float))
        layer_outputs.append(activation_function(affine.astype(float), act_name))
    return [pre_activations, layer_outputs]

def backward_propagation(NA, NW, Z, A, dZ, dW, dB, y_hat, f, H, sites, l, lamda, alpha):
    """
    Hand-derived gradient of the training loss with respect to the network
    output, followed by back-propagation through the layers.

    Parameters
    ----------
    NA, NW     : per-layer activation names and weight matrices.
    Z, A       : pre-activations and activations from forward_propagation;
                 A[L] is used as the network output.
    dZ, dW, dB : lists that are overwritten in place, one entry per layer.
    y_hat      : trial solution; rows 0..sum(sites)-1 are site variables and
                 row sum(sites) is the pool variable.
    f          : right-hand side evaluated at y_hat (output of funct).
    H          : pool size factor.
    sites, l, lamda : chain layout, site size and rates, as in funct.
    alpha      : weight of the conservation penalty; only used when l > 1.

    Reads the module-level names L, scale, t, ntrain, G and Gprime.
    Returns [dZ, dW, dB].
    """
    # Finite-difference estimates of the first and second derivative of the
    # network output, using a hard-coded spacing of 0.1.
    # NOTE(review): np.gradient on a 2-D array returns one gradient per axis and
    # [0] selects the one along axis 0 (the row axis of A[L]); if the time
    # samples run along axis 1, as Xtr's (1, ntrain) layout suggests, this
    # differentiates across outputs rather than across time -- confirm.
    # The -10**-10 shift presumably keeps the Adot divisions below away from 0.
    Adot = (np.gradient(A[L],0.1)[0]/scale)-10**-10
    Adotdot = (np.gradient(Adot,0.1)[0]/scale)
    # dAL[j] accumulates the loss derivative with respect to network output j.
    dAL = [0 for i in range(sum(sites)+1)]
    y = y_hat.copy()
    n = sum(sites)  # index of the pool variable
    if l>1:
        sites0 = 0
        # `total` is y[n] + sum(site variables)/(2H); the penalty terms below
        # push it towards 0.5.
        total = y[n]
        penalty = alpha#Tuning parameter for the COP Loss
        penalty2 = 1
        for j in range(n):
            total += y[j]/(2*H)
        Y = 2*H*y[n]
        for i in range(len(sites)):
            k = int(sum(sites[:i]))  # offset of chain i
            p = sites[i]  # number of sites of chain i
            ii = 0
            z2 = 0
            for j in range(k,k+l):
                z2 += y[j+1]
            # Output k (first site of the chain).
            df0a0 = -lamda[i][ii]*G(Y)*t-lamda[i][ii+1]*t*(1-z2)
            df1a0 = lamda[i][ii+1]*t*(1-z2)
            dfza0 = lamda[i][0]*G(Y)*t/(2*H)
            dAL[k] = (A[L][k]+t*Adot[k]-f[k])*(1+t*Adotdot[k]/Adot[k]-df0a0)-(A[L][k+1]+t*Adot[k+1]-f[k+1])*df1a0-(A[L][n]+t*Adot[n]-f[n])*dfza0-penalty*(0.5-total)*t

            # Output k+1 (second site).
            z2 = 0
            for j in range(k+1, k+l+1):
                z2 += y[j+1]
            df0a1 = -lamda[i][ii]*G(Y)*t+lamda[i][ii+1]*y[k]*t
            df1a1 = -lamda[i][ii+1]*y[k]*t-lamda[i][ii+2]*t*(1-z2)
            df2a1 = lamda[i][ii+2]*t*(1-z2)
            dfza1 = 1/(2*H)*lamda[i][0]*G(Y)*t
            dAL[k+1] = -(A[L][k]+t*Adot[k]-f[k])*df0a1+(A[L][k+1]+t*Adot[k+1]-f[k+1])*(1+t*Adotdot[k+1]/Adot[k+1]-df1a1)-(A[L][k+2]+t*Adot[k+2]-f[k+2])*df2a1-(A[L][n]+t*Adot[n]-f[n])*dfza1-penalty*(0.5-total)*t

            # Outputs k+2 .. k+l-1 (this loop is empty when l == 2).
            for q in range(2, l):
                dfa2L = [0 for i in range(q+2)]
                dfa2R = [0 for i in range(q+2)]
                dfa2L[0] = -lamda[i][ii]*G(Y)*t
                dfa2R[0] = lamda[i][ii+1]*y[k]*t
                for kk in range(1, len(dfa2L)-2):
                    dfa2L[kk] = -dfa2R[kk-1]
                    dfa2R[kk] = lamda[i][kk+1]*y[k+kk]*t
                dfa2L[kk+1] = -dfa2R[kk]
                z2 = 0
                for j in range(k+q, k+q+l):
                    z2 += y[j+1]
                dfa2R[kk+1] = -lamda[i][kk+2]*t*(1-z2)
                dfa2L[kk+2] = -dfa2R[kk+1]
                for o in range(len(dfa2L)):
                    if o != len(dfa2L)-2:
                        dAL[k+q] += -(A[L][k+o]+t*Adot[k+o]-f[k+o])*(dfa2L[o]+dfa2R[o])
                    else:
                        dAL[k+q] += (A[L][k+o]+t*Adot[k+o]-f[k+o])*(1+t*Adotdot[k+o]/Adot[k+o]-(dfa2L[o]+dfa2R[o]))
                dAL[k+q] += -(A[L][n]+t*Adot[n]-f[n])*dfza0-penalty*(0.5-total)*t

            # Outputs k+l .. k+p-l-1; ii advances by one per output.
            for q in range(l, p-l):
                dfa3L = [0 for i in range(l+2)]
                dfa3R = [0 for i in range(l+2)]
                dfa3R[0] = lamda[i][ii+1]*y[k+ii]*t
                for kk in range(1, l):
                    # NOTE(review): dfa3R[kk] is still 0 when it is read here
                    # (it is assigned on the next line); the parallel loops for
                    # dfa2L / dfa7L / dfa12L read index kk-1 instead -- confirm.
                    dfa3L[kk] = -dfa3R[kk]
                    dfa3R[kk] = lamda[i][kk+ii+1]*y[k+kk+ii]*t
                dfa3L[l] = -dfa3R[l-1]
                z2 = 0
                for j in range(l):
                    z2 += y[k+q+j+1]
                dfa3R[l] = -lamda[i][q+1]*t*(1-z2)
                dfa3L[l+1] = -dfa3R[l]
                for r in range(l+2):
                    if r!= l:
                        dAL[k+q] += -(A[L][k+r+ii]+t*Adot[k+r+ii]-f[k+r+ii])*(dfa3L[r]+dfa3R[r])
                    if r == l:
                        dAL[k+q] += (A[L][k+r+ii]+t*Adot[k+r+ii]-f[k+r+ii])*(1+t*Adotdot[k+r+ii]/Adot[k+r+ii]-(dfa3L[r]+dfa3R[r]))
                dAL[k+q] += -penalty*(0.5-total)*t
                ii += 1
            # Output k+p-l. `q` and `kk` below are the values left over from the
            # loops above (NOTE(review): they are unbound/stale if those loops
            # did not run -- confirm for the sites / l values in use).
            dfa7R = [0 for i in range(l+2)]
            dfa7L = [0 for i in range(l+2)]
            dfa7R[0] = lamda[i][q-l+2]*y_hat[k+q-l+1]*t
            for kk in range(1,l):
                dfa7L[kk] = -dfa7R[kk-1]
                dfa7R[kk] = lamda[i][q-l+2+kk]*y_hat[k+q-l+1+kk]*t
            dfa7L[l] = -dfa7R[l-1]
            dfa7R[l] = -lamda[i][q-l+2+kk+1]*t*np.ones_like(A[L][k+p-l]) 
            dfa7L[l+1] = -dfa7R[l]
            for r in range(l+2):
                if r!= l:
                    dAL[k+p-l] += -(A[L][k+r+ii]+t*Adot[k+r+ii]-f[k+r+ii])*(dfa7L[r]+dfa7R[r])
                if r == l:
                    dAL[k+p-l] += (A[L][k+r+ii]+t*Adot[k+r+ii]-f[k+r+ii])*(1+t*Adotdot[k+r+ii]/Adot[k+r+ii]-(dfa7L[r]+dfa7R[r]))
            dAL[k+p-l] += -penalty*(0.5-total)*t
            ii += 1

            # Outputs k+p-l+1 .. k+p-2 (this loop is empty when l == 2).
            qq = 0
            for q in range(p-l+1,p-1):
                dfa12L = [0 for i in range(l+2-qq)]
                dfa12R = [0 for i in range(l+2-qq)]
                qq += 1
                dfa12R[0] = lamda[i][q-l+1]*y_hat[k+q-l]*t
                for kk in range(1,len(dfa12R)-3):
                        dfa12L[kk] = -dfa12R[kk-1]
                        dfa12R[kk] = lamda[i][q-l+kk+1]*y_hat[k+q-l+kk]*t
                dfa12L[kk+1] = -dfa12R[kk]
                dfa12R[kk+2] = -lamda[i][q+1]*t*np.ones_like(A[L][k+q])
                dfa12L[kk+3] = -dfa12R[kk+2]
                for r in range(len(dfa12R)-2):
                    dAL[k+q] += -(A[L][k+r+ii]+t*Adot[k+r+ii]-f[k+r+ii])*(dfa12L[r]+dfa12R[r])
                dAL[k+q] += (A[L][k+q]+t*Adot[k+q]-f[k+q])*(1+t*Adotdot[k+q]/Adot[k+q]-(dfa12L[r+1]+dfa12R[r+1]))-(A[L][k+q+1]+t*Adot[k+q+1]-f[k+q+1])*(dfa12L[r+2]+dfa12R[r+2])-penalty*(0.5-total)*t
                ii += 1

            # Output k+p-1 (last site of the chain), which also feeds the pool.
            df6a9 = lamda[i][p-l]*y[k+p-l-1]*t
            df7a9 = -df6a9
            df9a9 = -lamda[i][p]*t
            dfza9 = 1/(2*H)*lamda[i][p]*t
            dAL[k+p-1] = -(A[L][k+p-l-1]+t*Adot[k+p-l-1]-f[k+p-l-1])*df6a9-(A[L][k+p-l]+t*Adot[k+p-l]-f[k+p-l])*df7a9+(A[L][k+p-1]+t*Adot[k+p-1]-f[k+p-1])*(1+t*Adotdot[k+p-1]/Adot[k+p-1]-df9a9)-(A[L][n]+t*Adot[n]-f[n])*dfza9-penalty*(0.5-total)*t
        # Output n (pool variable): contributions from every chain's entry term.
        s = 0
        for i in range(len(sites)):
            k = int(sum(sites[:i]))
            p = sites[i]
            z1 = 0
            for j in range(k, k+l):
                z1 += y[j]
            df0az = (lamda[i][0]*(1-z1)*Gprime(Y)*2*H*t)
            s += -df0az
            dAL[n] += -(A[L][k]+t*Adot[k]-f[k])*df0az
        dAL[n] += (A[L][n]+t*Adot[n]-f[n])*(1+t*Adotdot[n]/Adot[n]-s)-penalty*2*H*(0.5-total)*t
    else:
        Y = 2*H*y[n]
        total = y[n]
        # NOTE(review): this branch uses a fixed penalty of 200 and ignores the
        # `alpha` argument, unlike the l > 1 branch -- confirm intended.
        penalty = 200
        penalty2 = 1
        for j in range(n):
            total += y[j]/(2*H)
        for i in range(len(sites)):
            k = int(sum(sites[:i]))  # offset of chain i
            p = sites[i]
            ii = 0
            # Output k (first site of the chain).
            df0a0 = -lamda[i][ii]*G(Y)*t-lamda[i][ii+1]*t*(1-y[k+1])
            df1a0 = lamda[i][ii+1]*t*(1-y[k+1])
            dfza0 = +1/(2*H)*lamda[i][0]*G(Y)*t
            dAL[k] = (A[L][k]+t*Adot[k]-f[k])*(1+t*Adotdot[k]/Adot[k]-df0a0)-(A[L][k+1]+t*Adot[k+1]-f[k+1])*df1a0-(A[L][n]+t*Adot[n]-f[n])*dfza0-penalty*(0.5-total)*t
            ii += 1

            # Interior outputs k+1 .. k+p-2.
            for j in range(k+1, k+p-1):
                df0a1 = lamda[i][ii]*y[j-1]*t
                df1a1 = -lamda[i][ii]*y[j-1]*t-lamda[i][ii+1]*t*(1-y[j+1])
                df2a1 = lamda[i][ii+1]*t*(1-y[j+1])
                dAL[j] = -(A[L][j-1]+t*Adot[j-1]-f[j-1])*df0a1+(A[L][j]+t*Adot[j]-f[j])*(1+t*Adotdot[j]/Adot[j]-df1a1)-(A[L][j+1]+t*Adot[j+1]-f[j+1])*df2a1-penalty*(0.5-total)*t
                ii += 1

            # Output k+p-1 (last site of the chain).
            # NOTE(review): y[ii-1] is indexed from the start of y, without the
            # chain offset k, whereas the dAL line below uses k+p-2; the two
            # agree only for the first chain (k == 0) -- confirm.
            df4a5 = lamda[i][ii]*y[ii-1]*t
            df5a5 = -lamda[i][ii]*y[ii-1]*t-lamda[i][ii+1]*t
            dfza5 = 1/(2*H)*lamda[i][ii+1]*t
            dAL[k+p-1] = penalty2*(-(A[L][k+p-2]+t*Adot[k+p-2]-f[k+p-2])*df4a5+(A[L][k+p-1]+t*Adot[k+p-1]-f[k+p-1])*(1+t*Adotdot[k+p-1]/Adot[k+p-1]-df5a5)-(A[L][n]+t*Adot[n]-f[n])*dfza5)-penalty*(0.5-total)*t
        # Output n (pool variable).
        s = 0
        for i in range(len(sites)):
            k = int(sum(sites[:i]))
            df0az = lamda[i][0]*(1-y[k])
            s += -df0az
            dAL[n] += -penalty2*(A[L][k]+t*Adot[k]-f[k])*Gprime(Y)*t*df0az
        dAL[n] += penalty2*((A[L][n]+t*Adot[n]-f[n])*(1+t*Adotdot[n]/Adot[n]-Gprime(Y)*2*H*t*s))-2*H*penalty*(0.5-total)*t
    # Stack the per-output gradients into an (n+1, ntrain) array and average.
    dtAL = np.array(dAL).reshape((n+1,ntrain))/ntrain
    # Walk the layers from last to first. The gradient handed to the previous
    # layer is divided by ntrain again on every step.
    for i in range(L-1,-1,-1):    
        dZ[i] = dtAL*activation_derivative(Z[i],NA[i])
        dW[i] = (dZ[i]@A[i].T)/ntrain
        dB[i] = np.sum(dZ[i], axis = 1, keepdims = True)/ntrain
        dtAL = (NW[i].T@dZ[i])/ntrain
    return [dZ, dW, dB]

def rmsprop(NW, NB, dW, dB, SW, SB, epsilon, lr, beta):
    """
    Apply one RMSprop step to every layer that has a gradient.

    NW, NB  : per-layer weights and biases (list entries are replaced).
    dW, dB  : per-layer gradients.
    SW, SB  : running averages of the squared gradients (replaced in the lists).
    epsilon : small constant added to the root of the running average.
    lr      : learning rate.
    beta    : decay rate of the running averages.

    Returns [NW, NB, SW, SB].
    """
    # The layer count comes from the gradient list itself rather than from a
    # module-level `L`, so the function no longer depends on notebook state.
    for i in range(len(dW)):
        SW[i] = (beta*SW[i]+(1-beta)*dW[i]**2)
        SB[i] = (beta*SB[i]+(1-beta)*dB[i]**2)
        NW[i] = NW[i]-lr*dW[i]/(SW[i]**0.5+epsilon)
        NB[i] = NB[i]-lr*dB[i]/(SB[i]**0.5+epsilon)
    return [NW, NB, SW, SB]

def adam(i, NW, NB, dW, dB, VW, VB, SW, SB, epsilon, lr, momentum, beta):
    """
    Apply one Adam step to every layer that has a gradient.

    i        : 1-based step counter used for the bias correction.
    NW, NB   : per-layer weights and biases (list entries are replaced).
    dW, dB   : per-layer gradients.
    VW, VB   : running averages of the gradients (first moment).
    SW, SB   : running averages of the squared gradients (second moment).
    epsilon  : small constant; note that it is added inside the square root.
    lr       : learning rate.
    momentum : decay rate of the first moment.
    beta     : decay rate of the second moment.

    Returns [NW, NB, VW, VB, SW, SB].
    """
    # Bias-correction denominators are the same for every layer.
    first_correction = 1-momentum**i
    second_correction = 1-beta**i
    # The layer count comes from the gradient list itself rather than from a
    # module-level `L`, so the function no longer depends on notebook state.
    for j in range(len(dW)):
        VW[j] = momentum*VW[j]+(1-momentum)*dW[j]
        VB[j] = momentum*VB[j]+(1-momentum)*dB[j]
        SW[j] = beta*SW[j]+(1-beta)*(dW[j]**2)
        SB[j] = beta*SB[j]+(1-beta)*(dB[j]**2)
        VWhat = VW[j]/first_correction
        VBhat = VB[j]/first_correction
        SWhat = SW[j]/second_correction
        SBhat = SB[j]/second_correction
        NW[j] = NW[j]-lr*(VWhat/np.sqrt(SWhat+epsilon))
        NB[j] = NB[j]-lr*(VBhat/np.sqrt(SBhat+epsilon))
    return [NW, NB, VW, VB, SW, SB]

def train_model(X, epochs, NA, NW, NB, optimiser = 'rmsprop', loss = 'binary_cross_entropy', learning_rate = 0.001, 
                momentum = 0.9, epsilon = 10**-8, beta = 0.999, l = 2, poolsize = 1, alpha = 1):
    """
    Train the network for `epochs` iterations and return the fitted parameters.

    X             : training inputs passed to forward_propagation.
    epochs        : number of training iterations.
    NA, NW, NB    : per-layer activation names, weights and biases.
    optimiser     : 'rmsprop' or 'adam' (case-insensitive); any other value
                    leaves the parameters unchanged.
    loss          : accepted for interface compatibility; not used.
    learning_rate, momentum, epsilon, beta : optimiser settings.
    l             : site size forwarded to funct / backward_propagation.
    poolsize      : pool size factor forwarded as H.
    alpha         : penalty weight forwarded to backward_propagation.

    Reads the module-level names L, init, t, lamda and sites.
    Prints one block character each time a whole percent of the epochs is
    reached. Returns [NW, NB].
    """
    [dZ, dW, dB] = [[0 for i in range(L)],[0 for i in range(L)],[0 for i in range(L)]]
    VW = [np.zeros(NW[i].shape) for i in range(L)]
    VB = [np.zeros(NB[i].shape) for i in range(L)]
    SW = [np.zeros(NW[i].shape) for i in range(L)]
    SB = [np.zeros(NB[i].shape) for i in range(L)]
    optimiser_name = optimiser.upper()
    for i in range(epochs):
        [Z, A] = forward_propagation(X, NA, NW, NB)
        # Trial solution: satisfies the initial condition `init` at t = 0.
        yhat = init+t*(A[L])
        f = funct(yhat, lamda, sites, l, poolsize)
        [dZ, dW, dB] = backward_propagation(NA, NW, Z, A, dZ, dW, dB, yhat, f, poolsize, sites, l, lamda, alpha)
        if optimiser_name == 'RMSPROP':
            [NW, NB, SW, SB] = rmsprop(NW, NB, dW, dB, SW, SB, epsilon, learning_rate, beta)
        elif optimiser_name == 'ADAM':
            [NW, NB, VW, VB, SW, SB] =  adam(i+1, NW, NB, dW, dB, VW, VB, SW, SB, epsilon, learning_rate, momentum, beta)

        # Integer arithmetic: true exactly when i/epochs is a whole percent.
        # The float form (i/epochs)*100 misses marks through rounding
        # (e.g. 0.07*100 is not exactly 7) and scans a range every epoch.
        if (i*100) % epochs == 0:
            print('█', end = '')
    return [NW, NB]

In [22]:
"""
Training and Testing the Network
"""
# Number of independently initialised networks that are trained and plotted.
ensemblesize = 6
l = 2  # site size; selects the l > 1 branches of funct / backward_propagation
# NOTE: `h` is reused here as the pool size factor (passed as `poolsize` below);
# in the data-generation cells the same name held the grid step size.
h = 2
# NOTE(review): `L` (used for the layer count and for A[L]) and `alpha` (passed
# to train_model) are read below but are not assigned in any cell visible in
# this notebook; they must be defined before this cell is run.
for it in range(ensemblesize):
    plt.figure(figsize = (12, 5))
    print(it+1, end = '.')
    # Upper bound for the random initial values.
    b = min(1/l, h*(1/sum(sites)))
    # `init` is read as a global by train_model when it builds the trial solution.
    init = np.random.uniform(0, b, (p,1))
    # Choose the last entry so that init[-1] + sum(init[:-1])/(2*h) equals 0.5,
    # the value that the penalty term in backward_propagation targets.
    init[-1] = 0.5-1/(2*h)*sum(init[:-1])
    
    # add_layer appends to these three global lists.
    [NA, NW, NB] = [[],[],[]]
    add_layer(input_shape = 1, hidden_units = p, activation = 'bps', initialiser = 'xavier')
    for i in range(L-2):
        add_layer(input_shape = p, hidden_units = p, activation = 'sigmoid', initialiser = 'xavier')
    add_layer(input_shape = p, hidden_units = p, activation = 'bps', initialiser = 'xavier')
    
    """
    Training the CDNN on the train set
    """
    [NW, NB] = train_model(Xtr, 8000, NA, NW, NB, optimiser = 'rmsprop',
                                     learning_rate = 10**-2, momentum = 0.9, beta = 0.999, l = l, poolsize = h, alpha = alpha)
    [Z,A] = forward_propagation(Xte, NA, NW, NB)
    """
    Obtaining the solutions on the test set
    """
    # Same trial-solution form as in training, evaluated on the test grid.
    vhat = (init+Xte*(A[L]))
    # One curve per network output (all sites plus the pool).
    for j in range(p):
        plt.plot(t0.ravel(), vhat[j])
    plt.show()