In [45]:
import numpy as np
import numba
from numba import cuda
import matplotlib.pyplot as plt
import pylab

def get_beta(p):
    """Return ``p / sqrt(1 + p**2)`` for a scalar or NumPy array ``p``.

    Presumably the relativistic velocity for a momentum given in units of mc
    (inferred from the function name).
    """
    norm = np.sqrt(1+p**2)
    return p/norm
    
def get_gamma(p):
    """Return ``sqrt(1 + p**2)`` for a scalar or NumPy array ``p``.

    Presumably the Lorentz factor for a momentum given in units of mc
    (inferred from the function name).
    """
    squared = p**2
    return np.sqrt(1+squared)

In [47]:
# --- time stepping ------------------------------------------------------------
dt = 0.025
steps = int(1/dt)  # number of dt steps in one unit of time

# --- spatial grid -------------------------------------------------------------
X = 800
nx_cell = 10
dx = 1/nx_cell
nx = int(nx_cell*X)
ng_l = ng_r = 0 ### number of guard cells
Nx = nx+ng_l+ng_r
n_cs = 1 ### number of cells in a coarse mesh

# --- momentum grid: Np samples running from +Pmax (row 0) down to -Pmax -------
Pmax = 10**5
Np = int(4000+1)
p = np.linspace(Pmax,-Pmax,Np).astype(np.float64)
dp = p[0]-p[1]
ptc_gamma = get_gamma(p)
ptc_beta = get_beta(p)
idx_0=np.where(p==0)[0][0]  # row whose momentum is exactly zero

# --- model coefficients -------------------------------------------------------
A=10**-16  ### F_rad = A*P**4
B=0.5*10**-4  ### dN/dt = B*P
C=A/(2*B)  ### P_e = C*P**3
Fcen=0

# --- initial condition: both species uniform in x, each on a single row -------
n_e_ini = 10
n_p_ini = 9
idx_ini=idx_0-1
IC_e = np.zeros((Np,Nx), dtype=np.float64)
IC_p = np.zeros((Np,Nx), dtype=np.float64)
IC_e[idx_ini, ng_l:] = n_e_ini
IC_p[idx_0, ng_l:] = n_p_ini
j0=-n_e_ini*get_beta(p[idx_ini])

# Field array holds Nx+1 values (one more than the number of cells).
E_ini=-np.zeros(Nx+1)
E_max=1000

# Flat lookup table, four entries per momentum row i:
#   [idx_l, l_per, idx_h, h_per]
# C*p[i]**3 is located on the momentum grid relative to idx_0; idx_l / idx_h are
# the two neighbouring rows it falls between and l_per / h_per the matching
# linear weights (l_per + h_per == 1).
mapping=[]
for i in range(Np):
    idx_l=idx_0-int(C*p[i]**3//dp)
    l_per=1-C*p[i]**3%dp/dp
    idx_h=idx_l-1
    h_per=1-l_per
    mapping.extend((idx_l,l_per,idx_h,h_per))
mapping=np.array(mapping).astype(np.float64)

In [49]:
@cuda.jit
def copy_var(a,b):
    """Copy ``a[n]`` into ``b[n]`` for every ``n < b.size`` (grid-stride loop)."""
    stride = cuda.gridsize(1)
    for n in range(cuda.grid(1), b.size, stride):
        b[n] = a[n]

@cuda.jit
def add_var(a,b,c):
    """Store the elementwise sum ``a[n] + b[n]`` in ``c[n]`` for every ``n < c.size``."""
    stride = cuda.gridsize(1)
    for n in range(cuda.grid(1), c.size, stride):
        c[n] = a[n]+b[n]

@cuda.jit
def clear_var(a):
    """Set every element of ``a`` to zero (grid-stride loop)."""
    stride = cuda.gridsize(1)
    for n in range(cuda.grid(1), a.size, stride):
        a[n] = 0

@cuda.jit
def save_data(a,idx,data):
    """Add the sum of all elements of ``a`` to ``data[idx]``.

    Only the thread whose index equals ``idx`` does any work; every other
    element of ``data`` is left untouched.  ``data[idx]`` is accumulated into,
    not overwritten.

    The previous version added ``a[0]`` on every pass of the inner loop, i.e.
    ``a.size * a[0]``.  For a one-element ``a`` the result is unchanged.
    """
    stride = cuda.gridsize(1)
    for i in range(cuda.grid(1), data.size, stride):
        if i == idx:
            for k in range(a.size):
                data[i]+=a[k]

@cuda.jit
def update_E(J,E):
    """Advance ``E`` in place by one step: ``E[n] -= J[n]*dt`` for every ``n < E.size``."""
    first = cuda.grid(1)
    stride = cuda.gridsize(1)
    for n in range(first, E.size, stride):
        E[n] = E[n]-J[n]*dt
        

@cuda.jit
def operate_P(xpd1,xpd2,v,dxpd):
    """Accumulate into ``dxpd`` the one-sided finite-difference change along x over one ``dt``.

    Flattened arrays are indexed as ``i = idy*Nx + idx`` (momentum row ``idy``,
    spatial cell ``idx``).  The differenced quantity is the average of ``xpd1``
    and ``xpd2``.

    * rows with ``idy < idx_0`` difference against cell ``idx-1``
      (nothing enters at ``idx == 0``);
    * rows with ``idy > idx_0`` difference against cell ``idx+1``
      (nothing enters at ``idx == Nx-1``);
    * row ``idx_0`` is left untouched.

    ``dxpd`` is added to, not overwritten, so the caller must clear it first.

    Neighbour cells are read only inside the branch that uses them.  The
    previous version read ``xpd[i-1]`` and ``xpd[i+1]`` unconditionally, which
    indexed outside the arrays for the first and last thread.
    """
    stride = cuda.gridsize(1)
    for i in range(cuda.grid(1), dxpd.size, stride):
        idy = i//Nx
        idx = i%Nx
        cell=0.5*(xpd1[i]+xpd2[i])
        if idy < idx_0:
            if idx == 0:
                dxpd[i] += -cell*v[idy]*dt/dx
            if idx > 0:
                # idx > 0 guarantees i-1 is in the same row and in bounds
                cellm1=0.5*(xpd1[i-1]+xpd2[i-1])
                dxpd[i] += (cellm1-cell)*v[idy]*dt/dx
        if idy > idx_0:
            if idx == Nx-1:
                dxpd[i] += cell*v[idy]*dt/dx
            if idx < Nx-1:
                # idx < Nx-1 guarantees i+1 is in the same row and in bounds
                cellp1=0.5*(xpd1[i+1]+xpd2[i+1])
                dxpd[i] += (cell-cellp1)*v[idy]*dt/dx

@cuda.jit
def get_J_gpu(xpd_e1,xpd_e2,xpd_p1,xpd_p2,v,J):
    """Fill ``J`` with ``-j0`` plus the ``v``-weighted sum over momentum rows.

    Each row ``k`` contributes ``0.5*(p1 + p2 - e1 - e2) * v[k]``, i.e. the
    positron-minus-electron distribution averaged over the two supplied copies.

    * for ``ng_l < i < Nx``: rows ``k < idx_0`` are sampled in cell ``i-1`` and
      rows ``k > idx_0`` in cell ``i``; row ``idx_0`` is skipped;
    * for ``i == Nx``: every row is sampled in the last cell ``i-1``;
    * for ``i <= ng_l``: ``J[i]`` is forced to zero.

    Fix: in the ``k > idx_0`` branch the ``xpd_e2`` term used cell ``i-1`` while
    the other three terms used cell ``i``; all four now use cell ``i``.
    """
    stride = cuda.gridsize(1)
    for i in range(cuda.grid(1), J.size, stride):
        J[i]=-j0
        if i> ng_l and i <Nx:
            for k in range(Np):
                if k < idx_0:
                    J[i] += 0.5*(xpd_p1[k*Nx+i-1]+xpd_p2[k*Nx+i-1]-xpd_e1[k*Nx+i-1]-xpd_e2[k*Nx+i-1])*v[k]
                if k > idx_0:
                    J[i] += 0.5*(xpd_p1[k*Nx+i]+xpd_p2[k*Nx+i]-xpd_e1[k*Nx+i]-xpd_e2[k*Nx+i])*v[k]
        if i ==Nx:
            # NOTE(review): rows k > idx_0 are also sampled in cell i-1 here -- confirm this is intended.
            for k in range(Np):
                J[i] += 0.5*(xpd_p1[k*Nx+i-1]+xpd_p2[k*Nx+i-1]-xpd_e1[k*Nx+i-1]-xpd_e2[k*Nx+i-1])*v[k]
        if i <= ng_l:## or i >=Nx-ng_r:
            J[i]=0
        # if abs(J[i]) < 0.005*abs(j0):
        #     J[i]=0
        


@cuda.jit(device=True)
def _mean_dpdt(force, p_j, j):
    """Return the rate of change of momentum used for momentum row ``j``.

    ``force`` is the field term, already signed for the species, and ``p_j`` is
    ``p[j]``.

    * Rows with ``|j - idx_0| < 10`` feel only ``force + Fcen``.
    * Rows with ``|j - idx_0| >= 10`` also feel the ``A*p**4`` term
      (``F_rad`` in the configuration cell).  It is subtracted for
      ``j < idx_0`` and added for ``j > idx_0``.  For these rows the four
      classical RK4 stage slopes over one ``dt`` are combined with weights
      1/6, 1/3, 1/3, 1/6.
    """
    if j <= idx_0-10:
        drag = -A
    elif j >= idx_0+10:
        drag = A
    else:
        return force+Fcen
    k1 = force+drag*p_j**4+Fcen
    k2 = force+drag*(p_j+k1*dt/2)**4+Fcen
    k3 = force+drag*(p_j+k2*dt/2)**4+Fcen
    k4 = force+drag*(p_j+k3*dt)**4+Fcen
    return k1/6+k2/3+k3/3+k4/6


@cuda.jit(device=True)
def _advect_in_momentum(xpd1, xpd2, v, p, E, dxpd, i, charge):
    """Write into ``dxpd[i]`` the change over one ``dt`` from transport along the momentum axis.

    ``i = idy*Nx + idx`` addresses momentum row ``idy`` and spatial cell ``idx``.
    ``charge`` is ``+1.0`` or ``-1.0`` and multiplies the local field.  The
    transported quantity is the average of ``xpd1`` and ``xpd2``.

    ``dxpd[i]`` is always reset to zero first.  It stays zero for cells left of
    ``ng_l`` and for the two outermost momentum rows on each side, so the
    ``idy-1`` / ``idy+1`` neighbours are never read outside the arrays.
    """
    idy = i//Nx
    idx = i%Nx
    dxpd[i] = 0
    if idx < ng_l:
        return
    if idy <= 1 or idy >= Np-2:
        return

    # Field at the cell: E[idx+1] alone in the first interior cell, otherwise
    # the average of the two neighbouring entries of E.
    if idx == ng_l:
        force = charge*E[idx+1]
    else:
        force = charge*(E[idx]+E[idx+1])*0.5

    # Rates for the row above (U, idy-1), this row (M) and the row below (D, idy+1).
    dpdt_U = _mean_dpdt(force, p[idy-1], idy-1)
    dpdt_M = _mean_dpdt(force, p[idy], idy)
    dpdt_D = _mean_dpdt(force, p[idy+1], idy+1)

    flux_U = 0.5*(xpd1[i-Nx]+xpd2[i-Nx])*dpdt_U*dt/dp
    flux_M = 0.5*(xpd1[i]+xpd2[i])*dpdt_M*dt/dp
    flux_D = 0.5*(xpd1[i+Nx]+xpd2[i+Nx])*dpdt_D*dt/dp

    # The sign of dpdt_M selects which neighbour feeds this row.  The two v
    # branches differ only in where dpdt_M == 0 falls and in the order the
    # terms are summed.
    if v[idy] >= 0:
        if dpdt_M <= 0:
            dxpd[i] -= flux_U
            dxpd[i] += flux_M
            if dpdt_D > 0:
                dxpd[i] += flux_D
        if dpdt_M > 0:
            dxpd[i] += flux_D
            dxpd[i] -= flux_M
            if dpdt_U < 0:
                dxpd[i] -= flux_U
    if v[idy] < 0:
        if dpdt_M >= 0:
            dxpd[i] += flux_D
            dxpd[i] -= flux_M
            if dpdt_U < 0:
                dxpd[i] -= flux_U
        if dpdt_M < 0:
            dxpd[i] += flux_M
            dxpd[i] -= flux_U
            if dpdt_D > 0:
                dxpd[i] += flux_D


@cuda.jit
def operate_E_to_p(xpd1,xpd2,v,p,E,dxpd):
    """Momentum-space update for the species pushed by ``+E`` (positrons, judging by the name).

    Overwrites every ``dxpd[i]`` with the change over one ``dt``.  Results are
    unchanged from the previous hand-unrolled version, but ``p[idy-1]`` and
    ``p[idy+1]`` are no longer read for the edge rows, where they fell outside
    the array.
    """
    stride = cuda.gridsize(1)
    for i in range(cuda.grid(1), dxpd.size, stride):
        _advect_in_momentum(xpd1, xpd2, v, p, E, dxpd, i, 1.0)


@cuda.jit
def operate_E_to_e(xpd1,xpd2,v,p,E,dxpd):
    """Momentum-space update for the species pushed by ``-E`` (electrons, judging by the name).

    Overwrites every ``dxpd[i]`` with the change over one ``dt``.  Two defects
    of the previous hand-unrolled version are fixed so that this kernel mirrors
    ``operate_E_to_p``:

    * the interior-cell branch tested ``idy > idx_0`` instead of
      ``idy > idx_0+10``, which applied the ``A*p**4`` term to rows
      ``idx_0+1 .. idx_0+9`` that the other branches treat as drag-free;
    * the final update ran for every row, reading ``xpd[i-Nx]`` / ``xpd[i+Nx]``
      outside the arrays for the first and last rows.  It is now restricted to
      ``1 < idy < Np-2``, the same guard as ``operate_E_to_p``.
    """
    stride = cuda.gridsize(1)
    for i in range(cuda.grid(1), dxpd.size, stride):
        _advect_in_momentum(xpd1, xpd2, v, p, E, dxpd, i, -1.0)

@cuda.jit
def ext_ptc_1(E,max,dxpd):
    """Zero ``dxpd`` and write a clamped source term at one location.

    The location is row ``idx_0``, cell ``ng_l``.  The raw value is
    ``E[ng_l+1]*|E[ng_l+1]|/E_max*dt/dx``, clamped to the range
    ``[-max*dt, +max*dt]``.
    """
    target = (idx_0)*Nx+ng_l
    stride = cuda.gridsize(1)
    for n in range(cuda.grid(1), dxpd.size, stride):
        dxpd[n] = 0
        if n != target:
            continue
        rate = E[ng_l+1]*(abs(E[ng_l+1])/E_max)*dt/dx
        dxpd[n] += rate
        if rate > max*dt:
            dxpd[n] = max*dt
        if rate < -max*dt:
            dxpd[n] = -max*dt

@cuda.jit
def ext_ptc_2(xpd_e,xpd_p,dxpd):
    """Apply the signed source ``dxpd`` to one of the two distributions.

    A positive entry is added to ``xpd_p``; a negative entry has its magnitude
    added to ``xpd_e``.  Zero entries change nothing.
    """
    stride = cuda.gridsize(1)
    for n in range(cuda.grid(1), dxpd.size, stride):
        delta = dxpd[n]
        if delta > 0:
            xpd_p[n] += delta
        if delta < 0:
            xpd_e[n] -= delta

@cuda.jit
def get_box(xpd,box_xpd):
    """Copy ``xpd`` without its left guard cells into ``box_xpd``.

    ``box_xpd`` is laid out with ``nx`` cells per row; the source ``xpd`` has
    ``Nx`` cells per row and is read starting at column ``ng_l``.
    """
    stride = cuda.gridsize(1)
    for n in range(cuda.grid(1), box_xpd.size, stride):
        row = n // nx
        col = n % nx
        box_xpd[n] = xpd[Nx*row+ng_l+col]


@cuda.jit
def inst_pp(xpd_e,xpd_p,p,mapping,dxpd):
    """Overwrite ``dxpd`` with the contribution each cell receives through ``mapping``.

    ``mapping`` is a flat table with four entries per source row:
    ``[idx_l, l_per, idx_h, h_per]``, i.e. two (target row, weight) pairs.
    For every interior cell of every target row ``idy != idx_0``, each source
    row ``s`` that lists ``idy`` as a target adds
    ``(xpd_e + xpd_p)[s, idx] * weight * dt * B * |p[s]|``.

    Fixes relative to the previous version:

    * the scan steps over (row, weight) pairs only.  Before, it also compared
      ``idy`` against the weight slots, so a weight equal to 0.0 or 1.0 could
      be mistaken for a row index, and ``mapping[k+1]`` was read one past the
      end on the last iteration;
    * the right guard test is ``idx < Nx-ng_r``.  ``<=`` included the first
      right guard cell; with ``ng_r == 0`` both forms agree.
    """
    stride = cuda.gridsize(1)
    for i in range(cuda.grid(1), dxpd.size, stride):
        idx = i%Nx
        idy = i//Nx
        dxpd[i]=0
        if idx>=ng_l and idx<Nx-ng_r and abs(idy-idx_0)>=1:
            # even offsets hold target rows; the weight follows at k+1
            for k in range(0, mapping.size-1, 2):
                if idy == mapping[k]:
                    ptc_idy=k//4
                    dxpd[i]+=xpd_e[ptc_idy*Nx+idx]*mapping[k+1]*dt*B*abs(p[ptc_idy])
                    dxpd[i]+=xpd_p[ptc_idy*Nx+idx]*mapping[k+1]*dt*B*abs(p[ptc_idy])

@cuda.jit
def get_den(xpd_e,xpd_p,gamma,den,den_eff):
    """Compute two gamma-weighted densities per spatial cell.

    For each cell ``i`` (column ``ng_l+i`` of the ``Nx``-wide arrays) and each
    species, sum over all ``Np`` momentum rows to get the total ``n``, the mean
    of ``|gamma|`` and the mean of ``|gamma|**-3``.  Then:

    * ``den[i]``     = sum over species of ``n / <gamma>``
    * ``den_eff[i]`` = sum over species of ``n * <gamma**-3>``

    A species whose total in the cell is exactly zero contributes zero.
    Previously that case evaluated 0/0 and turned the whole entry into NaN.
    """
    stride = cuda.gridsize(1)
    for i in range(cuda.grid(1), den.size, stride):
        tot_n_e=0.0
        tot_n_p=0.0
        tot_ga_e=0.0
        tot_ga_p=0.0
        tot_gam3_e=0.0
        tot_gam3_p=0.0
        for l in range(Np):
            tot_n_e += xpd_e[l*Nx+ng_l+i]
            tot_n_p += xpd_p[l*Nx+ng_l+i]
            tot_ga_e += xpd_e[l*Nx+ng_l+i]*abs(gamma[l])
            tot_ga_p += xpd_p[l*Nx+ng_l+i]*abs(gamma[l])
            tot_gam3_e += xpd_e[l*Nx+ng_l+i]*abs(gamma[l])**-3
            tot_gam3_p += xpd_p[l*Nx+ng_l+i]*abs(gamma[l])**-3
        den_e=0.0
        den_p=0.0
        den_eff_e=0.0
        den_eff_p=0.0
        if tot_n_e != 0:
            ave_ga_e = tot_ga_e/tot_n_e
            ave_gam3_e = tot_gam3_e/tot_n_e
            den_e = tot_n_e/ave_ga_e
            den_eff_e = tot_n_e*ave_gam3_e
        if tot_n_p != 0:
            ave_ga_p = tot_ga_p/tot_n_p
            ave_gam3_p = tot_gam3_p/tot_n_p
            den_p = tot_n_p/ave_ga_p
            den_eff_p = tot_n_p*ave_gam3_p
        den[i] = den_e+den_p
        den_eff[i] = den_eff_e+den_eff_p

@cuda.jit
def get_eng_k(xpd,p,eng):
    """Add ``sum(sqrt(1+p**2) * xpd * dx)`` over all cells with ``idx >= ng_l`` to every ``eng`` entry.

    ``eng`` is accumulated into, not reset.  Each thread walks the whole ``xpd``
    array serially.
    """
    stride = cuda.gridsize(1)
    for n in range(cuda.grid(1), eng.size, stride):
        for cell in range(xpd.size):
            col = cell%Nx
            if col < ng_l or col >= Nx:
                continue
            row = cell//Nx
            lorentz = (1+p[row]**2)**0.5
            eng[n] += lorentz*(xpd[cell])*dx

@cuda.jit
def get_eng_boundary(xpd1,xpd2,p,eng,sw=1):
    """Accumulate into ``eng`` a gamma-weighted boundary sum over all momentum rows.

    With ``sw == 1``, each row ``k`` adds ``0.5*(xpd1+xpd2)*gamma*dt*|beta|``:

    * rows with ``beta < 0`` are sampled in the first interior cell (``ng_l``);
    * rows with ``beta > 0`` are sampled in the last cell (``Nx-1``).

    With any other ``sw``, each row adds ``|xpd1|*gamma*dx`` sampled in cell
    ``ng_l``.

    Fix: in the ``beta > 0`` branch ``xpd2`` was read in cell ``ng_l`` while
    ``xpd1`` was read in cell ``Nx-1``; both now use cell ``Nx-1``.
    """
    stride = cuda.gridsize(1)
    for i in range(cuda.grid(1), eng.size, stride):
        for k in range(Np):
            beta=p[k]/(1+p[k]**2)**0.5
            gamma=(1+p[k]**2)**0.5
            if sw==1:
                if beta<0:
                    eng[i]-=0.5*(xpd1[k*Nx+ng_l]+xpd2[k*Nx+ng_l])*gamma*dt*beta
                if beta>0:
                    eng[i]+=0.5*(xpd1[k*Nx+Nx-1]+xpd2[k*Nx+Nx-1])*gamma*dt*beta
            if sw!=1:
                eng[i]+=abs(xpd1[k*Nx+ng_l])*gamma*dx

@cuda.jit
def get_eng_E(E,eng):
    """Set every ``eng`` entry to ``sum((E[k]+E[k+1])**2 * 0.125 * dx)`` over adjacent pairs of ``E``.

    This equals ``0.5 * E_cell**2 * dx`` with ``E_cell`` the average of the two
    neighbouring values.
    """
    stride = cuda.gridsize(1)
    for n in range(cuda.grid(1), eng.size, stride):
        eng[n] = 0
        for right in range(1, E.size):
            pair_sum = E[right-1]+E[right]
            eng[n] += pair_sum**2*0.125*dx

@cuda.jit
def get_eng_B(E,eng):
    """Accumulate ``sum(0.5*(E[k]+E[k+1]) * j0 * dx * dt)`` over adjacent pairs of ``E`` into every ``eng`` entry.

    ``eng`` is added to, not reset, so repeated calls build a running total.
    """
    stride = cuda.gridsize(1)
    for n in range(cuda.grid(1), eng.size, stride):
        for right in range(1, E.size):
            pair_sum = E[right-1]+E[right]
            eng[n] += pair_sum*0.5*j0*dx*dt

@cuda.jit
def get_eng_rad(xpd1,xpd2,p,dxpd):
    """Accumulate ``(xpd1+xpd2) * A * p**4 * dt * dx`` into ``dxpd``.

    Only cells with ``idx >= ng_l`` on momentum rows at least 10 indices away
    from ``idx_0`` are updated.  ``dxpd`` is added to, not reset.
    """
    stride = cuda.gridsize(1)
    for n in range(cuda.grid(1), dxpd.size, stride):
        col = n%Nx
        row = n//Nx
        if col < ng_l or col >= Nx:
            continue
        if abs(row-idx_0) < 10:
            continue
        dxpd[n]+=(xpd1[n]+xpd2[n])*A*p[row]**4*dt*dx


In [51]:
# Directory that receives the snapshot files written by the time loop.
data_adr='E:/Data/test1_070824/'
# The time loop runs 4000 blocks of `steps` iterations each.
tot_steps=steps*4000

# Momentum-grid tables and the initial field.
dev_gamma=cuda.to_device(ptc_gamma)
dev_beta = cuda.to_device(ptc_beta)
dev_p=cuda.to_device(p)
dev_E = cuda.to_device(E_ini)

# Scratch buffer with nx columns per momentum row, used as the get_box target.
dev_xpd_box=cuda.to_device(np.zeros(Np*nx))

# Electron state: two device copies of the initial condition, plus two
# zeroed increment buffers of the same flat length.
dev_xpd1_e = cuda.to_device(IC_e.ravel())
dev_xpd2_e = cuda.to_device(IC_e.ravel())
dev_dxpd = cuda.to_device(np.zeros(IC_e.size))
dev_dxpd2 = cuda.to_device(np.zeros(IC_e.size))

# Positron state: two device copies of the initial condition.
dev_xpd1_p = cuda.to_device(IC_p.ravel())
dev_xpd2_p = cuda.to_device(IC_p.ravel())

# Current array, same length as the field.
dev_J = cuda.to_device(np.zeros(E_ini.size))
# dev_J = cuda.to_device(J)

# One-element accumulators for the energy diagnostics.
# dev_eng=cuda.to_device(np.zeros(1))
dev_eng_esc=cuda.to_device(np.zeros(1))
dev_eng_inj=cuda.to_device(np.zeros(1))
dev_eng_B=cuda.to_device(np.zeros(1))
dev_eng_pp=cuda.to_device(np.zeros(1))

dev_mapping=cuda.to_device(mapping)
dev_tem_pp=cuda.to_device(np.zeros(IC_e.size))

# Per-column density outputs filled by get_den.
dev_den=cuda.to_device(np.zeros(nx))
dev_den_eff=cuda.to_device(np.zeros(nx))
# dev_den=cuda.to_device(den)
# dev_deneff=cuda.to_device(deneff)

In [53]:
# Main time loop.  Every kernel is launched as [1024 blocks, 1024 threads] and
# followed by an explicit device synchronisation, as in the original cell.
for i in range(tot_steps):
    # ---- Snapshot once every `steps` iterations ----
    if i % (steps) ==0:
        # Zero-padded (width 5) snapshot index shared by all file names.
        snap_tag = str(i//steps).zfill(5)

        get_box[1024,1024](dev_xpd1_e,dev_xpd_box)
        xpd_e=np.reshape(dev_xpd_box.copy_to_host(),(Np,nx))
        get_box[1024,1024](dev_xpd1_p,dev_xpd_box)
        xpd_p=np.reshape(dev_xpd_box.copy_to_host(),(Np,nx))
        E=dev_E.copy_to_host()
        J=dev_J.copy_to_host()
        get_den[1024,1024](dev_xpd1_e,dev_xpd1_p,dev_gamma,dev_den,dev_den_eff)

        np.save(data_adr+'e_xpd'+snap_tag,xpd_e)
        np.save(data_adr+'p_xpd'+snap_tag,xpd_p)
        np.save(data_adr+'E'+snap_tag,E)
        np.save(data_adr+'J'+snap_tag,J)
        # Copy the density buffers to the host explicitly, like every other
        # saved quantity, instead of passing device arrays to np.save.
        np.save(data_adr+'den'+snap_tag,dev_den.copy_to_host())
        np.save(data_adr+'deneff'+snap_tag,dev_den_eff.copy_to_host())

        # ---- Disabled energy diagnostics (kept for reference) ----
#         get_eng_k[1024,1024](dev_xpd1_e,dev_p,dev_eng)
#         cuda.synchronize()
#         get_eng_k[1024,1024](dev_xpd1_p,dev_p,dev_eng)
#         cuda.synchronize()
#         get_eng_k[1024,1024](dev_tem_pp,dev_p,dev_eng_pp)
#         cuda.synchronize()
#         eng_K=dev_eng.copy_to_host()
#         eng_pp=dev_eng_pp.copy_to_host()
#         np.save(data_adr+'eng/'+'eng_K' + '0'*(5-len(str(int(i//steps))))+str(int(i//steps)),eng_K)
#         clear_var[1024,1024](dev_eng)
#         cuda.synchronize()

#         get_eng_E[1024,1024](dev_E,dev_eng)
#         cuda.synchronize()
#         eng_E=dev_eng.copy_to_host()
#         np.save(data_adr+'eng/'+'eng_E' + '0'*(5-len(str(int(i//steps))))+str(int(i//steps)),eng_E)
#         clear_var[1024,1024](dev_eng)
#         cuda.synchronize()

#         eng_esc=dev_eng_esc.copy_to_host()
#         eng_inj=dev_eng_inj.copy_to_host()
#         eng_B=dev_eng_B.copy_to_host()
#         eng_rad=np.sum(dev_dxpd2.copy_to_host())
#         eng_pp=dev_eng_pp.copy_to_host()
#         clear_var[1024,1024](dev_eng_pp)
#         cuda.synchronize()
#         np.save(data_adr+'eng/'+'eng_esc' + '0'*(5-len(str(int(i//steps))))+str(int(i//steps)),eng_esc)
#         np.save(data_adr+'eng/'+'eng_inj' + '0'*(5-len(str(int(i//steps))))+str(int(i//steps)),eng_inj)
#         np.save(data_adr+'eng/'+'eng_B' + '0'*(5-len(str(int(i//steps))))+str(int(i//steps)),eng_B)
#         np.save(data_adr+'eng/'+'eng_rad' + '0'*(5-len(str(int(i//steps))))+str(int(i//steps)),eng_rad)
#         np.save(data_adr+'eng/'+'eng_pp' + '0'*(5-len(str(int(i//steps))))+str(int(i//steps)),eng_pp*2)

    # ---- Electrons: 40 passes, each ending with xpd2_e = xpd1_e + dxpd ----
    # NOTE(review): presumably a fixed-point iteration on xpd2_e; the operate_*
    # kernels are defined outside this cell -- confirm.
    for j in range(40):
        operate_E_to_e[1024,1024](dev_xpd1_e,dev_xpd2_e,dev_beta,dev_p,dev_E,dev_dxpd)
        cuda.synchronize()
        operate_P[1024,1024](dev_xpd1_e,dev_xpd2_e,dev_beta,dev_dxpd)
        cuda.synchronize()
        add_var[1024,1024](dev_xpd1_e,dev_dxpd,dev_xpd2_e)
        cuda.synchronize()

    # ---- Positrons: same 40-pass scheme with the positron operator ----
    for k in range(40):
        operate_E_to_p[1024,1024](dev_xpd1_p,dev_xpd2_p,dev_beta,dev_p,dev_E,dev_dxpd)
        cuda.synchronize()
        operate_P[1024,1024](dev_xpd1_p,dev_xpd2_p,dev_beta,dev_dxpd)
        cuda.synchronize()
        add_var[1024,1024](dev_xpd1_p,dev_dxpd,dev_xpd2_p)
        cuda.synchronize()

    # ---- J from both the old (xpd1) and new (xpd2) states of each species ----
    get_J_gpu[1024,1024](dev_xpd1_e,dev_xpd2_e,dev_xpd1_p,dev_xpd2_p,dev_beta,dev_J)
    cuda.synchronize()

#     get_eng_boundary[1024,1024](dev_xpd2_e,dev_xpd2_e,dev_p,dev_eng_esc,1)
#     cuda.synchronize()
#     get_eng_boundary[1024,1024](dev_xpd2_e,dev_xpd2_p,dev_p,dev_eng_esc,1)
#     cuda.synchronize()

#     get_eng_rad[1024,1024](dev_xpd2_e,dev_xpd2_p,dev_p,dev_dxpd2)
#     cuda.synchronize()
#     get_eng_B[1024,1024](dev_E,dev_eng_B)
#     cuda.synchronize()

    # ---- Commit the new state: xpd1 <- xpd2 for both species ----
    copy_var[1024,1024](dev_xpd2_e,dev_xpd1_e)
    cuda.synchronize()
    copy_var[1024,1024](dev_xpd2_p,dev_xpd1_p)
    cuda.synchronize()

    # ---- Field update from J, then reset the shared increment buffer ----
    update_E[1024,1024](dev_J,dev_E)
    cuda.synchronize()
    clear_var[1024,1024](dev_dxpd)
    cuda.synchronize()

    # ---- ext_ptc_1 / ext_ptc_2 stage, with dxpd as scratch ----
    # NOTE(review): presumably particle extraction/injection; both kernels are
    # defined outside this cell -- confirm.
    ext_ptc_1[1024,1024](dev_E,1,dev_dxpd)
    cuda.synchronize()
    ext_ptc_2[1024,1024](dev_xpd1_e,dev_xpd1_p, dev_dxpd)
    cuda.synchronize()
    # get_eng_boundary[1024,1024](dev_dxpd,dev_dxpd,dev_p,dev_eng_inj,0)
    # cuda.synchronize()
    clear_var[1024,1024](dev_dxpd)
    cuda.synchronize()

    # ---- inst_pp stage: the same dxpd is added to both species ----
    inst_pp[1024,1024](dev_xpd1_e,dev_xpd1_p,dev_p,dev_mapping,dev_dxpd)
    cuda.synchronize()
    # add_var[1024,1024](dev_tem_pp,dev_dxpd,dev_tem_pp)
    # cuda.synchronize()
    add_var[1024,1024](dev_xpd1_p,dev_dxpd,dev_xpd1_p)
    cuda.synchronize()
    add_var[1024,1024](dev_xpd1_e,dev_dxpd,dev_xpd1_e)
    cuda.synchronize()
    clear_var[1024,1024](dev_dxpd)
    cuda.synchronize()

KeyboardInterrupt: 

In [173]:
# Debug probe: rerun get_J_gpu on the device state left behind by the
# interrupted time loop and display the resulting dev_J on the host.
get_J_gpu[1024,1024](dev_xpd1_e,dev_xpd2_e,dev_xpd1_p,dev_xpd2_p,dev_beta,dev_J)
dev_J.copy_to_host()

array([0.        , 0.        , 0.        , ..., 0.04937504, 0.04937504,
       0.04937504])

In [177]:
# Inspect the host-side beta table.
# NOTE(review): the displayed end values (+/-1.0005) have magnitude above 1,
# which get_beta(p) = p/sqrt(1+p**2) cannot produce -- ptc_beta was presumably
# reassigned in a cell not shown here; re-check under Restart & Run All.
ptc_beta

array([ 1.00050025,  1.        ,  1.        , ..., -1.        ,
       -1.        , -1.00050025])

In [179]:
# Inspect j0, which the parameter cell sets to -n_e_ini*get_beta(p[idx_ini]).
j0

-9.999500037496876

In [188]:
# Spot-check a single entry (index 999) of the host-side beta table.
ptc_beta[999]

0.995012499921876

In [186]:
# Spot-check the device copy of the beta table at index 1000 (copies the
# whole array to the host just to read one element).
dev_beta.copy_to_host()[1000]

0.0

In [14]:
# Scratch check of range() bounds: range(3,5) yields 3 and 4 (stop is exclusive).
# NOTE(review): this rebinds the global `i` that the time loop also uses, and the
# cell's execution count (14) is out of sequence with the cells above it.
for i in range(3,5):
    print(i)

3
4
