<a href="https://colab.research.google.com/github/HadasRavikovitch/Final-Project---GPU/blob/main/cross_time_tests.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from numba import cuda
import math

# Physical parameters (SI units). These module-level names are read as globals
# by the LLG implementations in this notebook (`gama`, `miu`).
eps = 8.854e-12              # Permittivity [F/m]
miu = 4 * np.pi * 1e-7       # Magnetic permeability [H/m]
c = 1 / (eps * miu)**0.5     # 1/sqrt(eps*miu)
heta = (miu / eps)**0.5      # sqrt(miu/eps)
q = 1.60217646e-19           # Elementary charge [Coulombs]
g = 2                        # g-factor (labelled "Landau factor" originally; presumably the Lande g-factor)
me = 9.1093821545e-31        # Electron mass [kg]
gma_factor = 1               # Dimensionless scale applied to gama
gama = gma_factor * g * q / (2 * me)   # g*q/(2*me), the precession coefficient used by the LLG steps
alpha = 0                    # Damping coefficient passed to the LLG steps (0 = no damping term)

# Discretisation steps. dz is not used by the cells visible here; dt is the
# step handed to both LLG implementations (units are not stated in this notebook).
dz = 2e-9/8
dt = 2

# Test vectors: each row is one 3-component vector. `x` is passed to the kernel
# as the magnetisation (array1) and `y` as the field (array2).
x = np.array([[1.,2.,3.], [4.,5.,6.], [7.,2.,5.]], dtype = np.float64)
y = np.array([[4,5,6], [1,7,3], [4,5,6]], dtype = np.float64)
n_cells = x.shape[0]

# Host-side templates. M_norm only supplies shape/dtype for its device twin;
# res is a host buffer kept for later use (unused in this cell).
M_norm = np.arange(n_cells, dtype=np.float64)
res = np.empty_like(x)

# Device inputs and output buffers (one row per input row).
d_x = cuda.to_device(x)
d_y = cuda.to_device(y)
d_res = cuda.device_array_like(d_x)
d_M_norm = cuda.device_array_like(M_norm)

# Buffers for the four Runge-Kutta stage values exposed by the kernel.
d_an_res = cuda.device_array_like(d_x)
d_bn_res = cuda.device_array_like(d_x)
d_cn_res = cuda.device_array_like(d_x)
d_dn_res = cuda.device_array_like(d_x)

# Launch configuration derived from the data size: one thread per row,
# rounded up to whole blocks.
threadsperblock = 32
blocks = math.ceil(n_cells / threadsperblock)

# NOTE: LLG_kernel is defined in a later cell of this notebook; run that cell
# first, otherwise this launch raises NameError on a fresh kernel.
LLG_kernel[blocks, threadsperblock](d_x, d_y, dt, alpha, d_res, d_M_norm, d_an_res, d_bn_res, d_cn_res, d_dn_res)
cuda.synchronize()
print(d_res.copy_to_host())
print(d_M_norm.copy_to_host())
print(d_an_res.copy_to_host())


[[-3.12367001e+24 -3.67485308e+23  2.38868443e+24]
 [ 8.40055643e+24 -4.88078403e+24  8.58831060e+24]
 [ 1.63533074e+25 -1.13922175e+25 -1.40869036e+24]]
[3.74165739 3.74165739 3.74165739]
[[  663059.56357368   663059.56357368   663059.56357368]
 [-1326119.12714735 -1326119.12714735 -1326119.12714735]
 [  663059.56357368   663059.56357368   663059.56357368]]


In [None]:
import numpy as np
from numba import cuda, float64, int64
import math

@cuda.jit(device=True)
def cross_product(a, b, result):
  """
  Write the cross product ``a x b`` of two 3-component vectors into ``result``.

  Only indices 0..2 of ``a``, ``b`` and ``result`` are accessed. Nothing is
  returned; ``result`` is modified in place.

  All input components are read into scalars before anything is written, so
  the output is still correct if ``result`` is the same array as ``a`` or ``b``.
  """
  a0, a1, a2 = a[0], a[1], a[2]
  b0, b1, b2 = b[0], b[1], b[2]
  result[0] = a1 * b2 - a2 * b1
  result[1] = a2 * b0 - a0 * b2
  result[2] = a0 * b1 - a1 * b0

@cuda.jit(device=True)
def norm(array):
  """
  Return the Euclidean length of a 3-component vector.

  Only indices 0..2 of ``array`` are read.
  """
  vx, vy, vz = array[0], array[1], array[2]
  return math.sqrt(vx**2 + vy**2 + vz**2)

@cuda.jit(device=True)
def _llg_rhs(m, h, precession, damping, m_x_h, m_x_mxh, out):
  """
  Evaluate out = -precession * (m x h) - damping * (m x (m x h)).

  ``m_x_h`` and ``m_x_mxh`` are caller-provided 3-element scratch arrays that
  are overwritten with the two cross products.
  """
  cross_product(m, h, m_x_h)
  cross_product(m, m_x_h, m_x_mxh)
  for i in range(3):
    out[i] = -precession * m_x_h[i] - damping * m_x_mxh[i]


@cuda.jit
def LLG_kernel(array1, array2, dt, alpha, llg_result, M_0, an_res, bn_res, cn_res, dn_res):
  """
  Advance each row of ``array1`` by one fourth-order Runge-Kutta step of

      dM/dt = -gama/(1+alpha^2) * miu * (M x H)
              - (gama*alpha/(1+alpha^2)) * miu / |M| * (M x (M x H))

  where M is a row of ``array1`` and H the matching row of ``array2``. One
  thread handles one row; threads past the last row do nothing.

  Parameters
  ----------
  array1, array2 : 2-D device arrays, indexed [row, 0..2]
  dt             : step size
  alpha          : damping coefficient
  llg_result     : output, updated vector per row (same layout as array1)
  M_0            : output, 1-D, |M| of each input row
  an_res, bn_res, cn_res, dn_res :
      outputs, the four Runge-Kutta stage values per row

  Reads the module-level globals ``gama`` and ``miu`` and calls the device
  functions ``cross_product`` and ``norm`` defined in this notebook.
  """
  llg_gama = gama / (1 + alpha**2)
  llg_lamda = gama * alpha / (1 + alpha**2)

  idx = cuda.grid(1)
  if idx >= array1.shape[0]:
    return

  m = array1[idx]
  h = array2[idx]

  # |M| of the *input* vector is a per-thread scalar and is reused for every
  # stage, as in the NumPy reference LLG_step.
  m0 = norm(m)
  precession = llg_gama * miu
  damping = llg_lamda * miu / m0

  # Per-thread work arrays: four stage slopes, one stage state, two scratch
  # buffers for the cross products. Sizes must be compile-time constants.
  an = cuda.local.array(3, dtype=np.float64)
  bn = cuda.local.array(3, dtype=np.float64)
  cn = cuda.local.array(3, dtype=np.float64)
  dn = cuda.local.array(3, dtype=np.float64)
  stage = cuda.local.array(3, dtype=np.float64)
  m_x_h = cuda.local.array(3, dtype=np.float64)
  m_x_mxh = cuda.local.array(3, dtype=np.float64)

  # Stage 1: slope at M.
  _llg_rhs(m, h, precession, damping, m_x_h, m_x_mxh, an)

  # Stage 2: slope at M + dt/2 * an.
  for i in range(3):
    stage[i] = m[i] + (dt / 2) * an[i]
  _llg_rhs(stage, h, precession, damping, m_x_h, m_x_mxh, bn)

  # Stage 3: slope at M + dt/2 * bn.
  for i in range(3):
    stage[i] = m[i] + (dt / 2) * bn[i]
  _llg_rhs(stage, h, precession, damping, m_x_h, m_x_mxh, cn)

  # Stage 4: slope at M + dt * cn (full step, not half).
  for i in range(3):
    stage[i] = m[i] + dt * cn[i]
  _llg_rhs(stage, h, precession, damping, m_x_h, m_x_mxh, dn)

  # Combine the stages and publish everything to this thread's own row only,
  # so no two threads write the same element.
  for i in range(3):
    llg_result[idx, i] = m[i] + (dt / 6) * (an[i] + 2 * bn[i] + 2 * cn[i] + dn[i])
    an_res[idx, i] = an[i]
    bn_res[idx, i] = bn[i]
    cn_res[idx, i] = cn[i]
    dn_res[idx, i] = dn[i]
  M_0[idx] = m0


In [None]:
%timeit LLG_kernel[4,16](d_x, d_y, dt, alpha, d_res); cuda.synchronize()



94.4 µs ± 20.4 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:
import numpy as np

def LLG_step(M: np.ndarray, H: np.ndarray, dt: float, alpha: float, verbose: bool = False) -> np.ndarray:
    """
    Advance M by one fourth-order Runge-Kutta step of

        dM/dt = -gama/(1+alpha^2) * miu * (M x H)
                - (gama*alpha/(1+alpha^2)) * miu / |M| * (M x (M x H))

    Parameters
    ----------
    M : 2-D array, one 3-component vector per row (cross products and norms
        are taken along axis 1).
    H : array with the same layout as M.
    dt : step size.
    alpha : damping coefficient.
    verbose : when True, print |M| and the four stage slopes (an, bn, cn, dn).
        Off by default so that timing this function does not measure printing.

    Returns
    -------
    New array with the same shape as M; M itself is not modified.

    Reads the module-level globals ``gama`` and ``miu``. |M| is computed once
    from the input M and reused for all four stages.
    """
    M0 = np.linalg.norm(M, axis=1, keepdims=True)
    gma_LL = gama / (1 + alpha**2)
    LL_lambda = gama * alpha / (1 + alpha**2)
    precession = gma_LL * miu
    damping = LL_lambda * miu / M0

    def rhs(m: np.ndarray) -> np.ndarray:
        # m x H is needed by both terms; compute it once per stage.
        m_x_h = np.cross(m, H, axis=1)
        return -precession * m_x_h - damping * np.cross(m, m_x_h, axis=1)

    # Classic RK4 stages: half step, half step, full step.
    an = rhs(M)
    bn = rhs(M + (dt / 2) * an)
    cn = rhs(M + (dt / 2) * bn)
    dn = rhs(M + dt * cn)

    if verbose:
        print("M0:", M0)
        print(an)
        print(bn)
        print(cn)
        print(dn)

    new_M = M + (dt/6)*(an+2*bn+2*cn+dn)
    return new_M

In [None]:
import numpy as np
from numba import cuda
import math

# Physical parameters (SI units). These module-level names are read as globals
# by the LLG implementations in this notebook (`gama`, `miu`).
eps = 8.854e-12              # Permittivity [F/m]
miu = 4 * np.pi * 1e-7       # Magnetic permeability [H/m]
c = 1 / (eps * miu)**0.5     # 1/sqrt(eps*miu)
heta = (miu / eps)**0.5      # sqrt(miu/eps)
q = 1.60217646e-19           # Elementary charge [Coulombs]
g = 2                        # g-factor (labelled "Landau factor" originally; presumably the Lande g-factor)
me = 9.1093821545e-31        # Electron mass [kg]
gma_factor = 1               # Dimensionless scale applied to gama
gama = gma_factor * g * q / (2 * me)   # g*q/(2*me), the precession coefficient used by the LLG steps
alpha = 0                    # Damping coefficient passed to the LLG steps (0 = no damping term)

# Discretisation steps. dz is not used by the cells visible here; dt is the
# step handed to both LLG implementations (units are not stated in this notebook).
dz = 2e-9/8
dt = 2

# Host-side inputs for the NumPy reference run: each row is one 3-vector,
# with x passed as M and y passed as H.
x = np.array(
    [[1., 2., 3.],
     [4., 5., 6.],
     [7., 2., 5.]],
    dtype=np.float64,
)
y = np.array(
    [[4, 5, 6],
     [1, 7, 3],
     [4, 5, 6]],
    dtype=np.float64,
)
# Uninitialised host buffer shaped like x (not used by the call below).
res = np.empty_like(x)

print(LLG_step(x, y, dt, alpha))


M0: [[3.74165739]
 [8.77496439]
 [8.83176087]]
[[  663059.56357368 -1326119.12714735   663059.56357368]
 [ 5967536.07216308  1326119.12714735 -5083456.65406485]
 [ 2873258.10881926  4862436.79954029 -5967536.07216308]]
[[ 2.49133924e+12  2.93097330e+11 -1.90514060e+12]
 [-8.74410395e+12  5.08037804e+12 -8.93951411e+12]
 [-1.30428873e+13  9.08606322e+12  1.12353888e+12]]
[[-2.49405147e+18  4.98810822e+18 -2.49405587e+18]
 [-1.71992640e+19 -3.82205164e+18  1.46512085e+19]
 [-1.08075802e+19 -1.82897200e+19  2.24464868e+19]]
[[-1.87420101e+25 -2.20493180e+24  1.43321166e+25]
 [ 5.04034074e+25 -2.92846889e+25  5.15298050e+25]
 [ 9.81198875e+25 -6.83532317e+25 -8.45223192e+24]]
[[-6.24733836e+24 -7.34973942e+23  4.77737052e+24]
 [ 1.68011243e+25 -9.76156552e+24  1.71766114e+25]
 [ 3.27066219e+25 -2.27844227e+25 -2.81739568e+24]]


In [None]:
# Time the NumPy reference implementation on the 3-row inputs x, y.
# Anything LLG_step prints while running is part of the measured time.
%timeit LLG_step(x, y, dt, alpha)

508 µs ± 94.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
