In [1]:
import numpy as np
import zebende as zb
import pandas as pd
from numba import cuda
import cupy as cp

In [2]:
tws = pd.read_csv('./test_data/time_window_scales.txt', header=None).to_numpy().flatten()[:]
tws_D = cuda.to_device(tws)

In [3]:
data_1 = pd.read_csv('./test_data/data_pre_proc/S001/S001_03/S001_03_F3.txt', header=None)
data_2 = pd.read_csv('./test_data/data_pre_proc/S001/S001_03/S001_03_F6.txt', header=None)
data_3 = pd.read_csv('./test_data/data_pre_proc/S001/S001_03/S001_03_P3.txt', header=None)
data_4 = pd.read_csv('./test_data/data_pre_proc/S001/S001_03/S001_03_P6.txt', header=None)

In [4]:
data = pd.concat([data_1, data_2, data_3, data_4], axis=1).to_numpy(dtype=np.float64)
data

array([[-28., -57., -43., -52.],
       [-32., -67., -67., -60.],
       [-44., -74., -74., -66.],
       ...,
       [-15., -54., -59., -57.],
       [  0., -41., -25., -47.],
       [-15., -48., -50., -72.]])

In [5]:
int_data = zb.integrated_series(data).T
int_data_D = cp.array(int_data)

In [6]:
DCCA_of = zb.mat_index_comb(int_data, axis=0)
DCCA_of_D = cp.array(DCCA_of)

In [7]:
time_steps = cp.arange(data.shape[0])
# outputs
F_DFA_arr = cp.zeros(shape=(tws.shape[0], data.shape[1], tws.shape[0]), dtype=data.dtype)
DCCA_arr = cp.zeros(shape=(tws.shape[0], DCCA_of.shape[0], tws.shape[0]), dtype=data.dtype)
P_DCCA_arr = cp.ones(shape=(DCCA_of.max() + 1, DCCA_of.max() + 1, tws.shape[0]), dtype=data.dtype)

# auxiliary arrays
detrend = cp.zeros(shape=(data.shape[0] - tws[0], data.shape[1], tws.shape[0]), dtype=data.dtype)

f2dfa_n = cp.zeros(shape=(data.shape[0] - tws[0], data.shape[1], tws.shape[0]), dtype=data.dtype)

dcca_n = cp.zeros(shape=(data.shape[0] - tws[0], DCCA_of.shape[0], tws.shape[0]), dtype=data.dtype)




In [41]:
@cuda.jit()
def cuda_f2dfa(data, tws, time_steps, # input
                f2dfa_n  # Output
                ):
    
    x,y,z= cuda.grid(3)

    if (x < data.shape[0]) and (y <data.shape[0] - tws[z]) and (z< tws.shape[0]):
        x_sum = 0
        y_sum = 0
        xy_sum = 0
        x2_sum = 0
        n_pt_w = tws[z] + 1

        # fit line
        for i in range(n_pt_w):
            x_sum += time_steps[x + i]
            y_sum += data[x, y + i]
            xy_sum += time_steps[x + i] * data[x, y + i]
            x2_sum += time_steps[x + i]**2
        slope = ( ((n_pt_w * xy_sum) - (x_sum * y_sum)) / ((n_pt_w * x2_sum) - (x_sum**2)) )
        inter = ( (y_sum - (slope * x_sum)) / (n_pt_w) )

        # detrended mean
        tmp = 0
        for i in range(n_pt_w):
            tmp += ((data[x, y + i] - (slope * time_steps[x + i] + inter))**2)/n_pt_w

        #return value
        f2dfa_n[x,y,z] = tmp
    

In [30]:
tpb = 1
bpg = 1

In [31]:
GridDimX = data.shape[1]
GridDimY = data.shape[0]
GridDimZ = tws.shape[0]

GridDimX, GridDimY, GridDimZ


(4, 15742, 42)

In [32]:

tpb = (1, int(np.ceil(GridDimY/64)), 1)
bpg = (64,64, 64)

In [42]:
cuda_f2dfa[bpg, tpb](int_data_D, tws_D, time_steps, f2dfa_n)

TypingError: Failed in cuda mode pipeline (step: nopython frontend)
[1m[1mNo implementation of function Function(<built-in function getitem>) found for signature:
 
 >>> getitem(int64, int64)
 
There are 22 candidate implementations:
[1m      - Of which 22 did not match due to:
      Overload of function 'getitem': File: <numerous>: Line N/A.
        With argument(s): '(int64, int64)':[0m
[1m       No match.[0m
[0m
[0m[1mDuring: typing of intrinsic-call at C:\Users\Nando\AppData\Local\Temp\ipykernel_19928\1464551959.py (15)[0m
[1m
File "C:\Users\Nando\AppData\Local\Temp\ipykernel_19928\1464551959.py", line 15:[0m
[1mdef cuda_f2dfa(data, tws, time_steps, # input
    <source elided>

[1m        tmp = data.shape[0][x] + data.shape[0][y]
[0m        [1m^[0m[0m


In [36]:
type(f2dfa_n), f2dfa_n.device

(cupy.ndarray, <CUDA Device 0>)

In [37]:
f2dfa_n_cpu = cp.asnumpy(f2dfa_n)

In [39]:
f2dfa_n_cpu

array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]])

In [40]:
np.unique(f2dfa_n_cpu)

array([0.])