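A compact reference for the call shapes supported by the function below (example calls that carry a trailing `//` comment are invalid and are shown only for contrast):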
gpu_execute<T>(int N, F functor, int threads_per_block, const T*... inputs);
auto out = gpu_execute<float>(N, Functor{}, 256);
auto out = gpu_execute<int>(N, Functor{param}, 128);
auto out = gpu_execute<float>(N, Functor{}, 256, A);
auto out = gpu_execute<int>(N, Functor{N}, 512, arr);
auto out = gpu_execute<double>(N, Functor{}, 64, vec.data());
auto out = gpu_execute<int>(N, Functor{}, 256, A, B);
auto out = gpu_execute<float>(N, Functor{5}, 128, A, B);
auto out = gpu_execute<float>(N, Functor{}, 512, A.data(), B.data());
auto out = gpu_execute<float>(N, Functor{}, 256, A, B, C);
All inputs must be pointers of type const T*, where T is the explicit template argument (e.g. float for gpu_execute<float>).
auto out = gpu_execute<float>(N, Functor{}, 256, A, B, C, D, E);
auto out = gpu_execute<float>(N, Functor{}, 256, A);
auto out = gpu_execute<float>(N, Functor{param1, param2}, 256, A, B);
auto out = gpu_execute<float>(
N,
[] __device__ (float* const ptrs[], int i) { /* ... */ },
256,
A, B
);
gpu_execute<float>(N, F{}, 256, rawPtr);
gpu_execute<float>(N, F{}, 256, vec.data());
gpu_execute<float>(N, F{}, 256, std::vector<float>(N, 1.0f).data());
gpu_execute<float>(N, F{}, 256, A, B + 10, &C[5]);
gpu_execute<float>(N, F{}, 64, A);
gpu_execute<float>(N, F{}, 128, A, B);
gpu_execute<float>(N, F{}, 512, A, B, C);
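gpu_execute<float>(N, F{}, 256, vec); // INVALID: vec is a container, not a pointer (pass vec.data() instead)
gpu_execute<float>(N, F{}, 256, (int*)A); // INVALID: wrong element type (int* where const float* is required)
gpu_execute<float>(N, F{}, 256, &someFloat); // INVALID: address of a single float, not an array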