In [2]:
!uv pip install -q --system numba-cuda==0.4.0
import numpy as np
from numba import cuda
import time
import os
from numba import config
config.CUDA_ENABLE_PYNVJITLINK = 1


In [8]:
# CUDA Steps:
# Initializing data on CPU
# Transfer from CPU to GPU
# Run Kernel with defined Grid/Block size Threads
# Transfer result from GPU to CPU
# Clear Memory

# 1. CUDA kernel Device
@cuda.jit
def first_kernel(a, result):
    idx = cuda.grid(1)
    if idx < a.size:
        result[idx] = a[idx]

# Host
def main():
    # 2. Initialize data on CPU
    N = 10_000_000
    a_cpu = np.arange(N, dtype=np.float32)

    # -----------------------------
    # CPU computation
    # -----------------------------
    start = time.time()
    result_cpu = a_cpu
    cpu_time = time.time() - start
    print(f"CPU time: {cpu_time * 1e3:.2f} ms")

    # -----------------------------
    # GPU computation
    # -----------------------------
    start = time.time()
    a_gpu = cuda.to_device(a_cpu)
    result_gpu = cuda.device_array_like(a_cpu)
    transfer_in_time = time.time() - start

    threads_per_block = 128
    blocks_per_grid = (N + threads_per_block - 1) // threads_per_block

    start = time.time()
    first_kernel[blocks_per_grid, threads_per_block](a_gpu, result_gpu)
    cuda.synchronize()
    kernel_time = time.time() - start

    start = time.time()
    result_from_gpu = result_gpu.copy_to_host()
    cuda.synchronize()
    transfer_out_time = time.time() - start

    # REPORT
    print(f"Transfer to GPU time: {transfer_in_time * 1e3:.2f} ms")
    print(f"Kernel execution time: {kernel_time * 1e3:.2f} ms")
    print(f"GPU transfer to host: {transfer_out_time * 1e3:.2f} ms")
    print(f"Total GPU Time: {(transfer_in_time + kernel_time + transfer_out_time) * 1e3:.2f} ms")

    # âœ… Cleanup (correct position)
    del a_gpu, result_gpu
    cuda.close()

if __name__ == "__main__":
    main()


CPU time: 0.00 ms
Transfer to GPU time: 92.17 ms
Kernel execution time: 42.27 ms
GPU transfer to host: 14.94 ms
Total GPU Time: 149.38 ms
