In [5]:
import ctypes
import time
import functools

def tictoc(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        time_taken = end_time - start_time
        print(f"Function '{func.__name__}' executed in {time_taken:.4f} seconds")
        return result
    return wrapper

In [6]:
# Load the shared library
addition = ctypes.CDLL('./add.so')
addition.kernel_launcher.argtypes = [ctypes.c_int]
addition.kernel_launcher.restype = ctypes.c_int
@tictoc
def run_kernel_launcher(i):
    return addition.kernel_launcher(i)
@tictoc
def python_code(i):
    result = 0
    for i in range(i):
        result += i+1
threads = 1024
result = run_kernel_launcher(threads)
print(f"Python => The CUDA result is: {result}")
result = python_code(threads)
print(f"Python => The python result is: {result}")

Kernel => Sum = 521200
Function 'run_kernel_launcher' executed in 0.0004 seconds
Python => The CUDA result is: 524800
Function 'python_code' executed in 0.0000 seconds
Python => The python result is: None
C => Sum = 524800
