In [7]:
from numba import cuda 
import numpy as np 
# to measure execution time 
from timeit import default_timer as timer 

# Normal function to run on CPU 
def func(a):                                 
    for i in range(a.size):  # Loop through all elements of the array
        a[i] += 1  # Increment each element by 1

# Function optimized to run on GPU 
@cuda.jit  # This decorator compiles the function for GPU execution
def func2(a): 
    idx = cuda.grid(1)  # Get the unique thread index for the current block
    if idx < a.size:  # Check if the index is within the bounds of the array
        a[idx] += 1  # Increment the element at that index

if __name__ == "__main__": 
    n = 10000000  # Define the size of the array
    a = np.ones(n, dtype=np.float64)  # Create an array of ones on the host

    start = timer()  # Start the timer for CPU execution
    func(a)  # Execute the CPU function
    cpu_time = timer() - start  # Calculate the elapsed time for CPU
    print("Without GPU:", cpu_time)  # Print the time taken for CPU execution
    
    # Allocate array on GPU memory and copy data from host to device
    a_global_mem = cuda.to_device(a) 
    
    # Configure the number of threads per block and the number of blocks
    threads_per_block = 256  # Define how many threads will be in each block
    blocks_per_grid = (n + (threads_per_block - 1)) // threads_per_block  # Number of blocks needed

    start = timer()  # Start the timer for GPU execution
    func2[blocks_per_grid, threads_per_block](a_global_mem)  # Launch the GPU kernel
    cuda.synchronize()  # Wait for the GPU to finish executing
    gpu_time = timer() - start  # Calculate the elapsed time for GPU
    print("With GPU:", gpu_time)  # Print the time taken for GPU execution
    
    # Copy the result back to the host memory from device memory
    result = a_global_mem.copy_to_host()
    
    # Calculate and print the speedup
    speedup = cpu_time / gpu_time  # Calculate the speedup rate
    print(f"Speedup Rate (CPU time / GPU time): {speedup} Times Faster!")  # Print the speedup ratio

Without GPU: 2.6827477000188082
With GPU: 0.06197740009520203
Speedup Rate (CPU time / GPU time): 43.285902537019986 Times Faster!
