<a href="https://colab.research.google.com/github/1420kartik/cudaVectorAddition/blob/main/vectorAddition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [1]:
from numba import cuda
import numpy as np

# Addition Function

In [2]:
@cuda.jit
def vector_addition(a, b, c):
    """CUDA kernel: store the element-wise sum of ``a`` and ``b`` in ``c``.

    Each thread handles at most one element, chosen by the thread's index
    in the 1-D launch grid. Threads whose index is at or beyond ``a.size``
    return without touching memory.

    Only ``a.size`` is checked; assumes ``b`` and ``c`` are at least as long
    as ``a`` (not verified here).
    """
    tid = cuda.grid(1)
    if tid >= a.size:
        # Surplus thread: the grid may be larger than the vector.
        return
    c[tid] = a[tid] + b[tid]

# Main

In [3]:
n = 1024  # Number of elements in each host vector (a, b and the result c)

In [4]:
# Host-side data: two all-ones float32 inputs of length n, plus a zero-filled
# array of the same shape and dtype for the result
a = np.ones(n, dtype=np.float32)
b = np.ones_like(a)
c = np.zeros_like(a)

In [5]:
# Copy each host array to the GPU (same order as before: a, then b, then c)
a_device, b_device, c_device = map(cuda.to_device, (a, b, c))

In [6]:
# Launch configuration: 256 threads per block, and enough blocks that each of
# the n elements gets its own thread. The block count is derived from n by
# ceiling division, so changing n cannot leave the tail of the vectors
# uncomputed (for n = 1024 this is still 4 blocks).
threads_per_block = 256
# max(1, ...) keeps the block count non-zero when n is 0; any surplus threads
# in the last block are masked by the kernel's bounds check.
blocks_per_grid = max(1, (n + threads_per_block - 1) // threads_per_block)

In [7]:
# Bind the launch configuration (grid size, block size) to the kernel, then
# invoke it on the device arrays
configured_kernel = vector_addition[blocks_per_grid, threads_per_block]
configured_kernel(a_device, b_device, c_device)



In [8]:
# Copy the result from the GPU back to the host. c is rebound to the array
# returned by copy_to_host(), replacing the zero-filled host array created earlier.
c = c_device.copy_to_host()

In [9]:
# The printed summary elides most elements, so first verify every element
# against the NumPy reference computed on the host; this raises AssertionError
# if any element differs.
np.testing.assert_allclose(c, a + b)
print("Result of vector addition:", c)

Result of vector addition: [2. 2. 2. ... 2. 2. 2.]
