## Reduction operation: the sum of the numbers in the range [0, value)

In [4]:
import argparse

parser = argparse.ArgumentParser(description="Notebook GPU")
parser.add_argument('--value', type=int, default=5000000, help="Número de elementos en el array.")
args, unknown = parser.parse_known_args()

value = args.value
print(f"Tamaño del array: {value}")


Tamaño del array: 5000000


In [1]:
import numpy as np

def reduc_operation(A):
    """Compute the sum of the elements of Array A in the range [0, value)."""
    s = 0
    for i in range(A.size):
        s += A[i]
    return s

# Secuencial

#value = 5*10**4

X = np.random.rand(value)

# Para imprimir los pimeros valores del array

# print(X[0:12])

# Utilizando las operaciones mágicas de ipython

tiempo = %timeit -r 2 -o -q reduc_operation(X)

print("Time taken by reduction operation using a function:", tiempo)


print(f"And the result of the sum of numbers in the range [0, value) is: {reduc_operation(X)}\n")


# Utilizando numpy.sum()

tiempo = %timeit -r 2 -o -q np.sum(X)

print("Time taken by reduction operation using numpy.sum():", tiempo)

print("Now, the result using numpy.sum():", np.sum(X),"\n ")


# Utilizando numpy.ndarray.sum()

tiempo= %timeit -r 2 -o -q X.sum()

print("Time taken by reduction operation using numpy.ndarray.sum():", tiempo)

print("Now, the result using numpy.ndarray.sum():", X.sum())




Time taken by reduction operation using a function: 2.62 ms ± 8.27 µs per loop (mean ± std. dev. of 2 runs, 100 loops each)
And the result of the sum of numbers in the range [0, value) is: 24995.051343753086

Time taken by reduction operation using numpy.sum(): 7.88 µs ± 8.1 ns per loop (mean ± std. dev. of 2 runs, 100,000 loops each)
Now, the result using numpy.sum(): 24995.051343752984 
 
Time taken by reduction operation using numpy.ndarray.sum(): 7.14 µs ± 0.478 ns per loop (mean ± std. dev. of 2 runs, 100,000 loops each)
Now, the result using numpy.ndarray.sum(): 24995.051343752984


In [12]:
#Ejercicio A - Usar la librería CuPy 
import cupy as cp

# Inicializar el array en la GPU
X_gpu = cp.random.rand(value)  

# Calcular la suma usando CuPy
tiempo = %timeit -r 2 -o -q cp.sum(X_gpu)
print("Time taken by reduction operation using CuPy sum():", tiempo)
print("Now, the result using CuPy sum():", cp.sum(X_gpu))

Time taken by reduction operation using CuPy sum(): 155 µs ± 10.6 ns per loop (mean ± std. dev. of 2 runs, 10,000 loops each)
Now, the result using CuPy sum(): 2499444.2881509443


In [13]:
import time
from numba import cuda

@cuda.jit
def gpu_reduction_kernel(A, result):
    """
    Suma los elementos del array A en paralelo usando CUDA.
    """
    idx = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x
    if idx < A.size:
        cuda.atomic.add(result, 0, A[idx])

def calculate_with_numba(value):
    """
    Calcula la suma de un array usando un kernel de reducción en GPU con Numba.
    Mide y devuelve el tiempo de ejecución.
    """
    # Crear el array en la GPU usando CuPy y `value`
    X_gpu = cp.random.rand(value)
    result = cp.array([0], dtype=cp.float32)

    # Configurar el número de threads y bloques
    threads_per_block = 256
    blocks_per_grid = (X_gpu.size + (threads_per_block - 1)) // threads_per_block

    # Medir el tiempo de ejecución
    start_time = time.perf_counter()
    gpu_reduction_kernel[blocks_per_grid, threads_per_block](X_gpu, result)
    end_time = time.perf_counter()

    print(f"Sum result using Numba GPU kernel: {result[0]}")
    print(f"Execution time: {end_time - start_time:.6f} seconds")

# Llamar a la función con el valor de `value`
calculate_with_numba(value)



Sum result using Numba GPU kernel: 2500549.0
Execution time: 0.037655 seconds
