In [18]:
import numpy as np
import time
import cupy as cp

In [19]:
# Vector lengths (element counts) to run the benchmark over.
length_vectors = [
    1_000,
    5_000,
    10_000,
    50_000,
    100_000,
    500_000,
    1_000_000,
]

In [20]:
# Raw CUDA kernel that accumulates the elements of `a` into `b[0]`.
#   a    -- input buffer, declared `const int*`; read at indices [0, size)
#   b    -- output buffer, declared `int*`; only b[0] is touched, via atomicAdd
#   size -- number of elements of `a` to add up
# Each thread starts at its global index and advances by the total number of
# launched threads (blockDim.x * gridDim.x), so any launch configuration covers
# all `size` elements. The kernel only ever adds to b[0], so the caller must
# zero it before launching. Every addition goes through one atomicAdd on the
# same address.
add_kernel = cp.RawKernel(r'''
extern "C" __global__
void vec_sum(const int* a, int* b, const int size) {
    int gridSize = blockDim.x * gridDim.x;
    int first_index = blockIdx.x * blockDim.x + threadIdx.x;
    for (int index = first_index; index < size; index += gridSize)
    {
        atomicAdd(&b[0], a[index]);
    }
}
''', 'vec_sum')

In [21]:
def vector_sum(a):
    """Add up the elements of ``a`` one at a time in pure Python.

    Starts from the integer 0 and adds each element in order, so an empty
    input yields 0. Used as the plain-loop CPU baseline.
    """
    total = 0
    for element in a:
        total += element
    return total

In [None]:
# Benchmark three ways of summing a vector for each configured length:
# NumPy on the CPU, the raw CUDA kernel on the GPU, and a pure-Python loop.
# NOTE: the first GPU measurement presumably also includes the one-time
# compilation of the raw kernel, which CuPy performs lazily on first launch.
for length in length_vectors:
    print(length, 'size')
    # The kernel signature is (const int*, int*, const int), so the device
    # buffers must hold 32-bit integers. dtype=int is 64-bit on most
    # platforms and would be misread by the kernel.
    vector = cp.ones(length, dtype=cp.int32)
    result = cp.zeros(1, dtype=cp.int32)
    a_cpu = np.random.random(length)

    t = time.perf_counter()
    cpu_np_vector_sum = np.sum(a_cpu)
    cpu_np_time = time.perf_counter() - t
    print(cpu_np_time, 'Time on CPU with numpy')

    # Drain pending GPU work (the allocations above) before starting the clock.
    cp.cuda.Device().synchronize()
    t = time.perf_counter()
    # Python ints are handed to raw kernels as 64-bit values; wrap the size in
    # np.int32 so it matches the kernel's `const int size` parameter.
    add_kernel((1024,), (1024,), (vector, result, np.int32(length)))
    # Kernel launches are asynchronous: wait for completion so the measurement
    # covers the execution and not just the launch call.
    cp.cuda.Device().synchronize()
    gpu_time = time.perf_counter() - t
    print(gpu_time, 'Time on GPU')

    # The input is all ones, so the accumulated sum must equal the length.
    assert int(result[0]) == length, 'GPU sum does not match vector length'

    t = time.perf_counter()
    vector_sum(a_cpu)
    cpu_time = time.perf_counter() - t
    print(cpu_time, 'Time on CPU')