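# Add

Benchmark: add a scalar to every element of a 1,000,000-element integer array using Numba (`@njit(parallel=True)`, `@njit`, and `@cuda.jit`), checking each result against a reference sum.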

In [31]:
import numba 
from numba import cuda, jit, njit
import numpy as np
from functools import reduce
import datetime as dt

def now():
    """Return the current local date and time as a ``datetime`` object."""
    current = dt.datetime.now()
    return current

# Load the input: one integer per line. Blank / whitespace-only lines
# (e.g. a trailing newline at the end of the file) are skipped.
with open('offline_1000000.csv', 'r', encoding='utf-8') as arq:
    array0 = np.array([int(linha) for linha in arq if linha.strip()])

# Scalar added to every element in each benchmark below.
add = 100
# Reference checksum: the sum of all elements after adding `add`.
# Computed with a vectorized NumPy sum instead of a Python-level loop.
ref = (array0 + add).sum()
# Display the pristine input `array0` (not `array`, which is only defined
# by the later benchmark cells and does not exist on a fresh kernel).
array0.size, array0.dtype, ref

(1000000, dtype('int64'), 7434265667814)

In [17]:
array = array0.copy()
@njit(parallel=True)
def soma(array, add):
    return array + add

%timeit soma(array, add)
f'{round(np.average([0.00573, 0.00532, 0.00415]), 8):,.8}, {sum(soma(array, add))==ref}'

4.15 ms ± 154 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


'0.005525, True'

In [18]:
array = array0.copy()
@njit
def soma(array, add):
    return array + add
%timeit soma(array, add)
f'{round(np.average([0.0055, 0.00673, 0.00672, 0.00675]), 8):,.8}, {sum(soma(array, add))==ref}'

6.75 ms ± 492 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


'0.00631667, True'

In [29]:
array = array0.copy()
@cuda.jit
def soma(array, add):
    # Thread id in a 1D block
    tx = cuda.threadIdx.x
    # Block id in a 1D grid
    ty = cuda.blockIdx.x
    # Block width, i.e. number of threads per block
    bw = cuda.blockDim.x
    # Compute flattened index inside the array
    pos = tx + ty * bw # pos = cuda.grid(1)
    if pos < array.size:  # Check array boundaries
        array[pos] += add
threadsperblock = 32
blockspergrid = (array.size + (threadsperblock - 1)) // threadsperblock
%timeit soma[blockspergrid, threadsperblock](array, add)
f'{round(np.average([0.0108, 0.0127]), 8):,.8}, {sum(array)==ref}', sum(array), ref

10.8 ms ± 772 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


('0.01175, False', 7434965667814, 7434265667814)