# 1. Loops

In [3]:
# Side length of the square n-by-n grid that every benchmark below builds.
n = 10_000

In [15]:
%%timeit
# Baseline: build an n-by-n list of lists with explicit nested loops,
# appending one element at a time.
a = []
for i in range(n):
    a.append([])  # start row i
    for j in range(n):
        a[i].append(0 + 5 * 52)  # every element gets the same value, 260

7.45 s ± 223 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [14]:
# Names assigned inside a %%timeit cell are not kept in the notebook namespace,
# so `a` does not exist after the benchmark above. Rebuild a small grid with
# the same nested loops to check the value each element receives.
a = []
for i in range(3):
    a.append([])
    for j in range(3):
        a[i].append(0 + 5 * 52)
a[0][0]

260

# 2. List comprehensions

In [20]:
%%timeit
# Same n-by-n grid of 260s as the nested-loop version, built with nested
# list comprehensions instead of repeated append calls.
b = [[0 + 5 * 52 for j in range(n)] for i in range(n)]

2.67 s ± 110 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [19]:
# Names assigned inside a %%timeit cell are not kept in the notebook namespace,
# so `b` does not exist after the benchmark above. Rebuild a small grid with
# the same comprehension to check the value each element receives.
b = [[0 + 5 * 52 for j in range(3)] for i in range(3)]
b[0][0]

260

# 3. NumPy

In [21]:
import numpy as np

In [29]:
%%timeit
# Vectorised version: allocate an n-by-n array of zeros, then add the scalar
# 5 * 52 to every element in a single array operation.
c = np.zeros((n,n)) + 5 * 52

322 ms ± 21.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [27]:
# Names assigned inside a %%timeit cell are not kept in the notebook namespace,
# so `c` does not exist after the benchmark above. Rebuild a small array with
# the same expression to check the value each element receives.
c = np.zeros((3, 3)) + 5 * 52
c[0][0]

260.0

# 4. Numba

In [30]:
import numba

In [33]:
@numba.njit
def arr(n):
    """Return an n-by-n array in which every element equals 5 * 52 (260.0)."""
    zeros = np.zeros((n, n))
    return zeros + 5 * 52

In [34]:
%%timeit
# Time a call to the Numba-compiled `arr` at the benchmark size n.
arr(n)

500 ms ± 8.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [35]:
# Spot-check one element of the array returned by `arr`.
arr(n)[0,0]

260.0

In [36]:
@numba.njit(parallel=True, fastmath=True)
def arr(n):
    """Build an n-by-n array filled with 5 * 52 (260.0).

    Same computation as the plain ``njit`` version, compiled with Numba's
    ``parallel`` and ``fastmath`` options switched on.
    """
    base = np.zeros((n, n))
    return base + 5 * 52

In [39]:
%%timeit
# Time a call to `arr` compiled with fastmath and parallel enabled.
arr(n)

94.4 ms ± 742 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [40]:
# Spot-check one element of the array returned by the parallel `arr`.
arr(n)[0,0]

260.0

In [58]:
# The explicit signature makes Numba compile this specialisation eagerly.
# The argument is typed int64 so that any realistic n fits; a 16-bit argument
# type would cap n at 32767 without any speed benefit.
@numba.njit("float64[:,:](int64)", fastmath=True, parallel=True)
def arr(n):
    """Return an n-by-n float64 array in which every element equals 5 * 52 (260.0).

    Parameters
    ----------
    n : int
        Side length of the square array.
    """
    return np.zeros((n, n)) + 5 * 52

In [60]:
%%timeit
# Time a call to `arr` compiled with an explicit type signature.
arr(n)

102 ms ± 7.92 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [54]:
# Spot-check one element of the array returned by the explicitly typed `arr`.
arr(n)[0,0]

260.0

# 5. PyTorch

In [62]:
import torch
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

In [63]:
# Allocate the zeros directly on the target device instead of building them
# on the CPU first and then copying them over.
t = torch.zeros((n, n), device=device)

In [64]:
%%timeit
t + 5 * 52

8.13 ms ± 241 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [65]:
# Create the zeros on the device directly (no CPU allocation and copy), then
# add the constant so the resulting tensor is displayed.
torch.zeros((n, n), device=device) + 5 * 52

tensor([[260., 260., 260.,  ..., 260., 260., 260.],
        [260., 260., 260.,  ..., 260., 260., 260.],
        [260., 260., 260.,  ..., 260., 260., 260.],
        ...,
        [260., 260., 260.,  ..., 260., 260., 260.],
        [260., 260., 260.,  ..., 260., 260., 260.],
        [260., 260., 260.,  ..., 260., 260., 260.]], device='cuda:0')