In [1]:
import numpy as np

In [18]:
# Operand shapes for the benchmark. B's shape is A's reversed, so the inner
# dimensions always agree and the product A @ B is square.
matrix_a_shape = (100, 10)
matrix_b_shape = matrix_a_shape[::-1]

In [19]:
# Draw both operands from a single seeded generator so that every kernel
# restart benchmarks (and cross-checks) exactly the same values.
rng = np.random.default_rng(0)
matrix_a = rng.random(matrix_a_shape)
matrix_b = rng.random(matrix_b_shape)

print(f"matrix_a.shape: {matrix_a.shape}")
print(f"matrix_b.shape: {matrix_b.shape}")

matrix_a.shape: (100, 10)
matrix_b.shape: (10, 100)


In [20]:
def matmul_novec(a, b):
    """Return the matrix product of ``a`` and ``b`` using plain Python loops.

    This is the unvectorized baseline: every output element is built up one
    scalar multiply-add at a time.

    Args:
        a: 2-D array of shape (m, k).
        b: 2-D array of shape (k, n).

    Returns:
        Array of shape (m, n) allocated with ``np.zeros`` (default float
        dtype) holding the matrix product.

    Raises:
        ValueError: If the number of columns of ``a`` differs from the
            number of rows of ``b``.
    """
    # Without this check a `b` with more rows than `a` has columns would be
    # silently truncated by the k-loop and yield a wrong product.
    if a.shape[1] != b.shape[0]:
        raise ValueError(
            f"shapes {a.shape} and {b.shape} not aligned: "
            f"{a.shape[1]} (dim 1) != {b.shape[0]} (dim 0)"
        )

    c = np.zeros((a.shape[0], b.shape[1]))
    for i in range(a.shape[0]):
        for j in range(b.shape[1]):
            for k in range(a.shape[1]):
                # Tuple indexing avoids creating a temporary row view per access.
                c[i, j] += a[i, k] * b[k, j]
    return c

# test
# np_results = np.matmul(matrix_a, matrix_b)
# my_results = matmul_novec(matrix_a, matrix_b)
# np.testing.assert_allclose(np_results, my_results)

In [21]:
# Time the pure-Python triple-loop baseline.
%timeit matmul_novec(matrix_a, matrix_b)

61.7 ms ± 1.51 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [22]:
def matmul_vec_level1(a, b):
    """Compute the matrix product of ``a`` and ``b`` one output element at a time.

    Only the innermost (contraction) loop is vectorized: each output entry is
    the ``np.dot`` of a row of ``a`` with a column of ``b``. The iteration
    over output rows and columns still happens in Python.

    Args:
        a: 2-D array whose rows are the left-hand vectors.
        b: 2-D array whose columns are the right-hand vectors.

    Returns:
        Array of shape ``(a.shape[0], b.shape[1])`` allocated with
        ``np.zeros`` and filled with the dot products.
    """
    n_rows, n_cols = a.shape[0], b.shape[1]
    product = np.zeros((n_rows, n_cols))
    # np.ndindex walks the output grid in row-major order, i.e. the same
    # order as a nested row/column loop.
    for row, col in np.ndindex(n_rows, n_cols):
        product[row, col] = np.dot(a[row], b[:, col])
    return product

# test
# np_results = np.matmul(matrix_a, matrix_b)
# my_results = matmul_vec_level1(matrix_a, matrix_b)
# np.testing.assert_allclose(np_results, my_results)

In [23]:
# Time the variant that vectorizes only the innermost loop with np.dot.
%timeit matmul_vec_level1(matrix_a, matrix_b)

12.3 ms ± 110 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [24]:
def matmul_vec_level2(a, b):
    """Return the matrix product of ``a`` and ``b``, one output row per iteration.

    The two inner loops are vectorized: row ``i`` of ``a`` is broadcast
    against every row of ``b.T`` (i.e. every column of ``b``), and the
    element-wise products are summed along the shared dimension.

    Args:
        a: 2-D array of shape (m, k).
        b: 2-D array of shape (k, n).

    Returns:
        Array of shape (m, n) allocated with ``np.zeros`` (default float
        dtype) holding the matrix product.

    Raises:
        ValueError: If the number of columns of ``a`` differs from the
            number of rows of ``b``.
    """
    # Broadcasting would otherwise hide a mismatch whenever one of the inner
    # dimensions is 1 and silently produce a wrong result.
    if a.shape[1] != b.shape[0]:
        raise ValueError(
            f"shapes {a.shape} and {b.shape} not aligned: "
            f"{a.shape[1]} (dim 1) != {b.shape[0]} (dim 0)"
        )

    c = np.zeros((a.shape[0], b.shape[1]))
    b_t = b.T  # loop-invariant view, shape (n, k)
    for i in range(a.shape[0]):
        # (k,) * (n, k) -> (n, k); summing axis 1 contracts the shared dimension.
        c[i] = np.sum(a[i] * b_t, axis=1)
    return c

# test
# np_results = np.matmul(matrix_a, matrix_b)
# my_results = matmul_vec_level2(matrix_a, matrix_b)
# np.testing.assert_allclose(np_results, my_results)

In [25]:
# Time the variant that computes one full output row per Python iteration.
%timeit matmul_vec_level2(matrix_a, matrix_b)

784 µs ± 29.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [26]:
def matmul_vec_level3(a, b):
    """Return the matrix product of ``a`` and ``b`` with no Python-level loops.

    ``a`` is given an extra middle axis so that it broadcasts against
    ``b.T``; the element-wise products are then summed over the shared
    (last) axis. Note that this materializes an intermediate array of shape
    (m, n, k) before the reduction.

    Args:
        a: 2-D array of shape (m, k).
        b: 2-D array of shape (k, n).

    Returns:
        Array of shape (m, n) holding the matrix product.

    Raises:
        ValueError: If the number of columns of ``a`` differs from the
            number of rows of ``b``.
    """
    # Broadcasting would otherwise hide a mismatch whenever one of the inner
    # dimensions is 1 and silently produce a wrong result.
    if a.shape[1] != b.shape[0]:
        raise ValueError(
            f"shapes {a.shape} and {b.shape} not aligned: "
            f"{a.shape[1]} (dim 1) != {b.shape[0]} (dim 0)"
        )

    # (m, 1, k) * (n, k) -> (m, n, k); summing axis 2 contracts the shared dimension.
    c = np.sum(a[:, np.newaxis, :] * b.T, axis=2)
    return c

# test
# np_results = np.matmul(matrix_a, matrix_b)
# my_results = matmul_vec_level3(matrix_a, matrix_b)
# np.testing.assert_allclose(np_results, my_results)

In [27]:
# Time the fully broadcast variant (no Python-level loops).
%timeit matmul_vec_level3(matrix_a, matrix_b)

137 µs ± 1.09 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [28]:
# Reference timing: NumPy's built-in matmul on the same operands.
%timeit np.matmul(matrix_a, matrix_b)

8.82 µs ± 82.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
