In [1]:
import numpy as np

In [6]:
# Benchmark problem size: (N_ROWS x N_INNER) @ (N_INNER x N_COLS).
# The inner dimension is defined once so the two shapes cannot drift apart
# and make the product undefined.
N_ROWS, N_INNER, N_COLS = 500, 784, 100

matrix_a_shape = (N_ROWS, N_INNER)
matrix_b_shape = (N_INNER, N_COLS)

In [7]:
# Use a seeded generator so that every "Restart Kernel -> Run All" benchmarks
# (and checks) the same inputs; values are uniform in [0, 1) as before.
rng = np.random.default_rng(seed=0)
matrix_a = rng.random(matrix_a_shape)
matrix_b = rng.random(matrix_b_shape)

print(f"matrix_a.shape: {matrix_a.shape}")
print(f"matrix_b.shape: {matrix_b.shape}")

matrix_a.shape: (500, 784)
matrix_b.shape: (784, 100)


In [8]:
def matmul_novec(a, b):
    """Return the matrix product of ``a`` and ``b`` using pure-Python loops.

    This is the unvectorized baseline: each output element is accumulated
    one scalar multiply-add at a time.

    Parameters
    ----------
    a : numpy.ndarray
        2-D array of shape (m, k).
    b : numpy.ndarray
        2-D array of shape (k, n).

    Returns
    -------
    numpy.ndarray
        Array of shape (m, n), allocated with ``np.zeros`` (float64).

    Raises
    ------
    ValueError
        If the inner dimensions ``a.shape[1]`` and ``b.shape[0]`` differ.
    """
    n_rows, n_inner = a.shape[0], a.shape[1]
    n_cols = b.shape[1]
    # Without this check a too-tall `b` is silently truncated (wrong result)
    # and a too-short `b` fails with an unhelpful IndexError deep in the loop.
    if b.shape[0] != n_inner:
        raise ValueError(
            f"shapes {a.shape} and {b.shape} not aligned: "
            f"{n_inner} (dim 1) != {b.shape[0]} (dim 0)"
        )

    c = np.zeros((n_rows, n_cols))
    for i in range(n_rows):
        for j in range(n_cols):
            for k in range(n_inner):
                # Tuple indexing avoids building an intermediate row view
                # on every access, unlike chained c[i][j].
                c[i, j] += a[i, k] * b[k, j]
    return c

# test
# np_results = np.matmul(matrix_a, matrix_b)
# my_results = matmul_novec(matrix_a, matrix_b)
# np.testing.assert_allclose(np_results, my_results)

In [9]:
# Time the pure-Python triple-loop baseline on matrix_a / matrix_b.
%timeit matmul_novec(matrix_a, matrix_b)

24.5 s ± 677 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [10]:
def matmul_vec_level1(a, b):
    """Return the matrix product of ``a`` and ``b``, vectorizing the inner loop.

    Each output element is computed with a single ``np.dot`` between one
    row of ``a`` and one column of ``b``; the two outer loops stay in Python.
    The result is an array of shape ``(a.shape[0], b.shape[1])`` allocated
    with ``np.zeros``.
    """
    n_rows, n_cols = a.shape[0], b.shape[1]
    product = np.zeros((n_rows, n_cols))
    for row in range(n_rows):
        lhs = a[row]
        for col in range(n_cols):
            product[row, col] = np.dot(lhs, b[:, col])
    return product

# test
# np_results = np.matmul(matrix_a, matrix_b)
# my_results = matmul_vec_level1(matrix_a, matrix_b)
# np.testing.assert_allclose(np_results, my_results)

In [11]:
# Time the variant whose innermost loop is replaced by np.dot.
%timeit matmul_vec_level1(matrix_a, matrix_b)

102 ms ± 3.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [12]:
def matmul_vec_level2(a, b):
    """Return the matrix product of ``a`` and ``b``, one output row at a time.

    For each row ``a[i]`` the row is broadcast against ``b.T`` (shape
    ``(n, k)``), multiplied elementwise, and summed along the last axis,
    which yields the whole ``i``-th output row at once.

    Parameters
    ----------
    a : numpy.ndarray
        2-D array of shape (m, k).
    b : numpy.ndarray
        2-D array of shape (k, n).

    Returns
    -------
    numpy.ndarray
        Array of shape (m, n), allocated with ``np.zeros`` (float64).

    Raises
    ------
    ValueError
        If the inner dimensions ``a.shape[1]`` and ``b.shape[0]`` differ.
    """
    # Explicit check: broadcasting would otherwise accept a size-1 inner
    # dimension on either side and silently return a wrong result.
    if a.shape[1] != b.shape[0]:
        raise ValueError(
            f"shapes {a.shape} and {b.shape} not aligned: "
            f"{a.shape[1]} (dim 1) != {b.shape[0]} (dim 0)"
        )

    b_t = b.T  # loop-invariant view, shape (n, k)
    c = np.zeros((a.shape[0], b.shape[1]))
    for i in range(a.shape[0]):
        c[i] = np.sum(a[i] * b_t, axis=1)
    return c

# test
# np_results = np.matmul(matrix_a, matrix_b)
# my_results = matmul_vec_level2(matrix_a, matrix_b)
# np.testing.assert_allclose(np_results, my_results)

In [13]:
# Time the variant that computes one full output row per Python iteration.
%timeit matmul_vec_level2(matrix_a, matrix_b)

44.9 ms ± 425 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [14]:
def matmul_vec_level3(a, b):
    """Return the matrix product of ``a`` and ``b`` with no Python loops.

    ``a`` is reshaped to ``(m, 1, k)`` and broadcast against ``b.T`` of
    shape ``(n, k)``; the elementwise product has shape ``(m, n, k)`` and is
    reduced over the last axis. Note that the full ``(m, n, k)`` temporary
    is materialized before the sum, so memory use grows with ``m * n * k``.

    Parameters
    ----------
    a : numpy.ndarray
        2-D array of shape (m, k).
    b : numpy.ndarray
        2-D array of shape (k, n).

    Returns
    -------
    numpy.ndarray
        Array of shape (m, n).

    Raises
    ------
    ValueError
        If the inner dimensions ``a.shape[1]`` and ``b.shape[0]`` differ.
    """
    # Explicit check: broadcasting would otherwise accept a size-1 inner
    # dimension on either side and silently return a wrong result.
    if a.shape[1] != b.shape[0]:
        raise ValueError(
            f"shapes {a.shape} and {b.shape} not aligned: "
            f"{a.shape[1]} (dim 1) != {b.shape[0]} (dim 0)"
        )

    c = np.sum(a[:, np.newaxis, :] * b.T, axis=2)
    return c

# test
# np_results = np.matmul(matrix_a, matrix_b)
# my_results = matmul_vec_level3(matrix_a, matrix_b)
# np.testing.assert_allclose(np_results, my_results)

In [17]:
# Time the fully broadcast variant (no Python-level loops).
%timeit matmul_vec_level3(matrix_a, matrix_b)

86.8 ms ± 883 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [16]:
# Reference point: NumPy's built-in matmul on the same inputs.
%timeit np.matmul(matrix_a, matrix_b)

1.04 ms ± 40.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
