# How fast can I do matrix multiplications?

Things to try:
- [X] numpy
- [X] pytorch tensor
- [X] numba
- [ ] gpu

In [189]:
import numpy as np

# T is the transformation matrix
# T = [[R, T]
#      [0, 1]]
# TODO: make tex

T = np.array([[ 7.533745e-03, -9.999714e-01,-6.166020e-04, -4.069766e-03],
             [ 1.480249e-02,  7.280733e-04, -9.998902e-01, -7.631618e-02],
             [ 9.998621e-01,  7.523790e-03,  1.480755e-02, -2.717806e-01],
             [ 0.000000e+00,  0.000000e+00,  0.000000e+00,  1.000000e+00]])

T.shape

(4, 4)

In [190]:
pnt_cloud = np.random.rand(428032, 4)
pnt_cloud.shape

(428032, 4)

## Using Numpy

In [191]:
def transform(pnt_cloud, T):
    
    i = 0
    for pnt in pnt_cloud:
        xyz_pnt = np.dot(T, pnt)
        
        if xyz_pnt[0] > 0:
            depth_array[i] = xyz_pnt[0]
            
        i += 1
            
        
    return depth_array

In [192]:
%time cloud = transform(pnt_cloud, T)

CPU times: user 670 ms, sys: 7.91 ms, total: 678 ms
Wall time: 674 ms


## Using Numba

In [204]:
from numba import njit

In [205]:
@njit
def transform(pnt_cloud, T):
    
    depth_array = np.zeros(pnt_cloud.shape[0])
    
    i = 0
    for pnt in pnt_cloud:
        xyz_pnt = np.dot(T, pnt)
        
        if xyz_pnt[0] > 0:
            depth_array[i] = xyz_pnt[0]
            
        i += 1
            
        
    return depth_array

In [207]:
%time cloud = transform(pnt_cloud, T)

CPU times: user 124 ms, sys: 0 ns, total: 124 ms
Wall time: 124 ms


## Using torch tensor


In [196]:
import torch

In [197]:
tensor_cld = torch.tensor(pnt_cloud)
tensor_T   = torch.tensor(T)

In [198]:
def transform(pnt_cloud, T, depth_array):
    i = 0
    for pnt in pnt_cloud:
        xyz_pnt = np.dot(T, pnt)
        
        if xyz_pnt[0] > 0:
            depth_array[i] = xyz_pnt[0]
            
        i += 1
            
        
    return depth_array

In [199]:
%time  depth_array = torch.tensor(np.zeros(pnt_cloud.shape[0]))
%time cloud_tensor = transform(tensor_cld, tensor_T, depth_array)

CPU times: user 5.9 ms, sys: 56 µs, total: 5.96 ms
Wall time: 2.62 ms
CPU times: user 6.15 s, sys: 28.1 ms, total: 6.18 s
Wall time: 6.09 s


## Making Numba Faster by predefining initialisation

In [208]:
@njit
def transform(pnt_cloud, T, depth_array):
    
    i = 0
    for pnt in pnt_cloud:
        xyz_pnt = np.dot(T, pnt)
        
        if xyz_pnt[0] > 0:
            depth_array[i] = xyz_pnt[0]
            
        i += 1
            
        
    return depth_array

In [210]:
%time depth_array = np.zeros(pnt_cloud.shape[0])
%time cloud_f = transform(pnt_cloud, T, depth_array)

CPU times: user 533 µs, sys: 5 µs, total: 538 µs
Wall time: 308 µs
CPU times: user 124 ms, sys: 9 µs, total: 124 ms
Wall time: 123 ms


In [211]:
np.all(cloud_f == cloud)

True

Seems like predefining doest play a major part, for obvious reasons.