## Vectorization

In [2]:
from numba import jit
import numpy as np
# Small integer array as a quick NumPy sanity check; the bare name on the last line displays it.
a = np.array([1, 2, 3, 4])
a

array([1, 2, 3, 4])

In [3]:
# Length of each random vector used in the timing comparison (10**9 elements).
mynum = 1_000_000_000

In [4]:
# Two random vectors of length mynum, used as inputs for the dot-product timings.
# np.random.rand returns float64 (8 bytes per element), so memory use scales as 8 * mynum bytes per array.
# NOTE(review): no seed is set, so the printed sums will differ from run to run.
a = np.random.rand(mynum)
b = np.random.rand(mynum)

In [5]:
def dot_product(a,b):
    """Return the vectorized dot product of ``a`` and ``b``, rounded to 3 decimals.

    The multiplication and summation are delegated to ``np.dot``; the result
    is then passed through ``np.round`` with 3 decimal places.
    """
    product = np.dot(a, b)
    return np.round(product, decimals=3)

In [8]:
%%time
# Time the NumPy (vectorized) dot product; the result is kept in `c` for the later comparisons.
print("Vectorized version:")
c = dot_product(a,b)
print(c)

Vectorized version:
250000049.689
Wall time: 1.04 s


In [7]:
def for_loop(a,b):
    """Return the dot product of ``a`` and ``b`` using an explicit Python loop.

    Deliberately unvectorized: it is the slow baseline for the timing
    comparison. Every index of ``a`` is visited, the elementwise products are
    accumulated one at a time, and the sum is rounded to 3 decimals.
    """
    n_items = len(a)
    acc = 0
    for k in range(n_items):
        acc += a[k] * b[k]

    return np.round(acc, 3)

In [9]:
%%time
print("Not vectorized version: ")
d = for_loop(a,b)
print(d)
print(d == c)

Not vectorized version: 
250000049.689
True
Wall time: 3min 55s


In [10]:
@jit(nopython=True)  # "nopython" mode (equivalent to @njit) gives the best performance
def numbaTest(a, b):
    """Numba-compiled dot product of ``a`` and ``b``, rounded to 3 decimals."""
    inner = np.dot(a, b)
    return np.round(inner, 3)

In [36]:
%%time
print("Numba version: ")
f = numbaTest(a, b)
print(f)
print(f == c)
print(f == d)

Numba version: 
250000049.689
True
True
Wall time: 561 ms


In [37]:
%%time
print("Vectorized version:")
c = dot_product(a,b)
print(c)
print(d==c)
print(f==c)

Vectorized version:
250000049.689
True
True
Wall time: 807 ms


## Broadcasting

In [9]:
# 3x4 example matrix for the broadcasting demonstration.
# NOTE(review): the name `cal` used later suggests these are calorie figures — confirm what rows and columns represent.
A = np.array([[56.0, 0.0, 4.4, 68.0],
             [1.2, 104.0, 52.0, 8.0],
             [1.8, 135.0, 99.0, 0.9]])
print(A)

[[ 56.    0.    4.4  68. ]
 [  1.2 104.   52.    8. ]
 [  1.8 135.   99.    0.9]]


In [12]:
# Column totals: collapse the rows (axis 0), leaving one sum per column.
cal = np.sum(A, axis=0)
print(cal)

[ 59.  239.  155.4  76.9]


In [6]:
# Express every entry as a percentage of its column total.
# A has shape (3, 4) and cal has shape (4,); broadcasting applies cal to each row of A.
percentage = 100*A/cal # broadcasting
print(percentage)

[[94.91525424  0.          2.83140283 88.42652796]
 [ 2.03389831 43.51464435 33.46203346 10.40312094]
 [ 3.05084746 56.48535565 63.70656371  1.17035111]]


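###### (3,4) / (1,4) looks impossible, but it works: NumPy broadcasting treats `cal` (shape `(4,)`) as a `(1,4)` row and stretches it across all 3 rows of `A`.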

## Debugging Numpy -- Avoid rank 1 arrays

In [33]:
# np.random.rand(5) returns a 1-D ("rank 1") array of 5 values, not a row or column vector.
a = np.random.rand(5)
print(a)

[0.40909182 0.60063122 0.79368265 0.41080177 0.78870747]


In [34]:
# The shape has a single dimension — (5,) — rather than (5, 1) or (1, 5).
print(a.shape) # rank 1 array, neither row nor column vector

(5,)


In [35]:
# Transposing a rank-1 array changes nothing: the result looks exactly like `a`.
a.T

array([0.40909182, 0.60063122, 0.79368265, 0.41080177, 0.78870747])

In [36]:
# With two 1-D operands np.dot gives the inner product (a single number), not a 5x5 matrix.
print(np.dot(a, a.T))

1.9488636919562858


In [37]:
# Intentional failure: `a` is still rank 1 with shape (5,), so this assertion raises.
assert a.shape == (5, 1)

AssertionError: 

In [38]:
# Turn the rank-1 array into an explicit (5, 1) column vector.
a = a.reshape((5,1))

In [39]:
# Passes now that `a` has been reshaped into a column vector.
assert a.shape == (5, 1)

In [40]:
# Preferred approach: ask for the (5, 1) shape up front so `a` is a true column vector.
# Note this uses randn (standard normal), unlike the rand (uniform) call earlier.
a = np.random.randn(5, 1)
print(a)

[[ 0.45072018]
 [ 1.38967127]
 [-0.79255323]
 [-0.27818423]
 [ 0.30598728]]


In [41]:
# Transposing the (5, 1) column vector gives a (1, 5) row vector (note the double brackets).
print(a.T)

[[ 0.45072018  1.38967127 -0.79255323 -0.27818423  0.30598728]]


In [42]:
# (5, 1) times (1, 5) is a proper matrix product: the 5x5 outer product.
print(np.dot(a, a.T))

[[ 0.20314868  0.62635288 -0.35721973 -0.12538325  0.13791464]
 [ 0.62635288  1.93118624 -1.10138845 -0.38658464  0.42522173]
 [-0.35721973 -1.10138845  0.62814062  0.22047581 -0.2425112 ]
 [-0.12538325 -0.38658464  0.22047581  0.07738647 -0.08512084]
 [ 0.13791464  0.42522173 -0.2425112  -0.08512084  0.09362821]]


In [43]:
# Cheap shape check: confirms `a` is still the (5, 1) column vector created above.
assert a.shape == (5, 1)