# Vectorization

In [20]:
import numpy as np
import time
import math

In [16]:
# Build a small integer array from a Python list and display it.
a = np.array([1, 2, 3, 4])
print(a)

[1 2 3 4]


## Vectorized version of a dot product

In [17]:
# Time NumPy's vectorized dot product of two random vectors with one million
# elements each.
million = 1000000
a = np.random.rand(million)
b = np.random.rand(million)

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short durations. time.time() is wall-clock time: it can be coarse
# on some platforms and may jump if the system clock is adjusted, which makes
# it unreliable for an operation that takes about a millisecond.
tic = time.perf_counter()
c = np.dot(a, b)
toc = time.perf_counter()

print(c)
print('Vectorized version: ' + str(1000*(toc-tic)) + 'ms')

249909.29600916288
Vectorized version: 1.071929931640625ms


In [18]:
# Dot product of `a` and `b` accumulated with an explicit Python loop, timed so
# it can be compared with the vectorized np.dot call.
# Relies on `a` and `b` already being defined by an earlier cell.
c = 0
million = 1000000

# time.perf_counter() is a monotonic, high-resolution clock intended for
# timing code; time.time() is wall-clock time and may be coarse or jump if the
# system clock is adjusted.
tic = time.perf_counter()
for i in range(million):
    c += a[i]*b[i]
toc = time.perf_counter()

print(c)
print('For loop: ' + str(1000*(toc-tic)) + 'ms')

249909.29600915493
For loop: 488.82079124450684ms


### Note
Whenever possible, avoid explicit for-loops

For example, say you need to apply the exponential function to every element of a matrix or vector:

In [39]:
# Apply the exponential to every element of v with a single vectorized call.
n = 100
v = np.arange(n)  # np.arange already returns an ndarray; no extra np.array() copy is needed

# np.exp allocates and returns its own result array, so `u` does not need to be
# pre-allocated with np.zeros beforehand.
# time.perf_counter() is used because the measured interval is far below a
# millisecond, which the wall-clock time.time() may not resolve reliably.
tic = time.perf_counter()
u = np.exp(v)
toc = time.perf_counter()

print('Vectorized version: ' + str(1000*(toc-tic)) + 'ms')

Vectorized version: 0.0591278076171875ms


In [40]:
# Element-wise exponential written as an explicit Python loop, timed so it can
# be compared with the vectorized np.exp call.
n = 100
v = np.arange(n)  # np.arange already returns an ndarray; no extra np.array() copy is needed
u = np.zeros((n, 1))  # (n, 1) column that the loop fills one entry at a time

# time.perf_counter() is a monotonic, high-resolution clock intended for
# timing code; time.time() is wall-clock time and may be too coarse for an
# interval this short.
tic = time.perf_counter()
for i in range(n):
    u[i, 0] = math.exp(v[i])
toc = time.perf_counter()

print('For loop: ' + str(1000*(toc-tic)) + 'ms')

For loop: 0.12087821960449219ms


# Tips and Tricks

## Avoid rank 1 arrays
In the next example, `a` is a one-dimensional array with five elements, also called a rank 1 array in NumPy; it is neither a row vector nor a column vector.
Printing `a.shape` returns `(5,)`.

In [51]:
# Rank 1 array: a single dimension of length 5.
a = np.random.randn(5)
a_transposed = a.T
inner = np.dot(a, a_transposed)

print(a)
print(a.shape)
print(a_transposed)
print(inner)

[-1.23545474 -0.34080391  0.07085952  0.24060343  1.81728232]
(5,)
[-1.23545474 -0.34080391  0.07085952  0.24060343  1.81728232]
5.007921826715062


The transpose of a rank 1 array looks the same as the array itself.
As a result, the dot product between `a` and its transpose returns a single number (the inner product) instead of an outer product matrix.

To avoid this, state both dimensions explicitly when creating the array, for example `np.random.randn(5, 1)` for a column vector.

In [52]:
# Explicit 5x1 column vector: both dimensions are given.
a = np.random.randn(5, 1)
a_row = a.T                 # 1x5 row vector
outer = np.dot(a, a_row)    # (5x1) . (1x5) -> 5x5 matrix

print(a)
print(a.shape)
print(a_row)
print(outer)

[[ 1.31509148]
 [ 0.96066851]
 [ 0.99344072]
 [ 1.44768354]
 [-0.90759636]]
(5, 1)
[[ 1.31509148  0.96066851  0.99344072  1.44768354 -0.90759636]]
[[ 1.7294656   1.26336698  1.30646543  1.90383629 -1.19357224]
 [ 1.26336698  0.92288399  0.95436722  1.39074399 -0.87189925]
 [ 1.30646543  0.95436722  0.98692446  1.43818777 -0.90164318]
 [ 1.90383629  1.39074399  1.43818777  2.09578762 -1.31391231]
 [-1.19357224 -0.87189925 -0.90164318 -1.31391231  0.82373115]]


In [59]:
# Element-wise product of a 3x3 matrix with a 3x1 column.
a = np.random.randn(3, 3)
b = np.random.randn(3, 1)
# NumPy broadcasts `b` across the columns of `a`, so row i of `a` is scaled by b[i, 0].
c = np.multiply(a, b)
c

array([[ 0.03791062, -0.24670332, -0.62617848],
       [ 0.03071569, -0.74492192, -2.57310989],
       [ 0.07548846, -0.19141862,  0.27891329]])