# Vectorisation in Python

In [2]:
import numpy as np

* We can obtain $z = w^T*x+b$ by using np.dot in python

In [6]:
x = np.array([1,2,3,50,100])
y = np.array([3,4,5,10,47])
b = 10
z = np.dot(x,y)+b
print(z)

5236


## Speed up in time for vectorisation
* vectorisation exploits parallelisation in both GPU's and CPU's and as a result are a lot faster than for loops. 

In [15]:
import time 

# initialise two random vectors of dimension 1 million

a = np.random.rand(10000000)
b = np.random.rand(10000000)

# We start with the vectorised version
tic = time.time()

c = np.dot(a,b)

toc = time.time()

diff_vectorised = toc - tic

print('Vectorised version in milliseconds :'+str(diff_vectorised*1000)+'ms')

# Next we try the for loop

tic = time.time()

sum = 0
for index in range(1000000):
    sum += a[index]*b[index]

toc = time.time()

diff_looped = toc - tic

print('looped version in milliseconds :'+str(diff_looped*1000)+'ms')

print('The speed up by using vectorisation is '+str(1/(diff_vectorised/diff_looped))+ ' times')


Vectorised version in milliseconds :9.408950805664062ms
looped version in milliseconds :426.32484436035156ms
The speed up by using vectorisation is 45.31056152442732 times


In [17]:
v = np.array([2.7,100,2])
b = np.exp(v) # elementwise exponential operation
print(b)

[1.48797317e+01 2.68811714e+43 7.38905610e+00]


## Vectorising logistic regression forward pass 

* Recall we have $m$ training examples $(x_{1},y_{1}),\ldots,(x_{m},y_{m})$ with $x_{i} \in \mathbb{R}^{n_{x}}$
* For each example we compute $z_{i} = w^{T}x_{i}+b$ and then $a_{i} = \sigma(z_{i})$





In [50]:
# data

n_x = 10    # feature dimensionality
m = 3 # training sample size 
x1 = np.random.rand(n_x)
x2 = np.random.rand(n_x)
x3 = np.random.rand(n_x)
y1 = 0
y2 = 1
y3 = 0

y = np.array([y1,y2,y3])
w = np.array([100,2,3,5,10,200,30,1,6,8])   # weights vector

In [37]:
X = np.stack((x1,x2,x3),axis = -1)
print(X)

[[0.39732429 0.0879649  0.8744926 ]
 [0.8491405  0.88313677 0.6331414 ]
 [0.55149249 0.72980439 0.96729557]
 [0.05238606 0.97006001 0.00266972]
 [0.34761441 0.69797405 0.64967418]
 [0.58261928 0.22312668 0.95458838]
 [0.52374288 0.38721469 0.82597694]
 [0.93689477 0.07932701 0.41658208]
 [0.02695664 0.54463197 0.0602862 ]
 [0.16167373 0.43218378 0.66454221]]


In [39]:
z = np.dot(w.T,X)+10
z

array([191.45142765,  97.62858254, 329.91914098])

In [47]:
import math
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [49]:
# Equivalent to A = [z1,z2,z3]
# Since Z values are large a values will be near 1
A = sigmoid(z)
A

array([1., 1., 1.])

## Vectorising linear regression backward pass for a single iteration


Loss Function
* Recall we have $m$ training examples $(x_{1},y_{1}),\ldots,(x_{m},y_{m})$ with $x_{i} \in \mathbb{R}^{n_{x}}$
* For each example we compute $z_{i} = w^{T}x_{i}+b$ and then $a_{i} = \sigma(z_{i})$

* The loss function w.r.t one example say example $i$ is $L(a_{i},y_{i})=-y_{i}\mathrm{log}(a_{i})+(1-y_{i})\mathrm{log}(1-a_{i})$ where $a_{i} = \sigma(z_{i})$
* 'dai' is then $\frac{\partial L}{\partial a_{i}} = \frac{-y}{a_{i}}+\frac{1-y}{1-a_{i}}$
* 'dzi' is then $\frac{\partial L}{\partial z_{i}} = \frac{\partial L}{\partial a_{i}} \cdot \frac{\partial a_{i}}{\partial z_{i}} = a_{i}-y$
* 'dwi' is then $\frac{\partial L}{\partial w_{i}} = x_{i}dz_{i}$
* 'dbi' is then $\frac{\partial L}{\partial b_{i}} = dz_{i}$



Cost Function
* Now that we have a value for the loss w.r.t a single training example $1 \leq i \leq m$. We average to get the cost function $$J(w,b) = \frac{1}{m} \sum_{i=1}^{m} L(a^{i},y)$$
* We can distribute the partial derivatives across the sum for any variable we are interested in as follows
$$ \frac{\partial J}{\partial w_{i}} = \frac{1}{m} \sum_{i=1}^{m} \frac{\partial}{\partial w_{i}} L(a_{i},y^{i}) $$
* In our example dz = [dz1,dz2,dz3] = [a1-y1,a2-y2,a3-y3] = A-y
* $db = \frac{1}{m} \sum_{i=1}^{m} dz_{i}$ and we can calculate this as db = 1/m * np.sum(dz) 
* $dw = 1/m \cdot X \cdot dz^{T}$ since $X = [x^1,x^2,\ldots,x^m]$ 

Updates 

* $w = w-\alpha \cdot dw$
* $b = b-\alpha \cdot db$


# We can now run this code over a number of iterations enclosed in a for loop

    num_iters = 1000
    for i in range(num_iters):
        z = np.dot(w.T,X)+b   #z = w^T X+b
        A = \sigma(Z)
        dz = A-y
        dw = 1/m * X * dz^T
        db = 1/m * np.sum(dz)
        
        
        w = w-\alpha*dw
        b = b-\alpha*db
        
        
    

## Broadcasting Example

In [54]:
A = np.array([[56.0, 0.0, 10.0,11.2],[34,56,70.0,23],[24,47,11,15]])

In [55]:
print(A)

[[56.   0.  10.  11.2]
 [34.  56.  70.  23. ]
 [24.  47.  11.  15. ]]


In [59]:
col_sum = A.sum(axis = 0) # column sum
print(col_sum)
print(col_sum.shape)

[114.  103.   91.   49.2]
(4,)


In [60]:
print(col_sum.reshape(1,4))
print(col_sum.reshape(1,4).shape)

[[114.  103.   91.   49.2]]
(1, 4)


In [58]:
percentage = 100*A/col_sum.reshape(1,4) # reshapes the array
percentage
# divides a (3,4) matrix by a (1,4) matrix
# 3 lots of (1,4) rows and we divide each row pairwise by another (1,4) matrix

array([[49.12280702,  0.        , 10.98901099, 22.76422764],
       [29.8245614 , 54.36893204, 76.92307692, 46.74796748],
       [21.05263158, 45.63106796, 12.08791209, 30.48780488]])

In [65]:
B = np.array([[1],[2],[3],[4]])
print(B.shape)
print(B+100)
C = np.array([100])


(4, 1)
[[101]
 [102]
 [103]
 [104]]


In [66]:
B+C

array([[101],
       [102],
       [103],
       [104]])

In [76]:
D = np.array([[3,4,5],[1,2,3]])
E = np.array([[100,50,60]])
D+E

array([[103,  54,  65],
       [101,  52,  63]])

In [77]:
a = np.random.rand(5) 
print(a)
# this is a rank 1 array

[0.16537173 0.45427248 0.32270944 0.87369049 0.05063514]


In [78]:
b = np.random.rand(5,1) 
print(b)
# this is a (5,1) column vector

[[0.73489608]
 [0.49890107]
 [0.25980254]
 [0.92120178]
 [0.70169483]]


In [79]:
c = np.random.rand(1,5)
print(c)
# this is a (1,5) row vector

[[0.27267061 0.27367064 0.08902385 0.24000531 0.93662333]]
