# Using vectorization in Python

## 1. Vectorized vs. For Loop

In [44]:
import numpy as np
import time

HOW_LONG_TO_RUN = 100

a = np.random.rand(1000000)
b = np.random.rand(1000000)

In [45]:
def vectorized(a, b, nb_times):
    m = []
    for i in xrange(nb_times):
        tic = time.time()
        c = np.dot(a, b)
        toc = time.time()
        #print("Vectorized version: " + str(1000*(toc - tic)) + " ms")
        m.append(1000*(toc - tic))

    mean_m = (np.array(m).sum())/float(nb_times)
    return mean_m, c

In [46]:
mean_m, c = vectorized(a, b, HOW_LONG_TO_RUN)
print("Mean of "+ str(HOW_LONG_TO_RUN) +" runnings as vectorized version: "+ str(mean_m) +" ms")
print("Value: "+ str(c))

Mean of 100 runnings as vectorized version: 0.910415649414 ms
Value: 250490.600103


In [47]:
def for_loop_zip(a, b, nb_times):
    m = []
    c = 0
    for i in xrange(nb_times):
        tic = time.time()
        for a_j, b_j in zip(a, b):
            c += a_j * b_j
        toc = time.time()
        #print("Zip version: " + str(1000*(toc - tic)) + " ms")
        m.append(1000*(toc - tic))

    mean_m = (np.array(m).sum())/float(nb_times)
    return mean_m, c

In [48]:
mean_m, c = for_loop_zip(a, b, HOW_LONG_TO_RUN)
print("Mean of "+ str(HOW_LONG_TO_RUN) +" runnings as for loop version: "+ str(mean_m) +" ms")
print("Value: "+ str(c))

Mean of 100 runnings as for loop version: 406.943519115 ms
Value: 25049060.0103


In [49]:
def for_loop(a, b, nb_times):
    m = []
    c = 0
    for i in xrange(nb_times):
        tic = time.time()
        for j in range(len(a)):
                c += a[j] * b[j]
        toc = time.time()
        #print("Loop version: " + str(1000*(toc - tic)) + " ms")
        m.append(1000*(toc - tic))

    mean_m = (np.array(m).sum())/float(nb_times)
    return mean_m, c

In [50]:
mean_m, c = for_loop(a, b, HOW_LONG_TO_RUN)
print("Mean of "+ str(HOW_LONG_TO_RUN) +" runnings as for loop version: "+ str(mean_m) +" ms")
print("Value: "+ str(c))

Mean of 100 runnings as for loop version: 349.924705029 ms
Value: 25049060.0103


## 2. Logistic Regression with vectors


### Calculating for $m$ examples and $n = 2$ features <font color='red'>without</font> vectorization

$
J = 0; \\
dw_1 = 0; \\
dw_2 = 0; \\
db = 0 \\
\\
\text{For}\ \ i=1\ \ \text{to}\ \ m \\
\hspace{15pt}z^{(i)} = w^Tx^{(i)} + b \\ 
\hspace{15pt}a^{(i)} = \sigma(z^{(i)}) \\
\hspace{15pt}J\ += - [y^{(i)}\ log(a^{(i)}) + (1 - y^{(i)})\ log(1 - a^{(i)})] \\
\hspace{15pt}dz^{(i)} = a^{(i)} - y^{(i)} \\
\hspace{15pt}dw_1 += x_1^{(i)}dz^{(i)} \\
\hspace{15pt}dw_2 += x_2^{(i)}dz^{(i)} \\
\hspace{15pt}db += dz^{(i)} \\
\hspace{15pt}J\ /=\ m \\
\hspace{15pt}dw_1\ /=\ m \\
\hspace{15pt}dw_2\ /=\ m \\
\hspace{15pt}db\ /=\ m \\
\text{End for}
$

### Updating weights

$
w_1\ := w_1 - \alpha dw_1 \\
w_2\ := w_2 - \alpha dw_2 \\
b\ := b - \alpha db \\
$

---
### Calculating for $m$ examples and $n = 2$ features <font color='red'>with</font> vectorization

$
J = 0; \\
dw = np.zeros((n_x, 1)); \\
db = 0; \\
\\
\text{For}\ \ i=1\ \ \text{to}\ \ m \\
\hspace{15pt}z^{(i)} = w^Tx^{(i)} + b \\ 
\hspace{15pt}a^{(i)} = \sigma(z^{(i)}) \\
\hspace{15pt}J\ += - [y^{(i)}\ log(a^{(i)}) + (1 - y^{(i)})\ log(1 - a^{(i)})] \\
\hspace{15pt}dz^{(i)} = a^{(i)} - y^{(i)} \\
\hspace{15pt}dw += x^{(i)}dz^{(i)} \\
\hspace{15pt}db += dz^{(i)} \\
\hspace{15pt}J\ /=\ m \\
\hspace{15pt}dw\ /=\ m \\
\hspace{15pt}db\ /=\ m \\
\text{End for}
$

### Updating weights

$
w\ := w - \alpha dw \\
b\ := b - \alpha db \\
$

## 3. Implementing Vectorization in Forward Propagation 

$
X = \begin{bmatrix}
| & | &  & |\\
x^{(1)} & x^{(2)} & \dots & x^{(m)} \\
| & | &  & |\\
\end{bmatrix} \hspace{15pt}\rightarrow\hspace{15pt} (n_x, m)\hspace{15pt} \mathbb{R}^{n_x,m}
$

$
Z = [ z^{(1)}\ \ z^{(2)}\ \ \dots z^{(m)}] = w^TX + [b, b, \dots b] \hspace{15pt}\rightarrow\hspace{15pt} b \in \mathbb{R} (1,1) matrix \\
Z = np.dot(w^T, X) + b \\
A = [ a^{(1)}\ \ a^{(2)}\ \ \dots a^{(m)}] = \sigma(Z)
$

## 4. Implementing Vectorization in Backward Propagation 

$
dz^{(1)} = a^{(1)} - y^{(1)} \hspace{15pt} dz^{(2)} = a^{(2)} - y^{(2)} \dots \\
dZ = [dz^{(1)}\ \ dz^{(2)}\ \ \dots dz^{(m)}] \\
$

Given that:

$
A = [a^{(1)}\ \ a^{(2)}\ \ \dots a^{(m)}]  \hspace{30pt} Y = [y^{(1)}\ \ y^{(2)}\ \ \dots y^{(m)}]\\
dZ = A - Y \hspace{15pt}\rightarrow\hspace{15pt} [a^{(1)}y^{(1)}\ \ a^{(2)}y^{(2)}\ \ \dots ]\\ 
db = \frac{1}{m} np.sum(dZ) \\
dw = \frac{1}{m} XdZ^T
$

$
dw = \frac{1}{m} \begin{bmatrix}
| & | &  & |\\
x^{(1)} & x^{(2)} & \dots & x^{(m)} \\
| & | &  & |\\
\end{bmatrix}\begin{bmatrix}
dz^{(1)}\\
dz^{(2)}\\
\vdots \\
dz^{(m)}\\
\end{bmatrix}
$
