### **Vectorization**: 
Method of utilizing functions that do faster numerical computation using parallel processing capabilities of CPU or GPU than for loops. 
In python numpy methods are vectorized, always perform better than for loops

In [1]:
import numpy as np 
import time

In [6]:
a = np.random.rand(1000000)
b = np.random.rand(1000000)

In [32]:
start = time.time()
z = np.dot(a,b)
end = time.time()
print("Vectorized time = {0}{1}".format(1000*(end -start),' milli secs'))
print(z)

Vectorized time = 0.7810592651367188 milli secs
250355.004922


In [31]:
start = time.time()
z = 0
for i in range(1000000):
    z+= a[i]*b[i]
end = time.time()
print("For Loop time = {0}{1}".format(1000*(end -start),' milli secs'))
print(z)
    

For Loop time = 346.8492031097412 milli secs
250355.004922


### Example of vectorization

In [247]:
# Vectorize 
a = np.arange(12).reshape(3,4)
b = np.arange(4).reshape(4,1)
c = np.zeros_like(a)

# a.shape = (3,4)
# b.shape = (4,1)

for i in range(3):
  for j in range(4):
    c[i][j] = a[i][j] + b[j]
print(a)
print(b)
print(c)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[0]
 [1]
 [2]
 [3]]
[[ 0  2  4  6]
 [ 4  6  8 10]
 [ 8 10 12 14]]


In [248]:
a + b.T

array([[ 0,  2,  4,  6],
       [ 4,  6,  8, 10],
       [ 8, 10, 12, 14]])

### Importance of Vectorization

In Deep Learning, we will mostly deal with massive calculations, vectorization can make a big difference in completing
a single iteration. 
On smaller data sets, it used to be 'good to have' option, in deep learning, it becomes must have

Numpy methods perform vectorization and are way faster than loops. 
Example: element wise operations performed by functions-
exp(x), abs(x), exp(x) , log(x), 1/x where x is a matrix

## Vectorization of Logistic Regression using representation in 1_LR_algo_math_rep.ipynb

### Function to compute $z^{(i)}, a^{(i)}$

In [225]:
import numpy as np
def Z(X, W, b):
    '''
    Argumnts : 
    X : n * m matrix , every feature is a row
    W : n*1 column vector
    b : a scalar
    
    Returns:
    z : 1*m matrix'''
    #print("dimension of X is {0}, W is {1}, B is {2}".format(str(X.shape), str(W.shape),str(B.shape)))
    z = np.dot(W.T,X)+ b  # use of broadcasting
    return(z)

def A(z):
    '''
    Arguments:
    z : 1*m matrix
    Returns:
    a : 1*m matrix
    '''
    a = 1/(1 + np.exp(-1*z))
    #print("dimension of a matrix are {0}".format(str(a.shape)))
    return(a)

### Function to compute Cost Function J

In [226]:
def Log_Loss(y_vec, a_vec):
    '''
    Arguments:
    y_vec: 1*m vector
    a_vec: 1*m vector
    Returns:
    loss : a scalar quantity'''
    #print("dimension of a matrices y and a are {0} and {1}".format(str(y_vec.shape),str(a_vec.shape)))
    m = y_vec.shape[1]
    loss = -1 *(y_vec * np.log(a_vec) + (1-y_vec)*(np.log(1-a_vec)))
    cost = (1/m) *np.sum(loss, axis = 1)
    return cost    

### Function to update weights/ implement one step of gradient descent

In [227]:
def grad_desc(X, w, a_vec, y_vec, b, alpha):
    m = y_vec.shape[0]
    '''
    The line for vector dw_vec below computes dw_j for all j in a single matrix multiplication
    
    Arguments:
    X: n*m matrix
    w: n*1 vector
    a_vec: 1*m vector
    y_vec: 1*m vector
    b : scalar
    alpha: learning rate, a scalar
    
    Returns:
    w_new : n*1 vector of updated weights
    b_new : scalar
    '''
    dz_vec = a_vec-y_vec
    dw_vec = np.dot(X, dz_vec.T) # n*1 vec
    db = np.sum(dz_vec, axis = 1)
    w_new =  w - (alpha/m)* dw_vec
    b_new = b - (alpha/m)*db
    
    return w_new,b_new
    

## Gradient Descent Implementation to train a logistic regression model

In [228]:

##### Logistic Regression Algorithm #############
#1. Randomly initialize weights, bias
#2. Compute z
#3. Compute a
#4. Compute Cost
#5. Adjust weights and bias
#6. Compute cost
#7. Check if cost of step 5 decreases than in step 3, if yes go to step 2, if no decrease happened for 10 iterations
#  stop, and return cost, weights, bias

In [233]:
def logistic_train(seed, iters, X, y, alpha):
    '''
       Arguments:
       X     : n*m matrix of numerical features
       y     : 1*m vector of binary labels
       iter  : max. no of iterations if exit criteria is not satisfied, to avoid long training iteration,
              switch to using larger learning rate
       alpha : learning rate  
       
       Returns:
       Function trains a logistic regression models, returns:
       1. Weight and bias vector corresponding to the least cost model
       2. Least cost
       3. Full cost vector for analysis of progression of costs
    '''
    np.random.seed(seed)
    Costs = []
    Weights = []
    Bias = []
    cost_inc = 0
    dim = X.shape
    #1. Initialize random weights as a column vector with elements equal to features (n*1)
    w_init = np.random.rand(dim[0],1)
    assert(w_init.shape == (dim[0],1))
    #1. Initialize random bias as a column vector with elements equal to records in feature matrix (m*1)
    b_init = np.random.rand()
    #2. Compute z
    z_init = Z(X,w_init,b_init)
    #3. Compute a
    a_init = A(z_init)
    #4. Compute initial Cost
    Cost_init = Log_Loss(y,a_init)
    Cost = 0
    for i in range(iters):
        if (i ==0):
            w = w_init
            b = b_init
            z = z_init
            a = a_init
            Cost = Cost_init
        #print(w,"\n")
        #print(Cost,"\n")    
        #5. Adjust weights and bias
        w,b = grad_desc(X = X, w = w, a_vec = a, y_vec = y, b = b, alpha = alpha)
        Weights.append(w)
        Bias.append(b)
        #6. Compute new cost
        z = Z(X,w,b)
        a = A(z)
        Cost_new= Log_Loss(y,a)
        Costs.append(Cost_new)
        if (Cost_new > Cost):
            cost_inc +=1
        if ((Cost_new > Cost) & cost_inc > 5):
            min_cost = min(Costs)
            ind = Costs.index(min_cost)
            return Weights[ind],Bias[ind],Costs[ind], Costs
        Cost = Cost_new
    min_cost = min(Costs)
    ind = Costs.index(min_cost)  
    return Weights[ind],Bias[ind],Costs[ind], Costs
    

### Test on sample data set

In [234]:
# Sample data set of 3 features, 5 records
years = [1,4,3,10,5]
rating_5yr_avg = [2,1,3,4,5]
salary_incr_5yr_avg = [5, 2.5, 3.5, 4, 6]
churn = [1,1,0,1,0]

# Feature matrix x, with a column representing a single feature with m  = 5 records
X = np.array([years, rating_5yr_avg, salary_incr_5yr_avg])
assert(X.shape == (3,5))
dim = X.shape
y = np.array(churn).reshape(1,dim[1])  # A row vector requires reshape to convert from rank 1 array to proper array
seed = 101
Weight_best,Bias_best,Cost_best, Costs = logistic_train(seed, 10000,X,y,0.1)


In [235]:
Weight_best

array([[  3.22152126],
       [-11.45107998],
       [  5.32748433]])

In [236]:
Bias_best

array([-0.20978417])

In [237]:
Cost_best

array([ 0.00074572])

In [238]:
#x
pred_z = Z(X, Weight_best,Bias_best)
pred_a = A(pred_z)

In [239]:
pred_a

array([[  9.98826979e-01,   9.99999517e-01,   1.92238360e-03,
          9.99453290e-01,   8.33255688e-05]])

In [240]:
y

array([[1, 1, 0, 1, 0]])

In [241]:
len(Costs)

10000