In [1]:
#basic_sigmoid
import math
def basic_sigmoid(x):
    """
    Compute sigmoid of x.
    
    Arguments:
    x -- A scaler
    
    Return:
    s -- sigmoid(x)
    
    """
    s = 1/(1 + math.exp(-x))
    
    return s

In [2]:
basic_sigmoid(3)

0.9525741268224334

In [3]:
#importing numpy
import numpy as np

#example of np.exp
x = np.array([1,2,3])
print(np.exp(x))

[ 2.71828183  7.3890561  20.08553692]


In [4]:
#example of vector operation
x = np.array([1,2,3])
print(x+3)

[4 5 6]


### Implement the sigmoid function using numpy.

In [5]:
def sigmoid(x):
    """
    Compute the sigmoid of x
    
    Arguments:
    x -- A scalar or numpy array of any size
    
    Return:
    s -- sigmoid(x)
    """
    s = 1/(1 + np.exp(-x))
    
    return s

In [6]:
x = np.array([1,2,3])
sigmoid(x)

array([0.73105858, 0.88079708, 0.95257413])

## 1.2 - Sigmoid gradient

As you've seen in lecture, you will need to compute gradients to optimize loss functions using backpropagation. Let's code your first gradient function.

Exercise: Implement the function sigmoid_grad() to compute the gradient of the sigmoid function with respect to its input x. The formula is: $$sigmoid_derivative(x) = \sigma'(x) = \sigma(x) (1 - \sigma(x))\tag{2}$$ You often code this function in two steps:

    Set s to be the sigmoid of x. You might find your sigmoid(x) function useful.
    Compute $\sigma'(x) = s(1-s)$


In [7]:
def sigmoid_derivative(x):
        """
    Compute the gradient (also called the slope or derivative) of the sigmoid function with respect to its input x.
    You can store the output of the sigmoid function into variables and then use it to calculate the gradient.
    
    Arguments:
    x -- A scalar or numpy array

    Return:
    ds -- Your computed gradient.
    """
        s = 1/(1+np.exp(-x))
        ds = s * (1 - s)
    
        return ds

In [8]:
x = np.array([1,2,3])
sigmoid_derivative(x)

array([0.19661193, 0.10499359, 0.04517666])

## 1.3 - Reshaping arrays

Two common numpy functions used in deep learning are np.shape and np.reshape().

    X.shape is used to get the shape (dimension) of a matrix/vector X.
    X.reshape(...) is used to reshape X into some other dimension.


Implement image2vector() that takes an input of shape (length, height, 3) and returns a vector of shape (length*height*3, 1). For example, if you would like to reshape an array v of shape (a, b, c) into a vector of shape (a*b,c) you would do:

In [9]:
def image2vector(image):
    """
    Argument:
    image -- a numpy of array of shape(length, height, depth)
    
    Returns:
    v -- a vector of shape(length* height* depth, 1)
    
    """
    v = image.reshape((image.shape[0]*image.shape[1]*image.shape[2],1))
    
    return v

In [10]:
# This is a 3 by 3 by 2 array, typically images will be (num_px_x, num_px_y,3) where 3 represents the RGB values
image = np.array([[[ 0.67826139,  0.29380381],
        [ 0.90714982,  0.52835647],
        [ 0.4215251 ,  0.45017551]],

       [[ 0.92814219,  0.96677647],
        [ 0.85304703,  0.52351845],
        [ 0.19981397,  0.27417313]],

       [[ 0.60659855,  0.00533165],
        [ 0.10820313,  0.49978937],
        [ 0.34144279,  0.94630077]]])

print ("image2vector(image) = " + str(image2vector(image)))

image2vector(image) = [[0.67826139]
 [0.29380381]
 [0.90714982]
 [0.52835647]
 [0.4215251 ]
 [0.45017551]
 [0.92814219]
 [0.96677647]
 [0.85304703]
 [0.52351845]
 [0.19981397]
 [0.27417313]
 [0.60659855]
 [0.00533165]
 [0.10820313]
 [0.49978937]
 [0.34144279]
 [0.94630077]]


In [11]:
#normalizeRows
def Normalizerows(x):
    """
    Argument:
    x -- A numpy matrix of shape (n, m)
    
    Returns:
    x -- The normalized (by row) numpy matrix. 
    """
    x = np.linalg.norm(x, ord=2, axis=1, keepdims=True) 
    return x

In [12]:
x= np.array([
    [0,3,4],
    [1,6,4]])
print('normalizerows(x) =' + str(Normalizerows(x)))

normalizerows(x) =[[5.        ]
 [7.28010989]]


## Broadcasting and softmax Function

The term broadcasting describes how numpy treats arrays with different shapes during arithmetic operations. Subject to certain constraints, the smaller array is “broadcast” across the larger array so that they have compatible shapes. 

When operating on two arrays, NumPy compares their shapes element-wise. It starts with the trailing dimensions and works its way forward. Two dimensions are compatible when

    they are equal, or

    one of them is 1


In [13]:
#softmax
def softmax(x):
    """
    calculate the softmax for each row of the input x.
    
    Argument:
    x -- A numpy matrix of shape(n,m)
    
    Returns:
    s -- A numpy matrix of softmax of x of shape(n,m)
    
    """
     # Apply exp() element-wise to x
    x_exp = np.exp(x)
    print(x_exp.shape)
    
    # Create a vector x_sum that sums each row of x_exp.
    x_sum = np.sum(x_exp, keepdims = True, axis = 1)
    print(x_sum.shape)
    
    # Compute softmax(x) by dividing x_exp by x_sum. It should automatically use numpy broadcasting.
    s = x_exp/x_sum
    print(s.shape)
    
    return s

In [14]:
x = np.array([
    [9,2,2,2,2],
    [7,3,2,1,1]])
print("softmax(x) = " + str(softmax(x)))

(2, 5)
(2, 1)
(2, 5)
softmax(x) = [[9.96365728e-01 9.08567939e-04 9.08567939e-04 9.08567939e-04
  9.08567939e-04]
 [9.70863333e-01 1.77819822e-02 6.54162568e-03 2.40652960e-03
  2.40652960e-03]]


## Vectorization

In [27]:
import time

x1 = [9,2,5,0,0,7,5,0,0,0,9,2,5,0,0]
x2 = [9,2,2,9,0,9,2,5,0,9,2,5,1,2,2]

###classic DOT product of Vectors Implementation ###
tic = time.process_time()
dot = 0
for i in range(len(x1)):
    dot += x1[i]*x2[i]
toc = time.process_time()
print (" dot = " + str(dot) + "\n -----Computation time = " + str(1000*(toc-tic)) + "ms")

### Classic Outer Product Implementation ###
tic=time.process_time()
outer = np.zeros((len(x1),len(x2))) #we create a len(x1)*len(x2) matrix with only zeros
for i in range(len(x1)):
    for j in range(len(x2)):
        outer[i,j]=x1[i]*x2[j]
toc= time.process_time()
print("outer =" + str(outer) + "\n -----Computation time = " + str(1000*(toc-tic)) + "ms")

### Classic Elementwise Implementation ###
tic = time.process_time()
print ("tic " + str(tic))
mul = np.zeros(len(x1))
for i in range(len(x1)):
    mul[i]=x1[i]*x2[i]
toc = time.process_time()
print("elementwise multipication = " + str(mul) + "\n -----Computation time = " + str(1000*(toc-tic)) + "ms")

### Classic Genaral Dot product Implementation ###
W = np.random.rand(3,len(x1)) # Random 3*len(x1) numpy array
tic = time.process_time()
print ("tic " + str(tic))
gdot = np.zeros(W.shape[0])
for i in range(W.shape[0]):
    for j in range(len(x1)):
        gdot[i] += W[i,j]*x1[j]
toc = time.process_time()
print ("toc " +str(toc))
print ("gdot = " + str(gdot) + "\n ----- Computation time = " + str(1000*(toc - tic)) + "ms")

 dot = 201
 -----Computation time = 0.0ms
outer =[[81. 18. 18. 81.  0. 81. 18. 45.  0. 81. 18. 45.  9. 18. 18.]
 [18.  4.  4. 18.  0. 18.  4. 10.  0. 18.  4. 10.  2.  4.  4.]
 [45. 10. 10. 45.  0. 45. 10. 25.  0. 45. 10. 25.  5. 10. 10.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [63. 14. 14. 63.  0. 63. 14. 35.  0. 63. 14. 35.  7. 14. 14.]
 [45. 10. 10. 45.  0. 45. 10. 25.  0. 45. 10. 25.  5. 10. 10.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [81. 18. 18. 81.  0. 81. 18. 45.  0. 81. 18. 45.  9. 18. 18.]
 [18.  4.  4. 18.  0. 18.  4. 10.  0. 18.  4. 10.  2.  4.  4.]
 [45. 10. 10. 45.  0. 45. 10. 25.  0. 45. 10. 25.  5. 10. 10.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]]
 ----

### Vectorized 

In [28]:
x1 = [9, 2, 5, 0, 0, 7, 5, 0, 0, 0, 9, 2, 5, 0, 0]
x2 = [9, 2, 2, 9, 0, 9, 2, 5, 0, 0, 9, 2, 5, 0, 0]

### VECTORIZED DOT PRODUCT OF VECTORS ###
tic = time.process_time()
dot = np.dot(x1,x2)
toc = time.process_time()
print ("dot = " + str(dot) + "\n ----- Computation time = " + str(1000*(toc - tic)) + "ms")

### VECTORIZED OUTER PRODUCT ###
tic = time.process_time()
outer = np.outer(x1,x2)
toc = time.process_time()
print ("outer = " + str(outer) + "\n ----- Computation time = " + str(1000*(toc - tic)) + "ms")

### VECTORIZED ELEMENTWISE MULTIPLICATION ###
tic = time.process_time()
mul = np.multiply(x1,x2)
toc = time.process_time()
print ("elementwise multiplication = " + str(mul) + "\n ----- Computation time = " + str(1000*(toc - tic)) + "ms")

### VECTORIZED GENERAL DOT PRODUCT ###
tic = time.process_time()
dot = np.dot(W, x1)
toc = time.process_time()
print ("gdot = " + str(dot) + "\n ----- Computation time = " + str(1000*(toc - tic)) + "ms")

dot = 278
 ----- Computation time = 0.0ms
outer = [[81 18 18 81  0 81 18 45  0  0 81 18 45  0  0]
 [18  4  4 18  0 18  4 10  0  0 18  4 10  0  0]
 [45 10 10 45  0 45 10 25  0  0 45 10 25  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [63 14 14 63  0 63 14 35  0  0 63 14 35  0  0]
 [45 10 10 45  0 45 10 25  0  0 45 10 25  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [81 18 18 81  0 81 18 45  0  0 81 18 45  0  0]
 [18  4  4 18  0 18  4 10  0  0 18  4 10  0  0]
 [45 10 10 45  0 45 10 25  0  0 45 10 25  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]]
 ----- Computation time = 0.0ms
elementwise multiplication = [81  4 10  0  0 63 10  0  0  0 81  4 25  0  0]
 ----- Computation time = 0.0ms
gdot = [23.80465595 29.33007282 22.72589398]
 ----- Computation time = 0.0ms


## 2.1 Implement the L1 and L2 loss functions

Exercise: Implement the numpy vectorized version of the L1 loss. You may find the function abs(x) (absolute value of x) useful.

In [29]:
def L1(yhat, y):
    """
    Arguments
    yhat -- vector of size m(predicted lebels)
    y -- vector of size m (true labels)
    
    Returns:
    loss -- the value of the L1 loss function
    """
    
    loss = sum(abs(y-yhat))
    return loss


In [30]:
yhat = np.array([.9, 0.2, 0.1, .4, .9])
y = np.array([1, 0, 0, 1, 1])
print("L1 = " + str(L1(yhat,y)))

L1 = 1.1


In [31]:
def L2(yhat, y):
    """
    Arguments
    yhat -- vector of size m(predicted lebels)
    y -- vector of size m (true labels)
    
    Returns:
    loss -- the value of the L1 loss function
    """
    loss = sum((y-yhat)** 2)
    return loss

In [32]:
yhat = np.array([.9, 0.2, 0.1, .4, .9])
y = np.array([1, 0, 0, 1, 1])
print("L2 = " + str(L2(yhat,y)))

L2 = 0.43
