In [15]:
import numpy as np
import matplotlib.pyplot as plt
import h5py
from lr_utils import load_dataset
import torch
import tensorflow as tf

## 1. Import Data

In [7]:
# Unpack the five values returned by the course helper `load_dataset()`.
(
    train_set_x_orig,
    train_set_y,
    test_set_x_orig,
    test_set_y,
    classes,
) = load_dataset()

In [10]:
# Axis 0 of each raw array counts the examples; axis 1 is the image side length in pixels.
m_train, num_px = train_set_x_orig.shape[:2]
m_test = test_set_x_orig.shape[0]

In [11]:
# Print the dataset summary, one item per line.
print(
    "Number of training examples: m_train = " + str(m_train),
    "Number of testing examples: m_test = " + str(m_test),
    "Height/Width of each image: num_px = " + str(num_px),
    "Each image is of size: (" + str(num_px) + ", " + str(num_px) + ", 3)",
    "train_set_x shape: " + str(train_set_x_orig.shape),
    "train_set_y shape: " + str(train_set_y.shape),
    "test_set_x shape: " + str(test_set_x_orig.shape),
    "test_set_y shape: " + str(test_set_y.shape),
    sep="\n",
)

Number of training examples: m_train = 209
Number of testing examples: m_test = 50
Height/Width of each image: num_px = 64
Each image is of size: (64, 64, 3)
train_set_x shape: (209, 64, 64, 3)
train_set_y shape: (1, 209)
test_set_x shape: (50, 64, 64, 3)
test_set_y shape: (1, 50)


## 2. Reshape training and test examples

In [12]:
# Flatten every image into a single column: one row per example, then transpose
# so that examples run along axis 1.
train_set_x_flatten = train_set_x_orig.reshape(m_train, -1).transpose()
test_set_x_flatten = test_set_x_orig.reshape(m_test, -1).transpose()

# Show the resulting shapes next to the (unchanged) label shapes.
print(
    "train_set_x_flatten shape: " + str(train_set_x_flatten.shape),
    "train_set_y shape: " + str(train_set_y.shape),
    "test_set_x_flatten shape: " + str(test_set_x_flatten.shape),
    "test_set_y shape: " + str(test_set_y.shape),
    sep="\n",
)

train_set_x_flatten shape: (12288, 209)
train_set_y shape: (1, 209)
test_set_x_flatten shape: (12288, 50)
test_set_y shape: (1, 50)


### Rescale features (divide pixel values by 255)

In [13]:
# Divide every feature by 255 (presumably the maximum 8-bit pixel intensity,
# which would put the features in [0, 1] -- confirm against the dataset).
train_set_x = np.divide(train_set_x_flatten, 255.)
test_set_x = np.divide(test_set_x_flatten, 255.)

# Build Model

<a name='3'></a>
## 3 - General Architecture of the learning algorithm ##

It's time to design a simple algorithm to distinguish cat images from non-cat images.

You will build a Logistic Regression, using a Neural Network mindset. The following Figure explains why **Logistic Regression is actually a very simple Neural Network!**

<img src="images/LogReg_kiank.png" style="width:650px;height:400px;">

**Mathematical expression of the algorithm**:

For one example $x^{(i)}$:
$$z^{(i)} = w^T x^{(i)} + b \tag{1}$$
$$\hat{y}^{(i)} = a^{(i)} = sigmoid(z^{(i)})\tag{2}$$ 
$$ \mathcal{L}(a^{(i)}, y^{(i)}) =  - y^{(i)}  \log(a^{(i)}) - (1-y^{(i)} )  \log(1-a^{(i)})\tag{3}$$

The cost is then computed by averaging the loss over all training examples:
$$ J = \frac{1}{m} \sum_{i=1}^m \mathcal{L}(a^{(i)}, y^{(i)})\tag{6}$$

**Key steps**:
In this exercise, you will carry out the following steps:

- Initialize the parameters of the model
- Learn the parameters for the model by minimizing the cost
- Use the learned parameters to make predictions (on the test set)
- Analyse the results and conclude

## 1. Sigmoid

In [16]:
def sigmoid_np(x):
    """
    Element-wise logistic sigmoid, 1 / (1 + exp(-x)), computed with NumPy.

    Argument:
    x -- a scalar or numpy array

    Returns:
    the sigmoid of x, element by element
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_torch(x):
    """
    Element-wise logistic sigmoid, 1 / (1 + exp(-x)), computed with PyTorch.

    Argument:
    x -- a torch tensor

    Returns:
    a tensor holding the sigmoid of each element of x
    """
    denominator = 1 + torch.exp(-x)
    return 1 / denominator

@tf.function
def sigmoid_tf(x):
    """
    Element-wise logistic sigmoid, 1 / (1 + exp(-x)), computed with TensorFlow.

    Argument:
    x -- a TensorFlow tensor

    Returns:
    a tensor holding the sigmoid of each element of x
    """
    denominator = 1 + tf.math.exp(-x)
    return 1 / denominator

## 2. Initializing parameters

In [18]:
def initialize_with_zeros_np(dim):
    """
    Build zero-valued starting parameters with NumPy.

    Argument:
    dim -- number of entries in the weight vector (the number of parameters)

    Returns:
    w -- numpy array of zeros with shape (dim, 1)
    b -- the bias, the Python float 0.0
    """
    weights = np.zeros(shape=(dim, 1))
    bias = 0.0
    return weights, bias

def initialize_with_zeros_torch(dim):
    """
    Build zero-valued starting parameters with PyTorch.

    Argument:
    dim -- number of entries in the weight vector

    Returns:
    w -- tensor of zeros with shape (dim, 1)
    b -- tensor of zeros with shape (1,) (the bias is a tensor, not a float)
    """
    weights = torch.zeros(dim, 1)
    bias = torch.zeros(1)
    return weights, bias

def initialize_with_zeros_tf(dim):
    """
    Build zero-valued starting parameters with TensorFlow.

    Argument:
    dim -- number of entries in the weight vector

    Returns:
    w -- tensor of zeros with shape (dim, 1)
    b -- tensor of zeros created with tf.zeros(1) (the bias is a tensor, not a float)
    """
    weights = tf.zeros(shape=(dim, 1))
    bias = tf.zeros(1)
    return weights, bias

## 3. Forward and backward propagation

Forward Propagation:
- You get X
- You compute $A = \sigma(w^T X + b) = (a^{(1)}, a^{(2)}, ..., a^{(m-1)}, a^{(m)})$
- You calculate the cost function: $J = -\frac{1}{m}\sum_{i=1}^{m}(y^{(i)}\log(a^{(i)})+(1-y^{(i)})\log(1-a^{(i)}))$

Backward Propagation: here are the two gradient formulas you will be using:

$$ \frac{\partial J}{\partial w} = \frac{1}{m}X(A-Y)^T\tag{7}$$
$$ \frac{\partial J}{\partial b} = \frac{1}{m} \sum_{i=1}^m (a^{(i)}-y^{(i)})\tag{8}$$

In [65]:
def propagate_np(w, b, X, Y):
    """
    Implement the cost function and its gradient for the propagation explained above

    Arguments:
    w -- weights, a numpy array of size (num_px * num_px * 3, 1)
    b -- bias, a scalar
    X -- data of size (num_px * num_px * 3, number of examples)
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat) of size (1, number of examples)

    Return:
    grads -- dictionary with the gradients:
             "dw" -- gradient of the cost with respect to w, thus same shape as w
             "db" -- gradient of the cost with respect to b, a scalar
    cost -- negative log-likelihood cost for logistic regression

    Note:
    The activations are clamped away from exactly 0 and 1 before taking the
    logarithms, so a saturated sigmoid yields a large finite cost instead of
    nan/inf. The gradients are computed from the unclamped activations.
    """

    m = X.shape[1]

    # FORWARD PROPAGATION (FROM X TO COST)

    A = sigmoid_np(np.dot(w.T, X) + b)

    # log(0) is -inf and 0 * -inf is nan, so keep the log arguments strictly
    # inside (0, 1). The bounds are the smallest normal float and the largest
    # float below 1, so activations strictly between those bounds are unchanged.
    finfo = np.finfo(A.dtype)
    A_safe = np.clip(A, finfo.tiny, 1 - finfo.eps / 2)
    cost = (-1 / m) * np.sum(Y * np.log(A_safe) + (1 - Y) * np.log(1 - A_safe))

    # BACKWARD PROPAGATION (TO FIND GRAD)

    dw = (1 / m) * np.dot(X, (A - Y).T)
    db = (1 / m) * np.sum(A - Y)

    cost = np.squeeze(cost)

    grads = {"dw": dw,
             "db": db}

    return grads, cost

<object data="images/Forward_bacward_notes.pdf" type="application/pdf" width="700px" height="700px">
    <embed src="images/Forward_bacward_notes.pdf">
        <p>This browser does not support PDFs. Please download the PDF to view it: <a href="images/Forward_bacward_notes.pdf">Download PDF</a>.</p>
    </embed>
</object>


In [66]:
def propagate_torch(w, b, X, Y):
    """
    PyTorch version of the logistic-regression cost and its gradients.

    Arguments:
    w -- weights, a 2-D tensor with one column (it is transposed and matrix-multiplied with X)
    b -- bias, a Python scalar or a tensor that broadcasts against w.T @ X
    X -- data, a 2-D tensor with one column per example
    Y -- true labels, a tensor with the same shape as w.T @ X

    Return:
    grads -- dictionary with the gradients:
             "dw" -- gradient of the cost with respect to w, same shape as w
             "db" -- gradient of the cost with respect to b, a 0-d tensor
    cost -- negative log-likelihood cost, a 0-d tensor

    Note:
    The activations are clamped away from exactly 0 and 1 before taking the
    logarithms, so a saturated sigmoid yields a large finite cost instead of
    nan/inf. The gradients are computed from the unclamped activations.
    """

    m = X.shape[1]

    # FORWARD PROPAGATION (FROM X TO COST)

    A = sigmoid_torch(torch.mm(w.T, X) + b)

    # log(0) is -inf and 0 * -inf is nan, so keep the log arguments strictly
    # inside (0, 1). The bounds are the smallest normal float and the largest
    # float below 1 for A's dtype, so activations strictly between those
    # bounds are unchanged.
    finfo = torch.finfo(A.dtype)
    A_safe = torch.clamp(A, min=finfo.tiny, max=1 - finfo.eps / 2)
    cost = (-1 / m) * torch.sum(Y * torch.log(A_safe) + (1 - Y) * torch.log(1 - A_safe))

    # BACKWARD PROPAGATION (TO FIND GRAD)

    dw = (1 / m) * torch.mm(X, (A - Y).T)
    db = (1 / m) * torch.sum(A - Y)

    cost = torch.squeeze(cost)

    grads = {"dw": dw,
             "db": db}

    return grads, cost

In [76]:
@tf.function
def propagate_tf(w, b, X, Y):
    """
    TensorFlow version of the logistic-regression cost and its gradients.

    Arguments:
    w -- weights, a 2-D tensor with one column (it is transposed and contracted with X)
    b -- bias, a Python scalar or a tensor that broadcasts against transpose(w) . X
    X -- data, a 2-D tensor with one column per example
    Y -- true labels, a tensor with the same shape as transpose(w) . X

    Return:
    grads -- dictionary with the gradients:
             "dw" -- gradient of the cost with respect to w, same shape as w
             "db" -- gradient of the cost with respect to b, a scalar tensor
    cost -- negative log-likelihood cost, a scalar tensor

    Note:
    The activations are clamped away from exactly 0 and 1 before taking the
    logarithms, so a saturated sigmoid yields a large finite cost instead of
    nan/inf. The gradients are computed from the unclamped activations.
    """

    m = X.shape[1]

    # FORWARD PROPAGATION (FROM X TO COST)

    A = sigmoid_tf(
        tf.tensordot(
            tf.transpose(w), X, axes=1) + b)

    # log(0) is -inf and 0 * -inf is nan, so keep the log arguments strictly
    # inside (0, 1). The bounds are passed as plain Python floats so that
    # TensorFlow converts them to A's dtype; they are the smallest normal float
    # and the largest float below 1 for that dtype.
    finfo = np.finfo(A.dtype.as_numpy_dtype)
    A_safe = tf.clip_by_value(A, float(finfo.tiny), float(1 - finfo.eps / 2))

    cost = (-1 / m) * tf.math.reduce_sum(Y * tf.math.log(A_safe)
                                         + (1 - Y) * tf.math.log(1 - A_safe))

    # BACKWARD PROPAGATION (TO FIND GRAD)

    dw = (1 / m) * tf.tensordot(X, tf.transpose(A - Y), axes=1)
    db = (1 / m) * tf.math.reduce_sum(A - Y)

    cost = tf.squeeze(cost)

    grads = {"dw": dw,
             "db": db}

    return grads, cost

### tests

In [68]:
# Small hand-checkable fixture: 2 features, 3 examples.
w = np.array([[1.], [2]])
b = 1.5
X = np.array([[1., -2., -1.], [3., 0.5, -3.2]])
Y = np.array([[1, 1, 0]])

# torch.Tensor(...) builds tensors of torch's default floating dtype, so all
# four torch fixtures share one dtype.
w_torch = torch.Tensor(w)
b_torch = torch.Tensor([b])
X_torch = torch.Tensor(X)
Y_torch = torch.Tensor(Y)

# TensorFlow does not promote dtypes implicitly. `w` and `X` convert to float64
# (their NumPy dtype), so the Python-float bias and the integer labels are
# converted to float64 explicitly to stay compatible with them.
w_tf = tf.convert_to_tensor(w)
b_tf = tf.convert_to_tensor([b], dtype='float64')
X_tf = tf.convert_to_tensor(X)
Y_tf = tf.convert_to_tensor(Y, dtype='float64')

In [69]:
# Run the NumPy implementation on the small fixture; the bare tuple on the
# last line is what the cell displays.
grads, cost = propagate_np(w, b, X, Y)
grads, cost

({'dw': array([[ 0.25071532],
         [-0.06604096]]),
  'db': -0.1250040450043965},
 0.15900537707692405)

In [70]:
# Run the PyTorch implementation on the torch fixtures. The bias is passed as
# the tensor `b_torch` (not the Python float `b`) so the check exercises the
# same kind of bias that initialize_with_zeros_torch returns.
grads, cost = propagate_torch(w_torch, b_torch, X_torch, Y_torch)
grads, cost

({'dw': tensor([[ 0.2507],
          [-0.0660]]),
  'db': tensor(-0.1250)},
 tensor(0.1590))

In [77]:
# Run the TensorFlow implementation on the TF fixtures; the bare tuple on the
# last line is what the cell displays.
# NOTE(review): the bias is passed as the Python float `b`, not the tensor
# `b_tf` -- confirm whether exercising a tensor bias was intended.
grads, cost = propagate_tf(w_tf, b, X_tf, Y_tf)
grads, cost

({'dw': <tf.Tensor: shape=(2, 1), dtype=float64, numpy=
  array([[ 0.25071532],
         [-0.06604096]])>,
  'db': <tf.Tensor: shape=(), dtype=float64, numpy=-0.1250040450043965>},
 <tf.Tensor: shape=(), dtype=float64, numpy=0.15900537707692405>)