In [None]:
# # This Python 3 environment comes with many helpful analytics libraries installed
# # It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# # For example, here's several helpful packages to load in 

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively list every file under the Kaggle input directory, one full path per line.
for folder, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

#Any results you write to the current directory are saved as output.



In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import h5py #to interact with a dataset that is stored on an H5 file
import scipy
from PIL import Image #PIL and scipy are imported to test model with my own picture
from scipy import ndimage

def load_dataset():
    """Load the cat / non-cat dataset from the two Kaggle HDF5 files.

    Each file is opened read-only inside a ``with`` block, so the handle is
    released as soon as its arrays have been copied into memory instead of
    staying open for the lifetime of the kernel.

    Returns:
        tuple: ``(train_set_x_orig, train_set_y_orig, test_set_x_orig,
        test_set_y_orig, classes)`` where

        * ``train_set_x_orig`` / ``test_set_x_orig`` are in-memory copies of
          the "train_set_x" / "test_set_x" datasets,
        * ``train_set_y_orig`` / ``test_set_y_orig`` are the label datasets
          reshaped to a single row, i.e. shape ``(1, number_of_labels)``,
        * ``classes`` is a copy of the "list_classes" dataset of the test file.
    """
    with h5py.File('/kaggle/input/ngdlcourse1week2/train_catvnoncat.h5', "r") as train_dataset:
        # np.array(...[:]) copies the data out of the file before it is closed.
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # train set features
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # train set labels

    with h5py.File('/kaggle/input/ngdlcourse1week2/test_catvnoncat.h5', "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # test set features
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # test set labels
        classes = np.array(test_dataset["list_classes"][:])  # the list of classes

    # Turn each label array into a row vector of shape (1, m), where m is the
    # length of its first axis (presumably the labels are stored 1-D - verify).
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

In [None]:

# Load the cat / non-cat data: image arrays, label row vectors and class names.
(
    train_set_x_orig,
    train_set_y,
    test_set_x_orig,
    test_set_y,
    classes,
) = load_dataset()

In [None]:
# Display one training image together with its label and class name.
index = 11
plt.imshow(train_set_x_orig[index])

# train_set_y[:, index] is a one-element array; np.squeeze reduces it to a 0-d
# value that can be used directly as an index into `classes`.
label = np.squeeze(train_set_y[:, index])
class_name = classes[label].decode("utf-8")
print(label)
print("y = " + str(train_set_y[:, index]) + ", it's a '" + class_name + "' picture.")

In [None]:
# Axis 0 of the image arrays counts examples, axis 1 is the image side length;
# the labels are a row vector, so axis 1 counts labels.
m_train, num_px = train_set_x_orig.shape[:2]
m_test = test_set_x_orig.shape[0]
m_label = train_set_y.shape[1]

for caption, value in (
    ("Number of training examples: m_train = ", m_train),
    ("Number of testing examples: m_test = ", m_test),
    ("Height/Width of each image: num_px = ", num_px),
    ("Number of labels in training set: m_label = ", m_label),
):
    print(caption + str(value))

print("Each image is of size: (" + str(num_px) + ", " + str(num_px) + ", 3)")

for caption, array in (
    ("train_set_x shape: ", train_set_x_orig),
    ("train_set_y shape: ", train_set_y),
    ("test_set_x shape: ", test_set_x_orig),
    ("test_set_y shape: ", test_set_y),
):
    print(caption + str(array.shape))

Exercise: Reshape the training and test data sets so that images of size (num_px, num_px, 3) are flattened into single vectors of shape (num_px * num_px * 3, 1).

A trick when you want to flatten a matrix X of shape (a,b,c,d) to a matrix X_flatten of shape (b*c*d, a) is to use:

X_flatten = X.reshape(X.shape[0], -1).T      # X.T is the transpose of X

In [None]:
# Flatten every image into one column: keep axis 0 (examples), collapse the
# remaining axes, then transpose so that each column is one example.
train_set_x_flatten = train_set_x_orig.reshape(len(train_set_x_orig), -1).T
test_set_x_flatten = test_set_x_orig.reshape(len(test_set_x_orig), -1).T

print("train_set_x_flatten shape: " + str(train_set_x_flatten.shape))
print("train_set_y shape: " + str(train_set_y.shape))
print("test_set_x_flatten shape: " + str(test_set_x_flatten.shape))
print("test_set_y shape: " + str(test_set_y.shape))

# First three features of the first example, as a quick visual check.
first_values = train_set_x_flatten[0:3, 0]
print("sanity check after reshaping: " + str(first_values))

To represent color images, the red, green and blue channels (RGB) must be specified for each pixel, and so the pixel value is actually a vector of three numbers ranging from 0 to 255.

One **common preprocessing step in machine learning is to center and standardize your dataset**, meaning that you **subtract the mean of the whole numpy array from each example**, and then **divide each example by the standard deviation of the whole numpy array**. But for picture datasets, it is simpler and more convenient and works almost as well to just divide every row of the dataset by 255 (the maximum value of a pixel channel).

<!-- During the training of your model, you're going to multiply weights and add biases to some initial inputs in order to observe neuron activations. Then you backpropogate with the gradients to train the model. But, it is extremely important for each feature to have a similar range such that our gradients don't explode. You will see that more in detail later in the lectures. !--> 

Let's standardize our dataset.

In [None]:
# Scale the pixel values by 255, the maximum value of a pixel channel.
PIXEL_MAX = 255
train_set_x = train_set_x_flatten / PIXEL_MAX
test_set_x = test_set_x_flatten / PIXEL_MAX

print(train_set_x.shape)
print(train_set_x)

<font color='blue'>
**What you need to remember:**

Common steps for pre-processing a new dataset are:
- Figure out the dimensions and shapes of the problem (m_train, m_test, num_px, ...)
- Reshape the datasets such that each example (here, each image) is now a vector of size (num_px \* num_px \* 3, 1)
- "Standardize" the data

## 4 - Building the parts of our algorithm ## 

The main steps for building a Neural Network are:
1. Define the model structure (such as number of input features) [here there are 64X64X3 features]
2. Initialize the model's parameters
3. Loop:
    - Calculate current loss (forward propagation)
    - Calculate current gradient (backward propagation)
    - Update parameters (gradient descent)

You often build 1-3 separately and integrate them into one function we call `model()`.

### 4.1 - Helper functions

**Exercise**: Using your code from "Python Basics", implement `sigmoid()`. As you've seen in the figure above, you need to compute $sigmoid( w^T x + b) = \frac{1}{1 + e^{-(w^T x + b)}}$ to make predictions. Use np.exp().

In [None]:
def sigmoid(z):
    """Apply the logistic function 1 / (1 + e^(-z)) element-wise.

    z -- a scalar or a numpy array of any size

    Returns the sigmoid of z, with the same shape as z.
    """
    return 1 / (1 + np.exp(-z))



In [None]:
# Quick check of sigmoid on a two-element array.
sample_z = np.array([0, 2])
print("sigmoid([0,2]): " + str(sigmoid(sample_z)))

### 4.2 - Initializing parameters

**Exercise:** Implement parameter initialization in the cell below. You have to initialize w as a vector of zeros. If you don't know what numpy function to use, look up np.zeros() in the Numpy library's documentation.

In [None]:
def init_w_b(dimension):
    """Create zero-initialised logistic-regression parameters.

    dimension -- number of rows of the weight column vector

    Returns (w, b): w is a (dimension, 1) array of zeros and b is the scalar 0.
    """
    b = 0
    w = np.zeros(shape=(dimension, 1))

    # Sanity checks on the freshly created parameters.
    assert w.shape == (dimension, 1)
    assert isinstance(b, (float, int))

    return w, b

In [None]:
# Try the initialiser on a two-feature problem and show the result.
dim = 2
w, b = init_w_b(dim)
print("w = " + str(w))
print("b = " + str(b))

### 4.3 - Forward and Backward propagation

Now that your parameters are initialized, you can do the "forward" and "backward" propagation steps for learning the parameters.

**Exercise:** Implement a function `propagate()` that computes the cost function and its gradient.

**Hints**:

Forward Propagation:
- You get X
- You compute $A = \sigma(w^T X + b) = (a^{(1)}, a^{(2)}, ..., a^{(m-1)}, a^{(m)})$
- You calculate the cost function: $J = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log(a^{(i)})+(1-y^{(i)})\log(1-a^{(i)})\right]$

Here are the two formulas you will be using: 

$$ \frac{\partial J}{\partial w} = \frac{1}{m}X(A-Y)^T\tag{7}$$
$$ \frac{\partial J}{\partial b} = \frac{1}{m} \sum_{i=1}^m (a^{(i)}-y^{(i)})\tag{8}$$

In [None]:
def propagate(w, b, X, Y):
    """Compute the logistic-regression cost and its gradients for one pass.

    w -- weight column vector
    b -- bias
    X -- input data, one example per column
    Y -- labels, broadcast against the activations of shape (1, X.shape[1])

    Returns (gradient, cost): gradient is a dict with "dw" (same shape as w)
    and "db" (a float scalar); cost is the cross-entropy cost as a 0-d value.
    """
    num_examples = X.shape[1]

    # Forward pass: activations, then the cross-entropy summed over all entries.
    activations = sigmoid(np.dot(w.T, X) + b)
    log_likelihood = Y * np.log(activations) + (1 - Y) * np.log(1 - activations)
    cost = -np.sum(log_likelihood) / num_examples

    # Backward pass: both gradients are built from the prediction error.
    error = activations - Y
    dw = np.dot(X, error.T) / num_examples  # one row per feature
    db = np.sum(error) / num_examples

    assert dw.shape == w.shape
    assert db.dtype == float
    cost = np.squeeze(cost)
    assert cost.shape == ()

    return {"dw": dw, "db": db}, cost

In [None]:
# Small hand-made example: 2 features, 3 examples.
w = np.array([[1.], [2.]])
b = 2.
X = np.array([[1., 2., -1.], [3., 4., -3.2]])
Y = np.array([[1, 0, 1]])

grads, cost = propagate(w, b, X, Y)

print("grads dw = " + str(grads["dw"]))
print("grads db = " + str(grads["db"]))
print("cost = " + str(cost))

**Exercise:** Write down the optimization function. The goal is to learn $w$ and $b$ by minimizing the cost function $J$. For a parameter $\theta$, the update rule is $ \theta = \theta - \alpha \text{ } d\theta$, where $\alpha$ is the learning rate.

In [None]:
def optimize(w, b, X, Y, numberOfIterations, learningRate, printCost=True):
    """Learn w and b by running gradient descent on the cost function.

    Called after w and b have been initialised. Every iteration asks
    propagate() for the current cost and gradients and then applies the update
    rule ``theta = theta - learningRate * dtheta`` to both parameters.

    w, b -- initial weights and bias
    X, Y -- training data and labels, passed unchanged to propagate()
    numberOfIterations -- number of gradient-descent steps to run
    learningRate -- step size of the update rule
    printCost -- when True (the default), print the cost every 100 iterations

    Returns (params, grads, costs):
        params -- {"w": ..., "b": ...} after the last update
        grads  -- {"dw": ..., "db": ...} from the last iteration, computed
                  before that iteration's update; both are None when no
                  iteration ran
        costs  -- the cost recorded every 100 iterations (0, 100, 200, ...)
    """
    costs = []
    # Bound up front so the function also returns cleanly when the loop body
    # never executes (numberOfIterations <= 0).
    dw, db = None, None

    for i in range(numberOfIterations):
        gradients, cost = propagate(w, b, X, Y)
        dw = gradients["dw"]
        db = gradients["db"]

        w = w - learningRate * dw
        b = b - learningRate * db

        # Record (and optionally print) the cost every 100 iterations.
        if i % 100 == 0:
            costs.append(cost)
            if printCost:
                print("Cost after iteration %i: %f" % (i, cost))

    params = {"w": w,
              "b": b}

    grads = {"dw": dw,
             "db": db}

    return params, grads, costs
        

In [None]:
# Run gradient descent on the small example defined above and show the outcome.
params, grads, costs = optimize(
    w, b, X, Y,
    numberOfIterations=500,
    learningRate=0.009,
)

print("w = " + str(params["w"]))
print("b = " + str(params["b"]))
print("dw = " + str(grads["dw"]))
print("db = " + str(grads["db"]))

**Exercise:** The previous function will output the learned w and b. We are able to use w and b to predict the labels for a dataset X. Implement the `predict()` function. There are two steps to computing predictions:

1. Calculate $\hat{Y} = A = \sigma(w^T X + b)$

2. Convert the entries of A into 0 (if activation <= 0.5) or 1 (if activation > 0.5), and store the predictions in a vector `Y_prediction`. If you wish, you can use an `if`/`else` statement in a `for` loop (though there is also a way to vectorize this). 

In [None]:
def predict(w, b, X):
    """Predict a 0/1 label for every column of X with learned parameters.

    w -- weights; reshaped to a column vector of shape (X.shape[0], 1)
    b -- bias
    X -- input data, one example per column

    Returns Y_prediction, a float array of shape (1, X.shape[1]) holding 1.0
    where the activation is > 0.5 and 0.0 otherwise.
    """
    m = X.shape[1]
    w = w.reshape(X.shape[0], 1)

    # Probability for each example (column of X).
    A = sigmoid(np.dot(w.T, X) + b)

    # Explicit threshold: activation <= 0.5 -> 0, activation > 0.5 -> 1.
    Y_prediction = (A > 0.5).astype(float)
    assert Y_prediction.shape == (1, m)

    return Y_prediction
    

In [None]:
# Try predict() on three hand-made examples with fixed parameters.
w = np.array([[0.1124579], [0.23106775]])
b = -0.3
X = np.array([[1., -1.1, -3.2], [1.2, 2., 0.1]])

predictions = predict(w, b, X)
print("predictions = " + str(predictions))

## 5 - Merge all functions into a model ##

You will now see how the overall model is structured by putting together all the building blocks (functions implemented in the previous parts) together, in the right order.

**Exercise:** Implement the model function. Use the following notation:
    - Y_prediction for your predictions on the test set
    - Y_prediction_train for your predictions on the train set
    - w, costs, grads for the outputs of optimize()

In [None]:
def model(X_train, Y_train, X_test, Y_test, numberOfIterations, learningRate):
    """Train logistic regression on the training set and evaluate it.

    Initialises the parameters with init_w_b(), fits them with optimize(),
    predicts on both sets with predict() and prints both accuracies.

    X_train, Y_train -- training data (one example per column) and labels
    X_test, Y_test -- test data and labels
    numberOfIterations, learningRate -- forwarded to optimize()

    Returns a dict with the keys "costs", "Y_prediction_test",
    "Y_prediction_train", "w", "b", "learning_rate" and "num_iterations".
    """
    # One weight per input feature, i.e. per row of X_train.
    w, b = init_w_b(X_train.shape[0])

    params, grads, costs = optimize(w, b, X_train, Y_train, numberOfIterations, learningRate)
    w, b = params["w"], params["b"]

    Y_prediction_train = predict(w, b, X_train)
    Y_prediction_test = predict(w, b, X_test)

    # Accuracy = 100 minus the mean absolute prediction error, in percent.
    train_accuracy = 100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100
    print("train accuracy: {} %".format(train_accuracy))
    test_accuracy = 100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100
    print("test accuracy: {} %".format(test_accuracy))

    return {
        "costs": costs,
        "Y_prediction_test": Y_prediction_test,
        "Y_prediction_train": Y_prediction_train,
        "w": w,
        "b": b,
        "learning_rate": learningRate,
        "num_iterations": numberOfIterations,
    }

In [None]:
# Train on the scaled cat / non-cat data; `d` keeps the parameters, predictions and costs.
d = model(
    train_set_x,
    train_set_y,
    test_set_x,
    test_set_y,
    numberOfIterations=2000,
    learningRate=0.005,
)

In [None]:
# Show one test image together with its true label and the class the model predicted.
index = 40
picture = test_set_x[:, index].reshape((num_px, num_px, 3))
plt.imshow(picture)

predicted_class = classes[int(d["Y_prediction_test"][0, index])].decode("utf-8")
print("y = " + str(test_set_y[0, index]) + ", you predicted that it is a \"" + predicted_class + "\" picture.")

In [None]:
## START CODE HERE ## (PUT YOUR IMAGE NAME) 
my_image = "3.jpg"   # change this to the name of your image file 
## END CODE HERE ##

# Preprocess the image so that it matches what the model was trained on.
fname = "/kaggle/input/catclassifiertest/" + my_image

# scipy.ndimage.imread and scipy.misc.imresize have been removed from SciPy, so
# the file is read and resized with Pillow (already imported as `Image`).
# Converting to RGB guarantees exactly 3 channels for the reshape below.
with Image.open(fname) as raw_picture:
    rgb_picture = raw_picture.convert("RGB")
image = np.array(rgb_picture)

# Bilinear resampling was imresize's default interpolation.
resized = np.array(rgb_picture.resize((num_px, num_px), Image.BILINEAR))

# Flatten to a single column and scale by 255, exactly like the training data;
# feeding raw 0-255 pixel values to the model would saturate the sigmoid.
my_image = resized.reshape((1, num_px * num_px * 3)).T / 255.
my_predicted_image = predict(d["w"], d["b"], my_image)

plt.imshow(image)
print("y = " + str(np.squeeze(my_predicted_image)) + ", your algorithm predicts a \"" + classes[int(np.squeeze(my_predicted_image))].decode("utf-8") +  "\" picture.")