# Logistic Regression in Machine Learning with Gradient Descent


## Some useful links

- [Interpretable Machine Learning (Christoph Molnar)](https://christophm.github.io/interpretable-ml-book/preface-by-the-author.html)
- [Logistic Regression with a Neural Network mindset (notebook)](https://github.com/Kulbear/deep-learning-coursera/blob/master/Neural%20Networks%20and%20Deep%20Learning/Logistic%20Regression%20with%20a%20Neural%20Network%20mindset.ipynb)



In [13]:
# Import dependencies and define a placeholder NeuronNetworks class
import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy
from PIL import Image
from scipy import ndimage
# from lr_utils import load_dataset
%matplotlib inline

class NeuronNetworks:
    """Empty placeholder class; it defines no attributes or methods."""


In [2]:
# Define the sigmoid function and its derivative

def sigmoid(z):
    """
    Apply the logistic function 1 / (1 + exp(-z)) element-wise.

    Arguments:
    z -- a scalar or numpy array

    Returns:
    The sigmoid of z; for an array input the result has the same shape as z.
    """
    return 1 / (np.exp(-z) + 1)

def sigmoid_deriviative(z):
    """
    Compute the derivative of the sigmoid function at z.

    Uses the identity sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)).

    Arguments:
    z -- a scalar or numpy array

    Returns:
    d_s -- the derivative, evaluated element-wise at z
    """
    s = sigmoid(z)  # evaluate once and reuse for both factors
    d_s = s * (1 - s)
    return d_s


In [8]:
# Initialize the weights and the bias to zeros

def initialize_with_zeros(m):
    """
    Build zero-valued starting parameters: a weight vector and a bias.

    Argument:
    m -- number of rows of the weight vector (the number of parameters)

    Returns:
    w -- numpy array of zeros with shape (m, 1)
    b -- the bias, set to the integer 0
    """
    weights = np.zeros(shape=(m, 1))
    bias = 0
    return weights, bias

In [16]:
# propagate function definition

def propagate(w, b, X, Y):
    '''
    Run one forward and one backward pass of logistic regression.

    Arguments:
    w - weights; used as w.T in a dot product with X
    b - bias, added to the linear combination before the activation
    X - training set / input data; the number of examples is X.shape[1]
    Y - true "label" vector, combined element-wise with the activations

    Returns:
    grads - dictionary with
            "dw": gradient of the cost with respect to w
            "db": gradient of the cost with respect to b
    cost - negative log-likelihood cost, averaged over the examples
    '''
    # Number of examples in the training set
    m = X.shape[1]

    # Forward propagation: sigmoid activation of the linear combination
    A = sigmoid(np.dot(w.T, X) + b)

    # A saturated activation (exactly 0 or 1) would make log() return -inf and
    # 0 * -inf evaluate to nan. Keep the values fed to log() strictly inside
    # (0, 1); only the cost uses this clipped copy, the gradients use A as-is.
    eps = np.finfo(A.dtype).eps
    A_safe = np.clip(A, eps, 1 - eps)
    cost = -(1/m) * np.sum(Y * np.log(A_safe) + (1 - Y) * np.log(1 - A_safe))

    # NOTE(review): diagnostic output, emitted on every call.
    print("Inner Actication function value", A)

    # Backward propagation: gradients of the cost w.r.t. w and b
    dw = (1/m) * np.dot(X, (A - Y).T)
    db = (1/m) * np.sum(A - Y)

    # Drop any length-1 axes from the cost
    cost = np.squeeze(cost)

    grads = {"dw": dw, "db": db}

    return grads, cost

    

In [26]:
# Exercise propagate() on a tiny hand-made problem.
w = np.array([[1], [2]])
b = 2
X = np.array([[1, 2], [3, 4]])
Y = np.array([[1, 0]])

grads, cost = propagate(w, b, X, Y)

# Show the gradients and the cost returned for these inputs.
print("Gradients \n dw: {} \t db: {}".format(grads["dw"], grads["db"]))
print("Cost", cost)


Inner Actication function value [[0.99987661 0.99999386]]
Gradients 
 dw: [[0.99993216]
 [1.99980262]] 	 db: 0.49993523062470574
Cost 6.000064773192205


In [34]:
# Gradient descent optimization loop

def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False):
    """
    Optimize w and b by running a gradient descent algorithm.

    Each iteration calls propagate() to get the cost and gradients for the
    current parameters, then moves w and b against their gradients.

    Arguments:
    w -- weights, passed to propagate() together with b, X and Y
    b -- bias
    X -- input data
    Y -- true "label" vector
    num_iterations -- number of iterations of the optimization loop
    learning_rate -- learning rate of the gradient descent update rule
    print_cost -- True to print the cost every 100 iterations

    Returns:
    params -- dictionary containing the final weights "w" and bias "b"
    grads -- dictionary containing the gradients "dw" and "db" from the last
             iteration (both None when num_iterations is 0)
    costs -- list of the costs recorded at every 100th iteration, starting
             with iteration 0; can be used to plot the learning curve
    """
    costs = []

    # Bound up front so the returned dictionary is well-formed even when the
    # loop body never runs (num_iterations == 0).
    dw, db = None, None

    for i in range(num_iterations):
        # Cost and gradients for the current parameters
        grads, cost = propagate(w, b, X, Y)
        dw = grads["dw"]
        db = grads["db"]

        # Gradient descent update; rebinding leaves the caller's arrays untouched
        w = w - learning_rate * dw
        b = b - learning_rate * db

        # Record (and optionally print) the cost every 100 iterations
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print ("Cost after iteration %i: %f" % (i, cost))

    params = {"w": w,
              "b": b}

    grads = {"dw": dw,
             "db": db}

    return params, grads, costs

In [35]:
# Run 100 gradient descent steps using the module-level w, b, X and Y.
params, grads, costs_history = optimize(
    w, b, X, Y,
    num_iterations=100,
    learning_rate=0.009,
    print_cost=False,
)

# Show the final parameters, the last gradients and the recorded costs.
print("Params", params)
print("grads mini cost", grads)
print("real cost", costs_history)


Inner Actication function value [[0.99987661 0.99999386]]
Inner Actication function value [[0.99986799 0.99999325]]
Inner Actication function value [[0.99985877 0.99999258]]
Inner Actication function value [[0.99984892 0.99999184]]
Inner Actication function value [[0.99983837 0.99999103]]
Inner Actication function value [[0.99982709 0.99999015]]
Inner Actication function value [[0.99981501 0.99998917]]
Inner Actication function value [[0.9998021 0.9999881]]
Inner Actication function value [[0.99978829 0.99998692]]
Inner Actication function value [[0.99977351 0.99998562]]
Inner Actication function value [[0.9997577  0.99998419]]
Inner Actication function value [[0.99974079 0.99998263]]
Inner Actication function value [[0.9997227  0.99998091]]
Inner Actication function value [[0.99970334 0.99997902]]
Inner Actication function value [[0.99968264 0.99997694]]
Inner Actication function value [[0.99966049 0.99997465]]
Inner Actication function value [[0.9996368  0.99997214]]
Inner Actication

In [None]:
# predict with output weights and bias

def predict(w, b, X):
    """
     Predict whether the label is 0 or 1 using learned logistic regression parameters (w, b)

     Arguments:
     w -- weights, a numpy array that is reshaped to (X.shape[0], 1)
     b -- bias, a scalar
     X -- data of shape (number of features, number of examples)

     Returns:
     Y_prediction -- a numpy array of shape (1, number of examples) holding
                     1.0 where the predicted probability is strictly greater
                     than 0.5 and 0.0 otherwise
    """
    # Make sure the weights form a column vector matching the rows of X
    w = w.reshape(X.shape[0], 1)

    # Predicted probabilities, one per example
    A = sigmoid(np.dot(w.T, X) + b)

    # Threshold every probability in one vectorized step instead of looping
    # over the examples; astype(float) keeps the float 0.0 / 1.0 output.
    Y_prediction = (A > 0.5).astype(float)

    return Y_prediction
