# Logistic Regression implementation

### Image data needs to be flattened before it can be fed to the model.
#### e.g. an array of m images of size 64x64x3 should be converted into a (64\*64\*3) x m matrix, i.e. 12288 x m, with one column per image.

In [121]:
import numpy as np

In [122]:
# Synthetic stand-in for an image dataset: 10 training and 2 test RGB images
# of 64x64 pixels. randint's upper bound is exclusive, so 256 is needed for
# the pixel values to cover the full 8-bit range 0..255.
x_train_orig = np.random.randint(256, size=(10, 64, 64, 3))
x_test_orig = np.random.randint(256, size=(2, 64, 64, 3))

In [123]:
# Random binary labels (0 or 1), stored as row vectors of shape (1, m).
y_train = np.random.randint(2, size=(1, 10))
y_test = np.random.randint(2, size=(1, 2))

In [124]:
def flatten(input):
    """Collapse every example of a batch into a single column.

    The first axis of ``input`` indexes the examples; all remaining axes are
    merged into one feature axis. The result is transposed, so it has shape
    (features, examples).
    """
    num_examples = input.shape[0]
    one_row_per_example = input.reshape(num_examples, -1)
    return one_row_per_example.T

In [125]:
# Turn every image into one column, giving arrays of shape (features, examples).
x_train_flat = flatten(x_train_orig)
x_test_flat = flatten(x_test_orig)

In [126]:
# Show the shapes of the flattened train and test sets, one per line.
print(x_train_flat.shape, x_test_flat.shape, sep="\n")

(12288, 10)
(12288, 2)


In [127]:
# Show the train and test label vectors, one per line.
print(y_train, y_test, sep="\n")

[[0 0 0 1 0 0 0 0 1 1]]
[[0 1]]


In [129]:
# Show the shapes of the train and test label vectors, one per line.
print(y_train.shape, y_test.shape, sep="\n")

(1, 10)
(1, 2)


#### Normalize the pixel data by dividing the array by 255

In [130]:
# Scale the pixel values by 1/255; true division yields floating-point arrays.
x_train, x_test = x_train_flat / 255, x_test_flat / 255

### Helper functions

#### Sigmoid function

$$\mathrm{sigmoid}(z) = \frac{1}{1 + e^{-z}}$$

In [132]:
def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + e^(-z)) of ``z``.

    ``z`` may be a scalar or a NumPy array; arrays are processed
    element-wise.
    """
    exp_of_negated = np.exp(-z)
    return 1 / (1 + exp_of_negated)

#### Parameter initializer
Logistic regression has two parameters: <b>w</b> (weights), a column vector of shape <b>(x_train.shape[0], 1)</b>, i.e. one weight per input feature, and <b>b</b> (bias), which is a scalar.

In [134]:
def init_params(dim):
    """Create zero-initialised logistic-regression parameters.

    Args:
        dim: Number of input features, i.e. the length of the weight vector.

    Returns:
        A tuple ``(b, w)`` (bias first) where ``b`` is the scalar bias 0 and
        ``w`` is an array of zeros with shape ``(dim, 1)``.
    """
    b = 0
    # The shape has to be passed as a single tuple: np.zeros(dim, 1) would
    # interpret the 1 as the dtype argument and raise a TypeError.
    w = np.zeros((dim, 1))

    assert w.shape == (dim, 1)
    assert isinstance(b, (float, int))

    return b, w

#### Forward and backward propagation

<ul>
    <li>In the forward pass we calculate the predictions and the total cost.</li>
    <li>In the backward pass we calculate the gradient of the cost with respect to our parameters w and b.</li>
</ul>

In [136]:
def propagate(w, b, X, Y):
    """Run one forward and one backward pass of logistic regression.

    Args:
        w: Weight array of shape (n_features, 1).
        b: Scalar bias.
        X: Input data of shape (n_features, m), one example per column.
        Y: Labels of shape (1, m).

    Returns:
        A tuple ``(grads, cost)``. ``grads`` is a dict holding the gradients
        "dw" (same shape as ``w``) and "db" (scalar). ``cost`` is the
        cross-entropy loss averaged over the m examples, with shape ().
    """
    # Number of examples, used to average the cost and the gradients
    m = X.shape[1]

    # Forward propagation (from prediction to cost)

    # Predicted probability for each example
    A = sigmoid(np.dot(w.T, X) + b)

    # For the logarithms only, keep the probabilities strictly inside (0, 1):
    # once the sigmoid saturates to exactly 0 or 1, log() returns -inf and
    # multiplying it by a zero label term turns the whole cost into nan.
    eps = np.finfo(float).eps
    A_safe = np.clip(A, eps, 1 - eps)

    # Cost, i.e. the loss averaged over all training examples
    cost = (-1 / m) * np.sum(
        np.dot(Y, np.log(A_safe).T) + np.dot(1 - Y, np.log(1 - A_safe).T)
    )

    # Backward propagation (finding gradients); these use the unclipped A

    # Gradient of cost w.r.t. 'w'
    dw = (1 / m) * np.dot(X, (A - Y).T)

    # Gradient of cost w.r.t. 'b'
    db = (1 / m) * np.sum(A - Y)

    assert dw.shape == w.shape
    assert db.dtype == float
    cost = np.squeeze(cost)
    assert cost.shape == ()

    grads = {"dw": dw,
             "db": db}

    return grads, cost

#### Gradient Descent Optimizer

In [137]:
def optimizer(w, b, X, Y, num_iter, learning_rate, print_cost = False):
    """Fit w and b with batch gradient descent.

    Args:
        w: Initial weights, passed on to ``propagate``.
        b: Initial bias, passed on to ``propagate``.
        X: Input data, passed on to ``propagate``.
        Y: Labels, passed on to ``propagate``.
        num_iter: Number of gradient-descent steps to perform.
        learning_rate: Step size applied to both gradients.
        print_cost: If True, print the cost every 100 iterations.

    Returns:
        A tuple ``(params, grads, costs)``: ``params`` holds the final "w" and
        "b"; ``grads`` holds the "dw" and "db" of the last iteration (both
        None when ``num_iter`` is 0); ``costs`` is the list of costs recorded
        at iterations 0, 100, 200, ...
    """
    costs = []

    # Defined up front so the returned grads dict can still be built when
    # num_iter is 0 and the loop body never runs.
    dw, db = None, None

    for i in range(num_iter):

        # Calculate derivatives and cost for the current parameters
        grads, cost = propagate(w, b, X, Y)

        # Retrieve the calculated derivatives
        dw = grads["dw"]
        db = grads["db"]

        # Update parameters
        w = w - learning_rate * dw
        b = b - learning_rate * db

        # Log (and optionally print) the cost every 100 iterations
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("Cost after iteration %i: %f" % (i, cost))

    params = {"w": w,
              "b": b}

    grads = {"dw": dw,
             "db": db}

    return params, grads, costs

#### Prediction function

In [153]:
def predict(w, b, X):
    """Classify each column of ``X`` as 0 or 1.

    The weights are reshaped to (X.shape[0], 1), the sigmoid of the linear
    score is computed for every example, and probabilities above 0.5 map to
    1 while all others map to 0.

    Returns:
        A float array of shape (1, m) holding the predicted labels, where m
        is the number of columns of ``X``.
    """
    # Number of examples to predict
    num_examples = X.shape[1]

    # Make sure the weights form a column vector
    weights = w.reshape(X.shape[0], 1)

    probabilities = sigmoid(np.dot(weights.T, X) + b)

    # Start from all zeros and switch on the entries whose probability
    # exceeds the 0.5 threshold
    Y_prediction = np.zeros((1, num_examples))
    Y_prediction[probabilities > 0.5] = 1

    assert Y_prediction.shape == (1, num_examples)

    return Y_prediction

### Combine all functions into a model

In [155]:
def model(w, b, X_train, Y_train, X_test, Y_test, num_iter = 2000, learning_rate = 0.5, print_cost = False):
    """Train a logistic-regression classifier and report its accuracy.

    Args:
        w: Ignored; kept for interface compatibility. The weights are always
            re-initialised to zeros before training.
        b: Ignored; kept for interface compatibility. The bias is always
            re-initialised to 0 before training.
        X_train: Training inputs, one example per column.
        Y_train: Training labels.
        X_test: Test inputs, one example per column.
        Y_test: Test labels.
        num_iter: Number of gradient-descent iterations.
        learning_rate: Gradient-descent step size.
        print_cost: If True, the optimizer prints the cost every 100 iterations.

    Returns:
        A dict with the recorded "costs", the "predictions_test" and
        "predictions_train" arrays, the learned "w" and "b", and the
        "learning_rate" and "num_iter" that were used.
    """
    # Initialize parameters; init_params returns the bias first, then the weights
    b, w = init_params(X_train.shape[0])

    # Perform gradient descent, honouring the caller's print_cost flag
    params, grads, costs = optimizer(w, b, X_train, Y_train, num_iter, learning_rate, print_cost = print_cost)

    # Retrieve updated parameters from the 'params' dict
    w = params["w"]
    b = params["b"]

    # Use w and b to make predictions on both sets
    predictions_test = predict(w, b, X_test)
    predictions_train = predict(w, b, X_train)

    # Report train/test accuracy: 100 minus the mean absolute error in percent
    print("train accuracy: {} %".format(100 - np.mean(np.abs(predictions_train - Y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(predictions_test - Y_test)) * 100))

    d = {"costs": costs,
         "predictions_test": predictions_test,
         "predictions_train": predictions_train,
         "w": w,
         "b": b,
         "learning_rate": learning_rate,
         "num_iter": num_iter}

    return d
    