# Logistic Regression

In [72]:
import numpy as np
from copy import deepcopy

Mathematical definitions<br>
$z^{(i)} = w^T x^{(i)} + b$<br>
$\hat{y}^{(i)} = a^{(i)} = sigmoid(z^{(i)})$<br>
$sigmoid(z) = \displaystyle\frac{1}{1 + e^{-z}}$<br>
$L(a^{(i)}, y^{(i)}) = -y^{(i)}\log{a^{(i)}} - (1 - y^{(i)})\log{(1 - a^{(i)})}$ <br>
The cost function is the average of the per-example losses over the $m$ training examples:<br>
$J = \displaystyle\frac{1}{m}\displaystyle\sum_{i=1}^{m}L(a^{(i)}, y^{(i)})$

In [2]:
def sigmoid(z):
    """Apply the logistic sigmoid 1 / (1 + e^(-z)) element-wise.

    Parameters
    ----------
    z : scalar or numpy array
        Input value(s).

    Returns
    -------
    The sigmoid of ``z``, with the same shape as ``z``.
    """
    exp_neg_z = np.exp(-z)
    return 1 / (1 + exp_neg_z)

In [3]:
def loss(a_i, y_i):
    """Binary cross-entropy loss of a prediction against its label.

    Parameters
    ----------
    a_i : scalar or numpy array
        Predicted value(s); the logarithm of ``a_i`` and of ``1 - a_i`` is taken.
    y_i : scalar or numpy array
        Label(s) paired with ``a_i``.

    Returns
    -------
    ``-y_i * log(a_i) - (1 - y_i) * log(1 - a_i)``, computed element-wise.
    """
    log_a = np.log(a_i)
    log_one_minus_a = np.log(1 - a_i)
    return -y_i * log_a - (1 - y_i) * log_one_minus_a

In [4]:
def initialize_with_zeros(dim):
    """Create zero-valued starting parameters.

    Parameters
    ----------
    dim : int
        Number of rows of the weight column vector.

    Returns
    -------
    tuple
        ``(w, b)`` where ``w`` is a ``(dim, 1)`` array of zeros and ``b`` is
        the integer 0.
    """
    weights = np.zeros(shape=(dim, 1))
    bias = 0
    return weights, bias

## Propagate
Next we implement forward and backward propagation.
The function below computes the cost and its gradients with respect to $w$ and $b$.

$\displaystyle\frac{\partial{J}}{\partial{w_{j}}} = \frac{1}{m}\sum_{i=1}^{m}(a^{(i)} - y^{(i)})x_j^{(i)}$ <br>
$\displaystyle\frac{\partial{J}}{\partial{b}} = \frac{1}{m}\sum_{i=1}^{m}(a^{(i)} - y^{(i)})$

In [70]:
def propagate(w, b, X, Y):
    """Run one forward and one backward pass of logistic regression.

    Parameters
    ----------
    w : numpy array
        Weights; ``w.T`` is multiplied with ``X`` (shape ``(n, 1)`` in this
        notebook).
    b : scalar or numpy array
        Bias added to every activation input.
    X : numpy array, shape (n, m)
        Input data; ``m`` (the second dimension) is used as the sample count.
    Y : numpy array
        Labels (shape ``(1, m)`` in this notebook).

    Returns
    -------
    grads : dict
        ``dw`` - gradient of the cost with respect to ``w``;
        ``db`` - gradient of the cost with respect to ``b``, summed over
        axis 1 (so it is an array, not a Python scalar).
    cost : float
        Average cross-entropy over the ``m`` samples.
    """
    _, m = X.shape
    A = sigmoid(np.dot(w.T, X) + b)

    # When the sigmoid saturates to exactly 0.0 or 1.0, log() yields -inf and
    # the product 0 * -inf turns the whole cost into NaN. Clamping the log
    # argument to the smallest positive normal float keeps the cost finite
    # and leaves every non-saturated value untouched.
    tiny = np.finfo(A.dtype).tiny
    log_a = np.log(np.maximum(A, tiny))
    log_one_minus_a = np.log(np.maximum(1 - A, tiny))
    cost = np.sum(-1 * np.dot(Y, log_a.T) - np.dot(1 - Y, log_one_minus_a.T)) / m

    dz = A - Y
    dw = 1/m * np.dot(X, dz.T)
    db = np.sum(dz, axis=1) / m

    grads = dict(dw=dw, db=db)
    return grads, cost

In [71]:
# Small hand-made example: 2 features, 3 samples.
w = np.array([[1.0], [2.0]])
b = 1.5
X = np.array([
    [1.0, -2.0, -1.0],
    [3.0, 0.5, -3.2],
])
Y = np.array([[1, 1, 0]])
propagate(w, b, X, Y)

({'dw': array([[ 0.25071532],
         [-0.06604096]]),
  'db': array([-0.12500405])},
 0.15900537707692405)

In [89]:
def optimize(w, b, X, Y, num_iterations=100, learning_rate=0.009, verbos=False):
    """Fit ``w`` and ``b`` with batch gradient descent.

    Parameters
    ----------
    w, b : initial weights and bias; they are deep-copied, so the caller's
        objects are never modified.
    X, Y : data and labels, passed unchanged to ``propagate``.
    num_iterations : int
        Number of gradient-descent updates to perform.
    learning_rate : float
        Step size applied to each gradient.
    verbos : bool
        When true, print the cost every 100th iteration.

    Returns
    -------
    params : dict
        ``w`` and ``b`` after the last update.
    grads : dict
        ``dw`` and ``db`` used for the last update; both are ``None`` when
        ``num_iterations`` is 0.
    costs : list
        Cost recorded at every 100th iteration.
    """
    costs = []
    w = deepcopy(w)
    b = deepcopy(b)

    # Defined up front so the return value is well-formed even when the loop
    # body never runs (num_iterations == 0).
    dw = db = None
    # Field width that right-aligns the iteration counter in the log lines.
    counter_width = len(str(num_iterations))

    for i in range(num_iterations):
        grads, cost = propagate(w, b, X, Y)

        dw = grads['dw']
        db = grads['db']

        w = w - learning_rate * dw
        b = b - learning_rate * db

        if (i + 1) % 100 == 0:
            costs.append(cost)
            if verbos:
                print(f'#{i + 1:{counter_width}} cost is {cost}')

    params = dict(w=w, b=b)
    grads = dict(dw=dw, db=db)

    return params, grads, costs

In [90]:
params, grads, costs = optimize(w, b, X, Y, num_iterations=10000, verbos=True)

#  100 cost is 0.10579008649578009
#  200 cost is 0.07702879258174412
#  300 cost is 0.05989615811520176
#  400 cost is 0.048764761454255595
#  500 cost is 0.041042925381041144
#  600 cost is 0.03541201128669937
#  700 cost is 0.03114306413600132
#  800 cost is 0.027805003553200393
#  900 cost is 0.025128502865697035
# 1000 cost is 0.022937505237866238
# 1100 cost is 0.02111249872740779
# 1200 cost is 0.019569730959083608
# 1300 cost is 0.01824889259353497
# 1400 cost is 0.01710552576566551
# 1500 cost is 0.016106182511363983
# 1600 cost is 0.015225243722652368
# 1700 cost is 0.014442773681102618
# 1800 cost is 0.013743039105931646
# 1900 cost is 0.013113465559625255
# 2000 cost is 0.012543888305947416
# 2100 cost is 0.012026005491800348
# 2200 cost is 0.011552972934424715
# 2300 cost is 0.011119099688773199
# 2400 cost is 0.01071961644175916
# 2500 cost is 0.010350497272881268
# 2600 cost is 0.010008321025241225
# 2700 cost is 0.009690162426027112
# 2800 cost is 0.009393505795626115
#

## Prediction
Now we classify each example by comparing its predicted probability against a threshold.

In [93]:
def predict(w, b, X, threshold=0.5):
    """Turn sigmoid activations into hard 0/1 predictions.

    Parameters
    ----------
    w, b : weights and bias of the model.
    X : numpy array
        Input data; ``w.T`` is multiplied with it.
    threshold : float
        Activations strictly greater than this become 1, those less than or
        equal to it become 0.

    Returns
    -------
    numpy array
        The activation array with its entries replaced by 1 or 0.
    """
    activations = sigmoid(np.dot(w.T, X) + b)

    # The two masks are disjoint, so they can be computed before any
    # assignment takes place.
    is_positive = activations > threshold
    is_negative = activations <= threshold

    activations[is_positive] = 1
    activations[is_negative] = 0
    return activations

## Merging functions

In [98]:
def logistic_regression(X_train, Y_train, X_test, Y_test, num_iterations=1000, learning_rate=0.001, verbos=False):
    """Train a logistic-regression model and predict on both data splits.

    Parameters
    ----------
    X_train, Y_train : training data and labels; ``X_train.shape[0]`` sets
        the number of weights.
    X_test, Y_test : held-out data and labels, used only for prediction and
        the accuracy report.
    num_iterations : int
        Number of gradient-descent updates.
    learning_rate : float
        Gradient-descent step size.
    verbos : bool
        When true, print train and test accuracy. The flag is not forwarded
        to ``optimize``, so per-iteration costs are not printed.

    Returns
    -------
    dict
        Keys ``costs``, ``Y_prediction_test``, ``Y_prediction_train``, ``w``,
        ``b``, ``learning_rate`` and ``num_iterations``.
    """
    w, b = initialize_with_zeros(X_train.shape[0])

    # Only the fitted parameters and the cost history are needed here; the
    # final gradients returned by optimize are discarded.
    params, _, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate)
    w = params['w']
    b = params['b']

    y_train_pred = predict(w, b, X_train)
    y_test_pred = predict(w, b, X_test)

    if verbos:
        splits = (
            ("train", y_train_pred, Y_train),
            ("test", y_test_pred, Y_test),
        )
        for split_name, predicted, expected in splits:
            # 100 minus the mean absolute prediction error, as a percentage.
            accuracy = 100 - np.mean(np.abs(predicted - expected)) * 100
            print(f"{split_name} accuracy: {accuracy} %")

    d = {"costs": costs,
         "Y_prediction_test": y_test_pred,
         "Y_prediction_train": y_train_pred,
         "w": w,
         "b": b,
         "learning_rate": learning_rate,
         "num_iterations": num_iterations}

    return d

In [99]:
logistic_regression(X_train=X, Y_train=Y, X_test=X, Y_test=Y, verbos=True)

train accuracy: 100.0 %
test accuracy: 100.0 %


{'costs': [0.5847310319119523,
  0.5038389287421158,
  0.4426825284032568,
  0.3951469007164264,
  0.3572054105364358,
  0.3261973269679616,
  0.3003347778596492,
  0.27838904591293473,
  0.25949486835947394,
  0.24302800501521693],
 'Y_prediction_test': array([[1., 1., 0.]]),
 'Y_prediction_train': array([[1., 1., 0.]]),
 'w': array([[-0.10282038],
        [ 0.61271228]]),
 'b': array([0.13516195]),
 'learning_rate': 0.001,
 'num_iterations': 1000}