# From-scratch implementation of regularized logistic regression using NumPy

# In[23]:
import math

import numpy as np

# In[24]:
def sigmoid(z):
    """
    Compute the sigmoid of z, g = 1 / (1 + e^(-z)).

    Args:
        z (ndarray): A scalar, numpy array of any size.
    Returns:
        g (ndarray): sigmoid(z), with the same shape as z
                     (a 0-d array when z is a scalar).
    """
    z = np.asarray(z, dtype=float)

    # exp(-|z|) is always in (0, 1], so it can never overflow. The two
    # algebraically equivalent forms below are selected by the sign of z:
    #   z >= 0:  1 / (1 + e^-z)
    #   z <  0:  e^z / (1 + e^z)
    e = np.exp(-np.abs(z))
    g = np.where(z >= 0, 1.0, e) / (1.0 + e)

    return g

# In[25]:
def cost_function_logistic_reg(X, y, w, b, lambda_=0.5):
    """
    Computes the L2-regularized logistic (cross-entropy) cost over all examples
    Args:
      X : (ndarray Shape (m,n)) data, m examples by n features
      y : (ndarray Shape (m,))  target value
      w : (ndarray Shape (n,))  values of parameters of the model
      b : (scalar)              value of bias parameter of the model
      lambda_ : (scalar)        regularization strength applied to w
                                (the bias b is not regularized)
    Returns:
      total_cost : (scalar) cost
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    w = np.asarray(w, dtype=float)
    m, _ = X.shape

    # Linear model output (logit) for every example at once.
    z = X @ w + b

    # With s = sigmoid(z), the per-example loss
    #     -[y*log(s) + (1 - y)*log(1 - s)]
    # simplifies to log(1 + e^z) - y*z. Evaluating it in this form via
    # logaddexp(0, z) avoids log(0) when the sigmoid saturates, which would
    # otherwise turn the cost into nan/inf.
    cost = np.sum(np.logaddexp(0.0, z) - y * z) / m

    # regularization
    reg_cost = (lambda_ / (2 * m)) * np.sum(w ** 2)

    total_cost = cost + reg_cost
    return total_cost

# In[26]:
def gradient_compute(X, y, w, b, lambda_ = 0.5):
    """
    Computes the gradient of the L2-regularized logistic regression cost

    Args:
      X : (ndarray Shape (m,n)) data, m examples by n features
      y : (ndarray Shape (m,))  target value
      w : (ndarray Shape (n,))  values of parameters of the model
      b : (scalar)              value of bias parameter of the model
      lambda_ : (scalar)        regularization strength applied to w
                                (the bias b is not regularized)
    Returns
      dj_dw : (ndarray Shape (n,)) The gradient of the cost w.r.t. the parameters w.
      dj_db : (scalar)             The gradient of the cost w.r.t. the parameter b.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    w = np.asarray(w, dtype=float)
    m, _ = X.shape

    # Prediction error for every example: sigmoid(w . x_i + b) - y_i
    error = sigmoid(X @ w + b) - y

    # dJ/dw_j = (1/m) * sum_i error_i * X[i, j]. The error is MULTIPLIED by
    # the feature value, which X.T @ error does for all j at once.
    dj_dw = (X.T @ error) / m
    dj_db = np.sum(error) / m

    # regularized dj_dw
    dj_dw = dj_dw + (lambda_ / m) * w

    return dj_dw, dj_db

# In[29]:
def gradient_descent(X, y, w_init, b_init, LR, iteration, lambda_=0.5):
    """
    Performs batch gradient descent to learn w and b. Updates the parameters
    by taking `iteration` gradient steps with learning rate `LR`, and prints
    the current cost, w and b roughly ten times over the course of the run.

    Args:
    X :      (ndarray Shape (m, n)) data, m examples by n features
    y :      (ndarray Shape (m,))  target value
    w_init : (ndarray Shape (n,))  Initial values of parameters of the model
    b_init : (scalar)              Initial value of the bias parameter
    LR :     (float)               Learning rate
    iteration : (int)              number of iterations to run gradient descent
    lambda_ : (scalar)             regularization strength passed to the
                                   gradient and cost functions
    Returns:
    w : (ndarray Shape (n,)) Updated values of parameters of the model after
      running gradient descent
    b : (scalar)                Updated value of parameter of the model after
      running gradient descent
    j_red : (list)              Cost J(w, b) recorded after every update step
    """

    w = np.copy(w_init)  # copy so the caller's initial weights are not modified
    b = b_init
    j_red = []

    # Progress is reported about ten times per run; the max() guard keeps the
    # modulus non-zero.
    report_every = max(1, math.ceil(iteration / 10))

    for i in range(iteration):

        ## getting dj_dw, dj_db
        dj_dw, dj_db = gradient_compute(X, y, w, b, lambda_=lambda_)

        w = w - LR * dj_dw
        b = b - LR * dj_db

        # showing each step of J(w,b) reduction
        j_red.append(cost_function_logistic_reg(X, y, w, b, lambda_=lambda_))

        if i % report_every == 0:
            print(j_red[-1], w, b)

    return w, b, j_red