# IMPLEMENTING LOGISTIC REGRESSION

In [7]:
import numpy as np


In [8]:
# Toy training set: six examples with two features each.
X_train = np.array(
    [
        [0.5, 1.5],
        [1, 1],
        [1.5, 0.5],
        [3, 0.5],
        [2, 2],
        [1, 2.5],
    ]
)  # shape (m, n) = (6, 2)

# Binary targets, one per row of X_train.
y_train = np.array([0, 0, 0, 1, 1, 1])  # shape (m,) = (6,)

In [9]:
def sigmoid(z):
    """
    Compute the sigmoid of z

    The value is computed from exp(-|z|), which cannot overflow, so very
    large negative inputs do not trigger a floating-point overflow warning.

    Args:
        z (scalar or array_like): Input value(s), of any shape.

    Returns:
        g (scalar or ndarray): sigmoid(z), with the same shape as z
                               (a NumPy scalar when z is a scalar)
         
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        # Integer / boolean input: promote so that exp() works in floating point.
        z = z.astype(float)

    # exp(-|z|) always lies in [0, 1], unlike exp(-z) which overflows
    # to inf for large negative z.
    e = np.exp(-np.abs(z))

    # z >= 0:  1 / (1 + exp(-z))
    # z <  0:  exp(z) / (1 + exp(z))   (the same function, rearranged)
    g = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    # Indexing with () converts a 0-d array back into a scalar and
    # leaves arrays of higher rank as they are.
    return g[()]


Loss measures how far the prediction for a single example is from its target value. <br>
Cost is the average of the losses over the whole training set.



In [10]:
# The squared error cost is not well suited to logistic regression, so we use the cross-entropy loss instead
def compute_cost_logistic(X, y, w, b):
    """
    Computes the cross-entropy cost, averaged over all examples

    Args:
      X (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model parameters  
      b (scalar)       : model parameter
      
    Returns:
      cost (scalar): cost

    Raises:
      ValueError: if X contains no examples
    """
    X = np.asarray(X)
    y = np.asarray(y)

    m = X.shape[0]
    if m == 0:
        raise ValueError("X must contain at least one example")

    z = np.dot(X, w) + b                                            #(m,)

    # With f = sigmoid(z):
    #   -log(f)     = log(1 + exp(-z)) = logaddexp(0, -z)
    #   -log(1 - f) = log(1 + exp(z))  = logaddexp(0,  z)
    # Working from z directly avoids log(0), which turned the cost into
    # nan as soon as the sigmoid rounded to exactly 0 or 1.
    loss = y * np.logaddexp(0, -z) + (1 - y) * np.logaddexp(0, z)   #(m,)

    cost = np.sum(loss) / m
    return cost

In [None]:
# cost for regularized logistic regression

def compute_cost_logistic_reg(X, y, w, b, lambda_ = 1):
    """
    Computes the regularized cross-entropy cost over all examples
    Args:
      X (ndarray (m,n): Data, m examples with n features
      y (ndarray (m,)): target values
      w (ndarray (n,)): model parameters  
      b (scalar)      : model parameter
      lambda_ (scalar): Controls amount of regularization
    Returns:
      total_cost (scalar):  cost 
    Raises:
      ValueError: if X contains no examples
    """
    X = np.asarray(X)
    y = np.asarray(y)
    w = np.asarray(w)

    m = X.shape[0]
    if m == 0:
        raise ValueError("X must contain at least one example")

    z = np.dot(X, w) + b                                               #(m,)

    # With f = sigmoid(z):  -log(f) = logaddexp(0, -z)  and  -log(1 - f) = logaddexp(0, z).
    # Working from z directly avoids log(0), which produced nan once the
    # sigmoid rounded to exactly 0 or 1.
    loss = y * np.logaddexp(0, -z) + (1 - y) * np.logaddexp(0, z)      #(m,)
    cost = np.sum(loss) / m                                            #scalar

    # Only the weights w are penalised; b is not part of the penalty.
    reg_cost = (lambda_ / (2 * m)) * np.sum(w ** 2)                    #scalar

    total_cost = cost + reg_cost                                       #scalar
    return total_cost                                                  #scalar

In [11]:
# gradient descent for logistic regression

# the gradient of the cost has the same form as for linear regression; the only difference is that the prediction f_wb is sigmoid(w.x + b)

def compute_gradient_logistic(X, y, w, b): 
    """
    Computes the gradient for logistic regression 
 
    Args:
      X (ndarray (m,n): Data, m examples with n features
      y (ndarray (m,)): target values
      w (ndarray (n,)): model parameters  
      b (scalar)      : model parameter
    Returns (in this order):
      dj_db (scalar)      : The gradient of the cost w.r.t. the parameter b. 
      dj_dw (ndarray (n,)): The gradient of the cost w.r.t. the parameters w. 
    Raises:
      ValueError: if X contains no examples
    """
    X = np.asarray(X)
    y = np.asarray(y)

    m, n = X.shape
    if m == 0:
        raise ValueError("X must contain at least one example")

    f_wb = sigmoid(np.dot(X, w) + b)                  #(m,) predictions
    err = f_wb - y                                    #(m,) prediction errors

    # X.T @ err sums err_i * X[i] over all examples in one step.
    dj_dw = np.dot(X.T, err) / m                      #(n,)
    dj_db = np.sum(err) / m                           #scalar

    # NOTE: the bias gradient comes first in the returned tuple.
    return dj_db, dj_dw  

In [None]:
# regularization gradient

def compute_gradient_regularisation(X, y, w, b, lambda_): 
    """
    Computes the gradient for regularized linear regression 

    NOTE(review): the prediction used here is the linear model w.x + b,
    with no sigmoid applied. For regularized *logistic* regression the
    error term would be sigmoid(w.x + b) - y -- confirm which is intended.

    Args:
      X (ndarray (m,n): Data, m examples with n features
      y (ndarray (m,)): target values
      w (ndarray (n,)): model parameters  
      b (scalar)      : model parameter
      lambda_ (scalar): Controls amount of regularization
      
    Returns (in this order):
      dj_db (scalar):       The gradient of the cost w.r.t. the parameter b. 
      dj_dw (ndarray (n,)): The gradient of the cost w.r.t. the parameters w. 

    Raises:
      ValueError: if X contains no examples
    """
    X = np.asarray(X)
    y = np.asarray(y)
    w = np.asarray(w)

    m, n = X.shape           #(number of examples, number of features)
    if m == 0:
        raise ValueError("X must contain at least one example")

    err = (np.dot(X, w) + b) - y                     #(m,) prediction errors

    # X.T @ err replaces the nested loops over examples and features.
    dj_dw = np.dot(X.T, err) / m                     #(n,)
    dj_db = np.sum(err) / m                          #scalar

    # Regularization acts on the weights only; dj_db is left untouched.
    dj_dw = dj_dw + (lambda_ / m) * w

    # NOTE: the bias gradient comes first in the returned tuple.
    return dj_db, dj_dw

In [12]:
def train_gradient_descent(X, y, w_in, b_in, alpha, num_iters): 
    """
    Runs batch gradient descent for a fixed number of steps

    Every step evaluates compute_gradient_logistic on the full data set
    and moves both parameters against their gradient, scaled by alpha.
    
    Args:
      X (ndarray (m,n)   : Data, m examples with n features
      y (ndarray (m,))   : target values
      w_in (ndarray (n,)): Starting values of the weights
      b_in (scalar)      : Starting value of the bias
      alpha (float)      : Learning rate
      num_iters (scalar) : How many gradient steps to take
      
    Returns:
      w (ndarray (n,))   : Weights after the last step
      b (scalar)         : Bias after the last step
    """
    weights, bias = w_in, b_in

    for _ in range(num_iters):
        # compute_gradient_logistic returns the bias gradient first
        grad_b, grad_w = compute_gradient_logistic(X, y, weights, bias)

        # Rebinding (not in-place -=) leaves the caller's w_in untouched
        weights = weights - alpha * grad_w
        bias = bias - alpha * grad_b

    return weights, bias       # final parameters only; no cost history is recorded


In [14]:
# train the model

# learning rate and number of gradient steps
alph = 0.1
iters = 10000

# start from all-zero parameters (one weight per feature)
w_ini = np.zeros_like(X_train[0])
b_ini = 0.

w_out, b_out = train_gradient_descent(
    X_train, y_train, w_in=w_ini, b_in=b_ini, alpha=alph, num_iters=iters
)
print(f"\nupdated parameters: w:{w_out}, b:{b_out}")




updated parameters: w:[5.28123029 5.07815608], b:-14.222409982019837


In [15]:

# generate predictions for a single point or for multiple points
def predict(X, w, b):
    """
    Predicts the class (0 or 1) of one point or of several points

    A point is assigned class 1 when sigmoid(w.x + b) is strictly greater
    than 0.5, and class 0 otherwise.

    Args:
      X (ndarray (n,) or (m,n)): A single point with n features, or
                                 m points with n features each
      w (ndarray (n,))         : model parameters  
      b (scalar)               : model parameter
      
    Returns:
      int            : 0 or 1, when X is a single point
      ndarray (m,)   : array of 0/1 integers, when X holds m points
    """
    f_wb = sigmoid(np.dot(X, w) + b)

    if np.ndim(f_wb) == 0:
        # Single point: keep returning a plain Python int.
        return 1 if f_wb > 0.5 else 0

    # Several points: threshold element-wise instead of relying on the
    # truth value of an array, which is ambiguous for more than one element.
    return (f_wb > 0.5).astype(int)


In [17]:
# testing the model: classify one known training point and compare with its label
X_test = np.array([0.5, 1.5])
y_test = np.array(0)
result = predict(X_test, w_out, b_out)

print(
    f"\nTest passed. Prediction: {result}, Actual: {y_test}"
    if result == y_test
    else f"\nTest failed. Prediction: {result}, Actual: {y_test}"
)


Test passed. Prediction: 0, Actual: 0
