## Logistic Regression Implementation  


In [None]:
import numpy as np

In [None]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    The naive form exp(x) / (1 + exp(x)) overflows to inf / inf = nan for
    large positive inputs. Here exp() is only ever evaluated on
    non-positive values, so it can underflow to 0 but never overflow.

    Args:
        x: A scalar or array-like of real numbers.

    Returns:
        Values in [0, 1] with the same shape as ``x``; a scalar input
        yields a scalar.
    """
    x = np.asarray(x)

    # exp(-|x|) lies in (0, 1] for every finite x.
    decay = np.exp(-np.abs(x))

    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x). Both are the same
    # function, written so that the exponent is never positive.
    out = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    # Indexing with an empty tuple turns a 0-d result back into a scalar
    # and leaves n-d arrays unchanged.
    return out[()]

In [1]:
"""
    1. X.mean(axis=0): This calculates the mean value along each 
    column (axis=0) of the training set X. It computes the average 
    value for each feature.

    2. X.std(axis=0): This calculates the standard deviation along 
    each column (axis=0) of the training set X. It computes how much 
    the values within each feature deviate from their mean.

    3. (X - X.mean(axis=0)): This subtracts the mean of each feature 
    from every value in that feature's column. This centers the data 
    around zero.

    4. Divide by X.std(axis=0): This division scales the centered 
    values by the standard deviation of each feature. It 
    standardizes the data, ensuring that each feature has a 
    variance of 1.
"""

def normalize(X):
    """Standardize each column of ``X`` to zero mean and unit variance.

    Args:
        X: Array-like whose first axis indexes samples; statistics are
            computed along axis 0 (one mean/std per column).

    Returns:
        ``(X - mean) / std`` computed column-wise. A column whose values
        are all identical has a standard deviation of 0; it is returned as
        all zeros instead of nan.
    """
    X = np.asarray(X)
    column_mean = X.mean(axis=0)
    column_std = X.std(axis=0)

    # A constant column is already all zeros after centring, so dividing
    # it by 1 instead of 0 avoids 0 / 0 = nan without changing anything else.
    safe_std = np.where(column_std == 0, 1.0, column_std)

    return (X - column_mean) / safe_std

In [None]:
def gradient_descent(x_train, y, y_predicted):
    """Return the weight update direction for binary cross-entropy loss.

    For sigmoid predictions and the mean cross-entropy loss used by
    ``compute_loss``, the gradient with respect to the weights is
    ``X^T (y_predicted - y) / n``. This function returns its negative,
    ``X^T (y - y_predicted) / n``, because the caller *adds*
    ``learning_rate * dw`` to the weights.

    The previous version multiplied the residual by
    ``y_predicted * (1 - y_predicted)``, which is the gradient of a
    squared-error loss, not of the cross-entropy loss this file computes;
    that extra factor also shrinks towards zero when predictions saturate.

    Args:
        x_train: Feature matrix with one row per sample.
        y: Target values, shaped to match ``y_predicted``.
        y_predicted: Current sigmoid outputs of the model.

    Returns:
        The descent direction, one row per column of ``x_train``.
    """
    num_samples = x_train.shape[0]
    residual = y - y_predicted
    dw = np.dot(x_train.T, residual) / num_samples
    return dw

In [None]:
def compute_loss(x_train, y, w):
    """Return the mean binary cross-entropy of a logistic model.

    With logits ``z = x_train . w`` and ``p = sigmoid(z)``, the per-sample
    loss ``-[y*log(p) + (1 - y)*log(1 - p)]`` simplifies algebraically to
    ``log(1 + exp(z)) - y*z``. Evaluating that form directly never takes
    ``log(0)``, so the result stays finite even when predictions saturate
    at 0 or 1.

    Args:
        x_train: Feature matrix with one row per sample.
        y: Target values, shaped to match ``np.dot(x_train, w)``.
        w: Weight matrix.

    Returns:
        The loss averaged over all elements, as a scalar.
    """
    logits = np.dot(x_train, w)

    # logaddexp(0, z) == log(exp(0) + exp(z)) == log(1 + exp(z)), computed
    # without overflow for large z.
    per_sample = np.logaddexp(0, logits) - y * logits

    return np.mean(per_sample)

In [None]:
def logistic_regression(x_train, y, epochs=1000, learning_rate=0.01, early_stopping_threshold=0):
    """Fit logistic-regression weights with batch gradient steps.

    The features are standardized with ``normalize`` and a column of ones
    is appended, so the returned weights apply to standardized features
    and the last weight is the bias term.

    Args:
        x_train: 2-D feature array of shape (num_samples, num_features).
        y: Array of targets holding num_samples values.
        epochs: Maximum number of update steps.
        learning_rate: Multiplier applied to each update.
        early_stopping_threshold: Training stops as soon as the loss drops
            below this value.

    Returns:
        Weight array of shape (num_features + 1, 1).
    """
    n_rows, n_cols = x_train.shape

    # Standardize the features, then append the constant bias column.
    scaled = normalize(x_train)
    bias_column = np.ones((n_rows, 1))
    design = np.hstack((scaled, bias_column))

    # Targets as a column vector so they line up with the predictions.
    targets = y.reshape(n_rows, 1)

    # One weight per feature plus one for the bias, all starting at zero.
    w = np.zeros((n_cols + 1, 1))

    for _epoch in range(epochs):
        # Current hypothesis for every sample.
        probabilities = sigmoid(design.dot(w))

        # Move the weights along the direction reported by gradient_descent.
        step = learning_rate * gradient_descent(design, targets, probabilities)
        w += step

        # Stop early once the loss of the updated weights is small enough.
        if compute_loss(design, targets, w) < early_stopping_threshold:
            break

    return w

In [None]:
def predict(x_train, w, mean=None, std=None):
    """Predict binary class labels (0 or 1) for each row of ``x_train``.

    ``logistic_regression`` learns its weights on features standardized
    with the *training* data's statistics, so the same statistics should
    be used here. Pass them via ``mean`` and ``std`` (for example
    ``train.mean(axis=0)`` and ``train.std(axis=0)``). When both are
    omitted the inputs are standardized with their own statistics, as in
    the original implementation; note that this degenerates for a single
    row, whose per-column standard deviation is 0.

    Args:
        x_train: 2-D feature array of shape (num_samples, num_features).
        w: Weight array of shape (num_features + 1, 1); the last weight is
            the bias.
        mean: Optional per-feature mean to subtract.
        std: Optional per-feature standard deviation to divide by.

    Returns:
        Integer array of shape (num_samples, 1) holding 0/1 predictions.
    """
    x_train = np.asarray(x_train)
    num_samples = x_train.shape[0]

    if mean is None and std is None:
        # Backward-compatible path: standardize with the batch's own statistics.
        features = normalize(x_train)
    else:
        center = x_train.mean(axis=0) if mean is None else np.asarray(mean)
        scale = x_train.std(axis=0) if std is None else np.asarray(std)
        # Guard against division by zero for constant features.
        scale = np.where(scale == 0, 1.0, scale)
        features = (x_train - center) / scale

    # Add a column of 1s for the bias term.
    features = np.concatenate((features, np.ones((num_samples, 1))), axis=1)

    # sigmoid(z) >= 0.5 exactly when z >= 0, so threshold the logits
    # directly; this sidesteps any overflow in the sigmoid for large |z|.
    logits = np.dot(features, w)
    return (logits >= 0).astype(int).reshape(num_samples, 1)