# In [None]:
# Develop the logistic regression algorithm from scratch
'''
Workflow:
    - Initialize parameters (w, b)
    - Compute forward_pass z = (X @ w) + b
    - Sigmoid function a = 1 / (1 + np.exp(-z))
    - compute cost = (1/n_samples) * ∑ -(y*log(y_pred) + (1-y)*log(1 - y_pred))
    - compute ∂/∂w = (1/n_samples) * X.T @ (y_pred - y)
    - compute ∂/∂b = (1/n_samples) * ∑(y_pred - y)
    - update parameters (w, b)
'''

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

class LogisticRegressionFromScratch():
    '''Binary logistic regression trained with batch gradient descent.

    Attributes:
        lr : learning rate used for every parameter update
        epochs : number of full passes over the training data
        w : weight vector (n_features,), None until `fit` is called
        b : bias term, None until `fit` is called
        cost_history : cross-entropy cost recorded at every epoch of the
            most recent `fit` call
    '''
    def __init__(self, lr: float = 0.01, epochs: int = 5000) -> None:
        self.lr = lr
        self.epochs = epochs
        self.w = None
        self.b = None
        self.cost_history = []

    def initialize_parameters(self, n_features: int) -> None:
        '''Initialize the model's parameters (w, b) to zeros

        Args:
            n_features : number of columns in the feature matrix
        '''
        self.w = np.zeros(n_features)
        self.b = 0.0

    def compute_forward_pass(self, X: np.ndarray) -> np.ndarray:
        '''Compute the linear part of the model, z = X @ w + b

        Args:
            X : feature matrix (n_samples, n_features)
        Returns:
            z : raw logits (n_samples,); pass them through `sigmoid`
                to obtain probabilities
        '''
        if isinstance(X, pd.DataFrame):
            X = X.values

        return X @ self.w + self.b

    def compute_cost(self, y: np.ndarray, y_pred: np.ndarray) -> float:
        '''Computes the cost (mean binary cross-entropy loss)

        Args:
            y: True labels (0 or 1)
            y_pred: Predicted probabilities

        Returns:
            Cost (scalar)
        '''
        n_samples = len(y)
        # A small epsilon keeps log() finite when a probability hits 0 or 1.
        eps = 1e-5

        per_sample = -(y * np.log(y_pred + eps) + (1 - y) * np.log(1 - y_pred + eps))
        return (1 / n_samples) * np.sum(per_sample)

    def sigmoid(self, z: np.ndarray) -> np.ndarray:
        '''Sigmoid function, 1 / (1 + exp(-z))

        Args:
            z : raw logits

        Returns:
            Values squashed into the (0, 1) range, same shape as z
        '''
        # Clip so np.exp cannot overflow float64 on extreme logits; the
        # result is indistinguishable from the saturated value.
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    def gradient(self, X: np.ndarray, y_pred: np.ndarray, y: np.ndarray) -> tuple:
        '''Compute partial derivatives of the cost w.r.t. w and b

        Args:
            X : feature matrix (n_samples, n_features)
            y_pred : Predicted probabilities
            y : True labels

        Returns:
            Derivatives w.r.t. w and b as a (dw, db) tuple
        '''
        if isinstance(X, pd.DataFrame):
            X = X.values
        if isinstance(y, pd.Series):
            y = y.values

        n_samples = X.shape[0]

        error = y_pred - y

        dw = (1 / n_samples) * np.dot(X.T, error)
        db = (1 / n_samples) * np.sum(error)
        return dw, db

    def update_parameters(self, dw: np.ndarray, db: float) -> None:
        '''Take one gradient-descent step on w and b

        Args:
            dw : Derivative w.r.t. w
            db : Derivative w.r.t. b
        '''
        self.w -= self.lr * dw
        self.b -= self.lr * db

    # Backward-compatible alias: the original public name carried a typo.
    update_paramters = update_parameters

    def fit(self, X: np.ndarray, y: np.ndarray) -> 'LogisticRegressionFromScratch':
        '''Train logistic model from scratch

        Args:
            X : feature matrix (n_samples, n_features)
            y : true labels (n_samples,)

        Returns:
            The trained model itself, so calls can be chained

        Raises:
            ValueError: if X and y do not contain the same number of samples
        '''
        # convert X and y to numpy arrays if they are pandas objects
        if isinstance(X, pd.DataFrame):
            X = X.values
        if isinstance(y, (pd.Series, pd.DataFrame)):
            y = y.values
        X = np.asarray(X)
        # Flatten so a column vector (n, 1) cannot broadcast against the
        # (n,) predictions into an (n, n) matrix.
        y = np.asarray(y).ravel()

        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                f'X has {n_samples} samples but y has {y.shape[0]} labels'
            )

        # start from a clean state so repeated fits do not mix histories
        self.initialize_parameters(n_features)
        self.cost_history = []

        print('Training a Logistic Regression Model...')
        for epoch in range(1, self.epochs + 1):
            # forward pass followed by the activation function
            z = self.compute_forward_pass(X)
            y_pred = self.sigmoid(z)

            # cost function
            cost = self.compute_cost(y, y_pred)
            self.cost_history.append(cost)

            # partial derivatives w.r.t. w & b (gradients)
            dw, db = self.gradient(X, y_pred, y)

            # update parameters (w, b)
            self.update_parameters(dw, db)

            if epoch % 100 == 0:
                print(f'Cost after {epoch} epochs : {cost:.4f}')

        print('Logistic Regression Model trained successfully')
        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        '''Make predictions based on trained model

        Args:
            X : feature matrix (n_samples, n_features)

        Returns:
            Predicted class labels (0 or 1), one per sample

        Raises:
            RuntimeError: if the model has not been trained yet
        '''
        if self.w is None or self.b is None:
            raise RuntimeError('Model not yet trained. Call the "fit" function first')

        if isinstance(X, pd.DataFrame):
            X = X.values
        # compute probabilities
        z = self.compute_forward_pass(X)
        predictions = self.sigmoid(z)

        # convert probabilities to either 0 or 1 with a threshold of 0.5
        threshold = 0.5
        return (predictions >= threshold).astype(int)

    def get_parameters(self):
        '''Get the learned parameters

        Returns:
            Dict with keys 'weight', 'bias' and 'final_cost'; every value
            is None when the model has not been trained
        '''
        final_cost = self.cost_history[-1] if self.cost_history else None
        return {
            'weight' : self.w,
            'bias' : self.b,
            'final_cost' : final_cost
        }


if __name__ == '__main__':
    # Demo: generate a synthetic binary-classification problem from known
    # parameters, train the model on it, and compare learned vs. true values.

    # Generate data for CLASSIFICATION
    X = np.random.randn(100, 3)

    true_w = np.array([3.0, -2.0, 1.5])
    true_b = 5.0

    # Compute logits
    logits = X @ true_w + true_b

    # Apply sigmoid to get probabilities
    probabilities = 1 / (1 + np.exp(-logits))

    # Sample binary labels (0 or 1) from the probabilities so the data is
    # noisy rather than perfectly separable at the 0.5 threshold.
    y = (np.random.rand(X.shape[0]) < probabilities).astype(int)

    print('='*50)
    print('SYNTHETIC DATA GENERATED')
    print('='*50)
    print(f'Samples : {X.shape[0]}')
    print(f'Features : {X.shape[1]}')
    print(f'True Weight : {true_w}')
    print(f'True bias : {true_b}')
    print('='*50)

    # train model
    model = LogisticRegressionFromScratch()
    model.fit(X, y)

    # get learned parameters
    params = model.get_parameters()

    print('='*50)
    print('LEARNED PARAMETERS')
    params_weight = params['weight']
    params_bias = params['bias']
    params_final_cost = params['final_cost']

    print(f'Learned weight: {params_weight}')
    print(f'Learned Bias : {params_bias}')
    print(f'Final cost after training : {params_final_cost}')

    # compare with true weight and bias
    print('='*50)
    print('DIFFERENCE B/N LEARNED AND TRUE PARAMETERS')
    print('='*50)

    # Element-wise gap per feature; weights are compared against true_w
    # (not the bias) so each learned weight is measured against its target.
    weight_error = np.abs(true_w - params_weight)
    bias_error = abs(true_b - params_bias)

    print(f'Weight Error : {weight_error}')
    print(f'Bias Error : {bias_error}')
