In [None]:
from IPython.display import display, HTML

def load_styles():
    """Render the notebook's shared stylesheet as an HTML output.

    Builds a single ``<style>`` element holding the CSS rules for the note
    containers, boxes, code blocks and highlight helpers, plus bare ``ul``,
    ``li`` and ``h1`` selectors, and passes it to ``display(HTML(...))``.
    Returns nothing.
    """
    stylesheet_markup = """
    <style>
      .notes-container {
        font-family: Arial, sans-serif;
        color: #333;
        line-height: 1.6;
        text-align: left;
      }
      .notes-container h1 {
        color: #3498db;
        border-bottom: 2px solid #3498db;
        padding-bottom: 10px;
        margin: 0;
        text-align: left;
      }
      .note-box {
        background-color: #f8f9fa;
        padding: 20px;
        border-radius: 8px;
        margin: 20px 0;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        width: 100%;
      }
      .highlight-box {
        background-color: #e8f4f8;
        padding: 15px;
        border-left: 4px solid #3498db;
        border-radius: 4px;
      }
      .highlight-box strong {
        color: #3498db;
        display: block;
        margin-top: 5px;
      }
      .code-block {
        margin: 8px 0;
        font-family: 'Courier New', monospace;
        font-size: 25px;
        padding: 10px;
        border-radius: 4px;
        overflow-x: auto;
      }
      .note-description {
        font-size: 14px;
        color: #555;
        margin-top: 5px;
      }
      .highlight-text {
        color: #ee5d6c;
      }
      .highlight-heading {
        color: #3498db;
      }
      ul {
        padding-left: 20px;
        margin: 0;
        text-align: left;
      }
      li {
        text-align: left;
      }
      h1 {
        color: #3498db !important;
        border-bottom: 2px solid #3498db !important;
        padding-bottom: 10px !important;
        margin: 0 0 10px 0 !important;
        text-align: left !important;
        font-family: Arial, sans-serif !important;
      }
    </style>

    """
    # The bare ul/li/h1 selectors are not scoped to a container class.
    display(HTML(stylesheet_markup))

# Emit the stylesheet into this cell's output.
load_styles()


In [4]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

# Intercept used when generating the synthetic targets.
true_bias = -15

# coef=True makes make_regression return the generating coefficients as a
# third value; they are kept in `true_weights`.
features, target, true_weights = make_regression(
    n_samples=30000,
    n_features=5,
    n_informative=3,
    bias=true_bias,
    coef=True,
    random_state=42,
)

# Shuffled 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.3,
    shuffle=True,
    random_state=42,
)



array([68.13176519,  0.        , 81.08849711,  0.        , 47.54299311])

In [12]:
class CustomLinearRegression:
    """Linear regression fitted by full-batch gradient descent.

    The cost recorded at every iteration is

        J(w, b) = 1/2 * ( mean((y - (X @ w + b))**2) + (strength / m) * R(w) )

    where ``m`` is the number of samples and ``R(w)`` is ``sum(w**2)`` for
    ``'l2'``, ``sum(|w|)`` for ``'l1'`` and ``0`` when ``regularize`` is False.
    The bias is never penalised.

    L2 is optimised with its analytic gradient. L1 is optimised with a
    proximal (soft-thresholding) step so that weights can become exactly zero.

    Parameters
    ----------
    number_of_iterations : int
        Number of gradient-descent steps performed by ``fit``.
    learning_rate : float
        Step size applied to both the weight and bias updates.
    regularize : bool
        Whether a penalty on the weights is added to the cost.
    regularization_strength : float
        Multiplier of the penalty term (``strength`` in the formula above).
    regularization_type : {'l2', 'l1'}
        Which penalty to use when ``regularize`` is True.

    Raises
    ------
    ValueError
        If ``regularize`` is True and ``regularization_type`` is not
        ``'l1'`` or ``'l2'``.
    """

    _SUPPORTED_REGULARIZATION = ('l1', 'l2')

    def __init__(self, number_of_iterations=1000, learning_rate=0.1,
                regularize=False, regularization_strength=1, regularization_type='l2'):
        # An unknown penalty name used to fall through to the unregularised
        # branches silently; fail loudly instead.
        if regularize and regularization_type not in self._SUPPORTED_REGULARIZATION:
            raise ValueError(
                f"regularization_type must be one of {self._SUPPORTED_REGULARIZATION}, "
                f"got {regularization_type!r}"
            )

        self.weights : np.ndarray = None  # (n_features, ), set by fit
        self.bias : float = None  # scalar, set by fit
        self.number_of_iterations : int = number_of_iterations
        self.learning_rate : float = learning_rate
        self.cost_history : list[float] = []  # one cost per iteration of the latest fit
        self.regularize = regularize
        self.regularization_strength = regularization_strength
        self.regularization_type = regularization_type

    # ---------------------------------------------------------------------------------------------------------

    def _compute_loss(self, true_values, predicted_values) -> float:
        """Return half of (mean squared error + penalty) for the current weights."""
        errors = true_values - predicted_values
        mean_squared_error = np.mean(np.square(errors))

        penalty = 0.0
        if self.regularize:
            number_of_samples = true_values.shape[0]
            scale = self.regularization_strength / number_of_samples
            if self.regularization_type == 'l2':
                penalty = scale * np.sum(np.square(self.weights))
            elif self.regularization_type == 'l1':
                penalty = scale * np.sum(np.abs(self.weights))

        return 1/2 * (mean_squared_error + penalty)

    # ---------------------------------------------------------------------------------------------------------

    def _forward_propagation(self, input_matrix, true_values):
        """Predict with the current parameters and record the resulting cost.

        Returns only the predictions; the cost is appended to
        ``self.cost_history`` as a side effect.
        """
        y_hat = input_matrix @ self.weights + self.bias
        self.cost_history.append(self._compute_loss(true_values, y_hat))
        return y_hat

    # ---------------------------------------------------------------------------------------------------------

    def _backward_propagation(self, feature_matrix, true_values, predicted_values):
        """Return ``(weights_gradient, bias_derivative)`` of the differentiable cost.

        The L2 penalty gradient is included. The L1 penalty is deliberately
        NOT included: it is applied by the soft-thresholding step in
        ``_update``, and adding a subgradient here as well would apply the
        penalty twice.
        """
        number_of_samples = feature_matrix.shape[0]
        error = predicted_values - true_values

        # The bias is not regularised, so its derivative has no penalty term.
        bias_derivative = np.mean(error)

        weights_gradient = 1 / number_of_samples * (feature_matrix.T @ error)

        if self.regularize and self.regularization_type == 'l2':
            l2_penalty = (self.regularization_strength / number_of_samples) * self.weights
            weights_gradient = weights_gradient + l2_penalty

        return weights_gradient, bias_derivative

    # ---------------------------------------------------------------------------------------------------------

    def _update(self, weights_gradient, bias_derivative, number_of_samples=None):
        """Take one descent step on the weights and the bias.

        For L1 the gradient step is followed by soft-thresholding. The
        threshold is ``learning_rate * strength / (2 * number_of_samples)``,
        i.e. the step size times the coefficient that multiplies ``sum(|w|)``
        in ``_compute_loss``, so the update minimises the recorded cost.

        Raises
        ------
        ValueError
            If L1 regularisation is active and ``number_of_samples`` is not given.
        """
        stepped_weights = self.weights - (self.learning_rate * weights_gradient)

        if self.regularize and self.regularization_type == 'l1':
            if number_of_samples is None:
                raise ValueError("number_of_samples is required for the L1 proximal update")
            threshold = self.learning_rate * self.regularization_strength / (2 * number_of_samples)
            # Soft-thresholding: shrink every weight toward zero by `threshold`
            # and clamp the ones that would cross zero to exactly zero.
            stepped_weights = np.sign(stepped_weights) * np.maximum(np.abs(stepped_weights) - threshold, 0)

        self.weights = stepped_weights
        self.bias = self.bias - (self.learning_rate * bias_derivative)

    # ---------------------------------------------------------------------------------------------------------

    def _init_parameters(self, n_features):
        """Draw small random initial weights (std 0.01) and zero the bias."""
        self.weights = np.random.normal(0.0, 0.01, size=n_features)
        self.bias = 0.0

    # ---------------------------------------------------------------------------------------------------------

    def fit(self, X, y):
        """Train on ``X`` (n_samples, n_features) and ``y`` (n_samples,).

        Parameters and ``cost_history`` are re-initialised on every call, so
        refitting the same instance starts from scratch. A column-vector
        ``y`` of shape (n_samples, 1) is flattened. Returns ``self``.
        """
        # A (m, 1) target would broadcast against (m,) predictions into an
        # (m, m) error matrix; flatten it to the expected 1-D shape.
        if np.ndim(y) == 2 and np.shape(y)[1] == 1:
            y = np.ravel(y)

        self._init_parameters(X.shape[1])
        self.cost_history = []
        number_of_samples = X.shape[0]

        for _ in range(self.number_of_iterations):
            y_hat = self._forward_propagation(X, y)
            weights_derivative, bias_derivative = self._backward_propagation(X, y, y_hat)
            self._update(weights_derivative, bias_derivative, number_of_samples)

        return self

    # ---------------------------------------------------------------------------------------------------------

    def predict(self, X):
        """Return ``X @ weights + bias`` using the fitted parameters."""
        predictions = X @ self.weights + self.bias
        return predictions

    # ---------------------------------------------------------------------------------------------------------

    def plot_losses(self, ax=None):
        """Plot the recorded cost per iteration on ``ax`` (current axes if None).

        Raises
        ------
        ValueError
            If no cost has been recorded yet (``fit`` has not been run).
        """
        if not self.cost_history:
            raise ValueError("cost_history is empty; call fit() before plot_losses()")

        if ax is None:
            ax = plt.gca()

        sns.lineplot(self.cost_history, label=f'Final cost: {self.cost_history[-1]:.5f}', ax=ax)
        ax.set_xlabel('Iterations')
        ax.set_ylabel('Cost')
        ax.set_title('Training Cost Vs Iteration')
        ax.grid()
        ax.legend()