# Linear Regression

## 1- Creating Linear Regression Class

In [None]:
import numpy as np

class LinearRegression():
    """
    Linear regression model that can be fitted with Ordinary Least Squares or Gradient Descent.

    A column of ones is prepended to the inputs in both fit and predict, so weights[0] acts as the bias term.
    The fit_* methods declared here are placeholders; the actual implementations are attached to the class
    in later notebook cells.
    """

    def __init__(self, fit_method='ols', learning_rate=0.01, loss_function="mse", l1=0, l2=0, epochs=1000, min_step_size=0.001, gradient_descent='batch', batch_size=32):
        """
        Initialize the LinearRegression model with a specified fitting method.

        Parameters:
        - fit_method: The fitting method to use: "ols" for Ordinary Least Squares, "gd" for Gradient Descent.
        - learning_rate: Learning rate for Gradient Descent.
        - loss_function: Loss function to use. mse for Mean Squared Error, mae for Mean Absolute Error.
        - l1: L1 regularization parameter.
        - l2: L2 regularization parameter.
        - epochs: Number of epochs for Gradient Descent.
        - min_step_size: Minimum step size for Gradient Descent.
        - gradient_descent: Type of gradient_descent. Possible values: "batch", "stochastic", "mini-batch".
        - batch_size: Size of batch for mini-batch gradient descent.
        """

        # general parameters
        self.fit_method = fit_method
        self.learning_rate = learning_rate
        self.loss_function = loss_function

        # regularization parameters (store what the caller passed instead of a hard-coded 0)
        self.l1 = l1
        self.l2 = l2

        # gradient descent parameters
        self.epochs = epochs
        self.min_step_size = min_step_size
        self.gradient_descent = gradient_descent
        self.batch_size = batch_size

        # initialize weights to none; predict() uses this to detect an unfitted model
        self.weights = None # W0 will be bias.

    def fit_ols(self, X, y):
        """Placeholder for the Ordinary Least Squares fit; does nothing until replaced."""
        pass

    def fit_gd(self, X, y):
        """Placeholder for the Gradient Descent dispatcher; does nothing until replaced."""
        pass

    def fit_gd_batch(self, X, y):
        """Placeholder for batch gradient descent; does nothing until replaced."""
        pass

    def fit_gd_stochastic(self, X, y):
        """Placeholder for stochastic gradient descent; does nothing until replaced."""
        pass

    def fit_gd_mini_batch(self, X, y):
        """Placeholder for mini-batch gradient descent; does nothing until replaced."""
        pass

    def fit(self, X, y):
        """
        Fit the model to the data based on selected fit method.

        Parameters:
        - X: Input value array for training data. Should be numpy array with shape (n_samples, n_features).
        - y: Target value array for training data. Should be numpy array with shape (n_samples, ).

        Raises:
        - ValueError: If fit_method is neither "ols" nor "gd".
        """

        # Add bias term coefficient to the X for easier bias term handling.
        X = np.c_[np.ones((X.shape[0], 1)), X]

        if self.fit_method == 'ols':
            self.fit_ols(X, y)
        elif self.fit_method == 'gd':
            self.fit_gd(X, y)
        else:
            raise ValueError("fit_method should be either 'ols' or 'gd'")


    def predict(self, X):
        """
        Predict the target values for given inputs.

        Parameters:
        - X: Input value array for prediction. Should be numpy array with shape (n_samples, n_features).

        Returns:
        - y: Predictions values for input array X. numpy array with shape (n_samples, )

        Raises:
        - ValueError: If the model has no weights yet (fit has not produced any).
        """

        if self.weights is None:
            raise ValueError("Model has not been fitted yet.")

        # Add bias term coefficient to the X for prediction, mirroring what fit() does.
        X = np.c_[np.ones((X.shape[0], 1)), X]

        y = X @ self.weights
        return y

In [None]:
# Helper functions.
def get_gradient(X, y, weights=None):
    """
    Compute the gradient of the Mean Squared Error with respect to the weights.

    The gradient is (2 / n_samples) * X^T (X w - y).

    Parameters:
    - X: Input value array. Should be numpy array with shape (n_samples, n_features).
    - y: Target value array. Should be numpy array with shape (n_samples, ).
    - weights: Point at which the gradient is evaluated, shape (n_features, ). Defaults to all zeros.

    Returns:
    - gradient: numpy array with the same shape as weights.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)

    if weights is None:
        # One weight per column of X (and per target column when y is 2-D).
        weights = np.zeros((X.shape[1],) + y.shape[1:])

    residuals = X @ weights - y
    return 2.0 * (X.T @ residuals) / X.shape[0]

### A- Ordinary Least Squares

This part is taken from Ian Goodfellow, Yoshua Bengio, Aaron Courville - Deep Learning-The MIT Press (2016).\
To minimize the training Mean Squared Error (MSE), we look for the weights at which its gradient is zero:

$$
\nabla_w \text{MSE}_{\text{train}} = 0 \tag{5.6}
$$

This implies:

$$
\nabla_w \left( \frac{1}{m} \| \hat{y}^{(\text{train})} - y^{(\text{train})} \|^2_2 \right) = 0 \tag{5.7}
$$

Substituting the model's prediction $\hat{y}^{(\text{train})} = X^{(\text{train})} w$:

$$
\frac{1}{m} \nabla_w \| X^{(\text{train})} w - y^{(\text{train})} \|^2_2 = 0 \tag{5.8}
$$

Writing the squared norm as an inner product (the constant factor $\frac{1}{m}$ does not change where the gradient vanishes, so it is dropped):

$$
\nabla_w \left( X^{(\text{train})} w - y^{(\text{train})} \right)^{\top} \left( X^{(\text{train})} w - y^{(\text{train})} \right) = 0 \tag{5.9}
$$

Expanding the product:

$$
\nabla_w \left( w^{\top} X^{(\text{train})^{\top}} X^{(\text{train})} w - 2 w^{\top} X^{(\text{train})^{\top}} y^{(\text{train})} + y^{(\text{train})^{\top}} y^{(\text{train})} \right) = 0 \tag{5.10}
$$

Evaluating the gradient with respect to $w$:

$$
2 X^{(\text{train})^{\top}} X^{(\text{train})} w - 2 X^{(\text{train})^{\top}} y^{(\text{train})} = 0 \tag{5.11}
$$

Solving for $w$ (assuming $X^{(\text{train})^{\top}} X^{(\text{train})}$ is invertible):

$$
w = \left( X^{(\text{train})^{\top}} X^{(\text{train})} \right)^{-1} X^{(\text{train})^{\top}} y^{(\text{train})} \tag{5.12}
$$


In [None]:
def fit_ols(self, X, y):
    """
    Fit the model to the data using the ordinary least squares method.

    The weights are the least-squares solution of X w = y. For a full-rank X this equals the
    normal-equation result (X^T X)^-1 X^T y. A least-squares solver is used instead of explicitly
    inverting X^T X so that collinear (rank-deficient) features do not raise an error; in that
    case the minimum-norm solution is returned.

    Parameters:
    - X: Input value array for training data. Should be numpy array with shape (n_samples, n_features).
    - y: Target value array for training data. Should be numpy array with shape (n_samples, ).
    """

    # rcond=None selects the machine-precision based cutoff for small singular values.
    solution, _residuals, _rank, _singular_values = np.linalg.lstsq(X, y, rcond=None)
    self.weights = solution

# Attach the function above to the class as its `fit_ols` method,
# replacing the placeholder of the same name declared in the class body.
LinearRegression.fit_ols = fit_ols

### B- Gradient Descent

#### a- Batch gradient descent

In [None]:
def fit_gd_batch(self, X, y):
    """
    Fit the model to the data using batch gradient descent.
    Batch gradient descent uses all the training data for every weight update.

    Weights start at zero and are updated once per epoch. Training stops after self.epochs
    updates, or earlier once the largest absolute change of any weight in an update falls
    below self.min_step_size.

    The loss gradient is 2/n * X^T (Xw - y) for "mse" and 1/n * X^T sign(Xw - y) for "mae".
    Regularization adds l1 * sign(w) + 2 * l2 * w to the gradient; the first weight is not
    penalized because it multiplies the first column of X, which is expected to be the bias column.

    Parameters:
    - X: Input value array for training data. Should be numpy array with shape (n_samples, n_features).
    - y: Target value array for training data. Should be numpy array with shape (n_samples, ).

    Raises:
    - ValueError: If loss_function is neither "mse" nor "mae", or if X has no rows.
    """

    if self.loss_function not in ("mse", "mae"):
        raise ValueError("loss_function should be either 'mse' or 'mae'")

    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    n_samples, n_features = X.shape
    if n_samples == 0:
        raise ValueError("Cannot fit on an empty training set.")

    # One weight per column of X (and per target column when y is 2-D).
    weights = np.zeros((n_features,) + y.shape[1:])

    for _ in range(self.epochs):
        residuals = X @ weights - y
        if self.loss_function == "mse":
            gradient = 2.0 * (X.T @ residuals) / n_samples
        else:
            # Sub-gradient of the absolute error.
            gradient = (X.T @ np.sign(residuals)) / n_samples

        penalty = self.l1 * np.sign(weights) + 2.0 * self.l2 * weights
        penalty[0] = 0.0  # do not shrink the bias weight

        step = self.learning_rate * (gradient + penalty)
        weights -= step

        # Converged: the update no longer moves any weight noticeably.
        if np.max(np.abs(step)) < self.min_step_size:
            break

    self.weights = weights

# Attach the function above to the class as its `fit_gd_batch` method,
# replacing the placeholder of the same name declared in the class body.
LinearRegression.fit_gd_batch = fit_gd_batch

#### b- Stochastic gradient descent

In [None]:
def fit_gd_stochastic(self, X, y):
    """
    Fit the model to the data using stochastic gradient descent.
    Stochastic gradient descent updates the weights after every single training sample.

    Weights start at zero. In each epoch the samples are visited once in a freshly shuffled
    order (drawn from NumPy's global random state, so np.random.seed makes runs repeatable).
    Training stops after self.epochs epochs, or earlier once the largest absolute weight
    change seen during a whole epoch falls below self.min_step_size.

    The per-sample loss gradient is 2 * x (x.w - y) for "mse" and x * sign(x.w - y) for "mae".
    Regularization adds l1 * sign(w) + 2 * l2 * w to the gradient; the first weight is not
    penalized because it multiplies the first column of X, which is expected to be the bias column.

    Parameters:
    - X: Input value array for training data. Should be numpy array with shape (n_samples, n_features).
    - y: Target value array for training data. Should be numpy array with shape (n_samples, ).

    Raises:
    - ValueError: If loss_function is neither "mse" nor "mae", or if X has no rows.
    """

    if self.loss_function not in ("mse", "mae"):
        raise ValueError("loss_function should be either 'mse' or 'mae'")

    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    n_samples, n_features = X.shape
    if n_samples == 0:
        raise ValueError("Cannot fit on an empty training set.")

    # One weight per column of X (and per target column when y is 2-D).
    weights = np.zeros((n_features,) + y.shape[1:])

    for _ in range(self.epochs):
        largest_step = 0.0

        for i in np.random.permutation(n_samples):
            # Slice instead of index so the sample stays 2-D and the matrix algebra
            # is the same as in the batch case.
            x_i = X[i:i + 1]
            residual = x_i @ weights - y[i:i + 1]
            if self.loss_function == "mse":
                gradient = 2.0 * (x_i.T @ residual)
            else:
                # Sub-gradient of the absolute error.
                gradient = x_i.T @ np.sign(residual)

            penalty = self.l1 * np.sign(weights) + 2.0 * self.l2 * weights
            penalty[0] = 0.0  # do not shrink the bias weight

            step = self.learning_rate * (gradient + penalty)
            weights -= step
            largest_step = max(largest_step, float(np.max(np.abs(step))))

        # Single updates are noisy, so convergence is judged over the whole epoch.
        if largest_step < self.min_step_size:
            break

    self.weights = weights

# Attach the function above to the class as its `fit_gd_stochastic` method,
# replacing the placeholder of the same name declared in the class body.
LinearRegression.fit_gd_stochastic = fit_gd_stochastic

#### c- Mini-batch gradient descent

In [None]:
def fit_gd_mini_batch(self, X, y):
    """
    Fit the model to the data using mini-batch gradient descent.
    Mini-batch gradient descent updates the weights after every group of self.batch_size samples.

    Weights start at zero. In each epoch the samples are shuffled (using NumPy's global random
    state, so np.random.seed makes runs repeatable) and split into consecutive batches; the last
    batch of an epoch may be smaller. Training stops after self.epochs epochs, or earlier once the
    largest absolute weight change seen during a whole epoch falls below self.min_step_size.

    The loss gradient over a batch of b samples is 2/b * X_b^T (X_b w - y_b) for "mse" and
    1/b * X_b^T sign(X_b w - y_b) for "mae". Regularization adds l1 * sign(w) + 2 * l2 * w to the
    gradient; the first weight is not penalized because it multiplies the first column of X,
    which is expected to be the bias column.

    Parameters:
    - X: Input value array for training data. Should be numpy array with shape (n_samples, n_features).
    - y: Target value array for training data. Should be numpy array with shape (n_samples, ).

    Raises:
    - ValueError: If loss_function is neither "mse" nor "mae", if batch_size is smaller than 1,
      or if X has no rows.
    """

    if self.loss_function not in ("mse", "mae"):
        raise ValueError("loss_function should be either 'mse' or 'mae'")
    if self.batch_size < 1:
        raise ValueError("batch_size should be a positive integer")

    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    n_samples, n_features = X.shape
    if n_samples == 0:
        raise ValueError("Cannot fit on an empty training set.")

    # One weight per column of X (and per target column when y is 2-D).
    weights = np.zeros((n_features,) + y.shape[1:])

    for _ in range(self.epochs):
        order = np.random.permutation(n_samples)
        largest_step = 0.0

        for start in range(0, n_samples, self.batch_size):
            rows = order[start:start + self.batch_size]
            X_batch = X[rows]
            residuals = X_batch @ weights - y[rows]
            if self.loss_function == "mse":
                gradient = 2.0 * (X_batch.T @ residuals) / len(rows)
            else:
                # Sub-gradient of the absolute error.
                gradient = (X_batch.T @ np.sign(residuals)) / len(rows)

            penalty = self.l1 * np.sign(weights) + 2.0 * self.l2 * weights
            penalty[0] = 0.0  # do not shrink the bias weight

            step = self.learning_rate * (gradient + penalty)
            weights -= step
            largest_step = max(largest_step, float(np.max(np.abs(step))))

        # Batch updates are noisy, so convergence is judged over the whole epoch.
        if largest_step < self.min_step_size:
            break

    self.weights = weights

# Attach the function above to the class as its `fit_gd_mini_batch` method,
# replacing the placeholder of the same name declared in the class body.
LinearRegression.fit_gd_mini_batch = fit_gd_mini_batch

#### Merge all together

In [None]:
def fit_gd(self, X, y):
    """
    Fit the model with the gradient descent variant selected by self.gradient_descent.

    Parameters:
    - X: Input value array for training data, forwarded unchanged to the selected variant.
    - y: Target value array for training data, forwarded unchanged to the selected variant.

    Raises:
    - ValueError: If self.gradient_descent is not "batch", "stochastic" or "mini-batch".
    """
    variant = self.gradient_descent

    # (accepted value, name of the method that implements it), checked in this order.
    routes = (
        ('batch', 'fit_gd_batch'),
        ('stochastic', 'fit_gd_stochastic'),
        ('mini-batch', 'fit_gd_mini_batch'),
    )

    for label, method_name in routes:
        if variant == label:
            # Look the method up only once a match is found.
            getattr(self, method_name)(X, y)
            return

    raise ValueError("Incorrect gradient_descent value. Possible values: batch, stochastic, mini-batch.")
    
# Attach the function above to the class as its `fit_gd` method,
# replacing the placeholder of the same name declared in the class body.
LinearRegression.fit_gd = fit_gd