In [1]:
"""Base for Linear Model"""


import numpy as np


class LinearRegression:
    """
    Ordinary least squares Linear Regression.

    Parameters
    ----------
    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model.
        If set to False, no intercept will be used in calculations
        i.e. data is expected to be centered (manually)

    Attributes
    ----------
    coef_ : array of shape (n_features,)
        Estimated coef. for the linear regression problem.

    intercept_ : float
        Independent term in the linear model.
        Set to 0.0 if `fit_intercept = False`

    Examples
    --------
    >>> import numpy as np
    >>> from ml_from_scratch.linear_model import LinearRegression
    >>> X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
    >>> y = np.dot(X, np.array([1, 2])) + 3
    >>> reg = LinearRegression().fit(X, y)
    >>> reg.coef_
    array([1., 2.])
    >>> reg.intercept_
    3.0...
    >>> reg.predict(np.array([[3, 5]]))
    array([16.])
    """
    def __init__(
        self,
        fit_intercept=True
    ):
        self.fit_intercept = fit_intercept

    def fit(self, X, y):
        """
        Fit linear model.

        Parameters
        ----------
        X : {array-like} of shape (n_samples, n_features)
            Training data

        y : array-like of shape (n_samples,)
            Target values

        Returns
        -------
        self : object
            Fitted estimator.

        Raises
        ------
        ValueError
            If `X` is not 2-dimensional or `X` and `y` disagree on the
            number of samples.
        """
        # Prepare data: np.array(...) always copies, so the caller's
        # objects are never touched.
        X = np.array(X, dtype=float)
        y = np.array(y, dtype=float)

        if X.ndim != 2:
            raise ValueError(
                "X must be 2-dimensional (n_samples, n_features), "
                f"got an array with {X.ndim} dimension(s)"
            )

        # Extract size
        n_samples, n_features = X.shape

        if y.ndim == 0 or y.shape[0] != n_samples:
            raise ValueError(
                f"X and y have inconsistent numbers of samples: "
                f"X has {n_samples}, y has shape {y.shape}"
            )

        # Create the design matrix, A. The column of ones is appended LAST,
        # so the intercept is the last entry of theta.
        if self.fit_intercept:
            A = np.column_stack((X, np.ones(n_samples)))
        else:
            A = X

        # Solve min ||A theta - y||_2 for theta. lstsq is used instead of
        # inv(A.T @ A) @ A.T @ y: forming and inverting A.T @ A squares the
        # condition number and fails outright on collinear features, whereas
        # lstsq returns the minimum-norm solution in that case.
        theta, *_ = np.linalg.lstsq(A, y, rcond=None)

        # Extract model parameters
        if self.fit_intercept:
            self.coef_ = theta[:n_features]
            self.intercept_ = theta[-1]
        else:
            self.coef_ = theta
            self.intercept_ = 0.0

        # Returning self allows chaining: LinearRegression().fit(X, y)
        return self

    def predict(self, X):
        """
        Predict using the linear model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Samples

        Returns
        -------
        y_pred : array, shape (n_samples)
            Returns predicted values
        """
        X = np.array(X)
        y_pred = np.dot(X, self.coef_) + self.intercept_

        return y_pred


In [10]:
def _soft_thresholding(rho_j, z_j, lamda):
    """
    Soft threshold function used for normalized data and Lasso regression
    """
    if (rho_j < -lamda):
        theta_j = rho_j + lamda
    elif (-lamda <= rho_j) and (rho_j <= lamda):
        theta_j = 0
    else:
        theta_j = rho_j - lamda

    return theta_j

class Lasso(LinearRegression):
    """
    Linear model with an L1 penalty (Lasso), fitted by cyclic coordinate descent.

    The coordinate updates minimise

        (1 / (2 * n_samples)) * ||y - X w - b||^2_2 + alpha * ||w||_1

    where the intercept `b` is not penalised.

    Parameters
    ----------
    fit_intercept : bool, default=True
        Whether to estimate an (unpenalised) intercept.

    alpha : float, default=1.0
        Strength of the L1 penalty.

    max_iter : int, default=1000
        Maximum number of full sweeps over the coefficients.

    tol : float, default=1e-4
        Stop as soon as the largest absolute change of any parameter
        during one full sweep is smaller than `tol`.

    Attributes
    ----------
    coef_ : array of shape (n_features,)
        Estimated coefficients.

    intercept_ : float
        Independent term. Set to 0.0 if `fit_intercept = False`

    n_iter_ : int
        Number of sweeps actually performed.
    """
    def __init__ (self, fit_intercept=True, alpha=1.0, max_iter=1000, tol=1e-4):
        super().__init__(fit_intercept=fit_intercept)
        self.alpha = alpha
        self.max_iter = max_iter
        self.tol = tol

    def fit(self, X, y):
        """
        Fit the Lasso model with cyclic coordinate descent.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data

        y : array-like of shape (n_samples,)
            Target values

        Returns
        -------
        self : object
            Fitted estimator.

        Raises
        ------
        ValueError
            If `X` is not 2-dimensional or `y` is not a 1-dimensional
            array with one entry per row of `X`.
        """
        # Prepare the data (np.array copies, so the inputs are not modified)
        X = np.array(X, dtype=float)
        y = np.array(y, dtype=float)

        if X.ndim != 2:
            raise ValueError(
                "X must be 2-dimensional (n_samples, n_features), "
                f"got an array with {X.ndim} dimension(s)"
            )

        # Extract the size
        n_samples, n_features = X.shape

        if y.ndim != 1 or y.shape[0] != n_samples:
            raise ValueError(
                f"y must have shape ({n_samples},), got {y.shape}"
            )

        # Create design matrix; the intercept column is appended LAST.
        if self.fit_intercept:
            A = np.column_stack((X, np.ones(n_samples)))
        else:
            A = X
        n_params = A.shape[1]

        # Initialise theta and the loop-invariant quantities.
        theta = np.zeros(n_params)
        lamda = n_samples * self.alpha          # threshold for the unnormalised update
        z = np.sum(A * A, axis=0)               # z_j = X_j . X_j for every column
        residual = y - A @ theta                # kept in sync with theta below

        self.n_iter_ = 0
        for _ in range(self.max_iter):
            max_change = 0.0

            for j in range(n_params):
                X_j = A[:, j]
                z_j = z[j]
                old_theta_j = theta[j]

                if z_j == 0.0:
                    # An all-zero column cannot explain anything; keep its
                    # parameter at zero instead of dividing by zero.
                    new_theta_j = 0.0
                else:
                    # rho_j = X_j . (y - sum_{k != j} X_k theta_k); the partial
                    # residual is the full residual with feature j added back.
                    rho_j = np.dot(X_j, residual) + z_j * old_theta_j

                    is_intercept = self.fit_intercept and j == n_params - 1
                    if is_intercept:
                        # The intercept is not penalised: plain least squares.
                        new_theta_j = rho_j / z_j
                    else:
                        new_theta_j = _soft_thresholding(rho_j, z_j, lamda) / z_j

                delta = new_theta_j - old_theta_j
                if delta != 0.0:
                    residual -= X_j * delta
                    theta[j] = new_theta_j
                    max_change = max(max_change, abs(delta))

            self.n_iter_ += 1
            if max_change < self.tol:
                break

        # Extract model parameters
        if self.fit_intercept:
            self.coef_ = theta[:n_features]
            self.intercept_ = theta[-1]
        else:
            self.coef_ = theta
            self.intercept_ = 0.0

        return self


In [None]:
# Instantiate the Lasso estimator with its default hyperparameters.
reg = Lasso()

In [5]:
# Toy data: three samples whose two feature columns are identical;
# the target equals either column.
X = np.array([[0,0],[1,1],[2,2]])
y = np.array([0,1,2])



In [7]:
# Preview X with a trailing column of ones appended; the hard-coded 3
# has to equal the number of rows in X for the stack to succeed.
np.column_stack((X, np.ones(3)))

array([[0., 0., 1.],
       [1., 1., 1.],
       [2., 2., 1.]])