In [1]:
import numpy as np

In [2]:
def propagate(coef, intercept, X, y):
    """
    Compute the cost and the gradients for the `LinearRegression` model.

    Args:
        - coef (numpy array): Model coefficients of shape (n,).
        - intercept (float): Model intercept.
        - X (numpy array): Training features of shape (m, n).
        - y (numpy array): Training labels of shape (m,).

    Returns:
        - cost (scalar): Mean squared error of the current predictions.
        - gradients (dictionary): ``"dM"`` is the gradient with respect to
          the coefficients and ``"dC"`` the gradient with respect to the
          intercept.

    Note:
        The values are returned in the order ``(cost, gradients)``.
        The gradients are those of *half* the returned cost, i.e. of
        ``sum((y_hat - y) ** 2) / (2 * m)``; the missing factor of 2 is
        effectively absorbed into the learning rate chosen by the caller.
    """
    num_samples = X.shape[0]

    # Signed prediction error, computed once and reused for the cost and
    # both gradients.
    residual = np.dot(X, coef) + intercept - y

    cost = np.sum(residual ** 2) / num_samples
    dM = np.dot(residual, X) / num_samples  # Gradient of coefficients.
    dC = np.sum(residual) / num_samples  # Gradient of intercept.

    gradients = {
        "dM": dM,
        "dC": dC,
    }

    return cost, gradients

In [3]:
class LinearRegression:
    """
    A simple linear regression model trained with batch gradient descent.

    Attributes:
        coef_ (numpy.ndarray): Coefficients of the linear model
            (None until `fit` has been called).
        intercept_ (float): Intercept of the linear model
            (None until `fit` has been called).
        cost (list): Cost recorded at every iteration of the most recent
            `fit` call.

    Methods:
        fit(X, y): Fit the linear regression model to the training data.
        predict(X): Predict output for new input data.
        score(X, y): Calculate the coefficient of determination (R^2) of the prediction.

    """

    def __init__(self):
        """
        Initialize the LinearRegression object.
        """
        self.coef_ = None
        self.intercept_ = None
        self.cost = []

    def fit(self, X, y, num_iterations=1000, learning_rate=0.01):
        """
        Fit the linear regression model to the training data.

        Args:
            X (numpy.ndarray): Input features of shape (n_samples, n_features).
            y (numpy.ndarray): Target values of shape (n_samples,).
            num_iterations (int): Number of gradient descent steps to run.
            learning_rate (float): Step size applied to each gradient.

        Returns:
            LinearRegression: Self.

        Raises:
            ValueError: If the dimensions of X and y do not match, or if
                there are no samples to fit on.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-dimensional (n_samples, n_features); got shape {X.shape}."
            )
        if y.ndim != 1 or y.shape[0] != X.shape[0]:
            raise ValueError(
                f"y must have shape ({X.shape[0]},) to match X; got shape {y.shape}."
            )
        if X.shape[0] == 0:
            raise ValueError("Cannot fit on an empty dataset.")

        # Every call starts from scratch, so the cost history is reset
        # together with the parameters.
        self.coef_ = np.zeros(X.shape[1])
        self.intercept_ = 0.0
        self.cost = []

        for _ in range(num_iterations):
            cost, gradients = propagate(self.coef_, self.intercept_, X, y)
            self.coef_ -= learning_rate * gradients["dM"]
            self.intercept_ -= learning_rate * gradients["dC"]
            self.cost.append(cost)

        return self

    def predict(self, X):
        """
        Predict output for new input data.

        `fit` must be called first; `coef_` and `intercept_` are None
        until then.

        Args:
            X (numpy.ndarray): Input features of shape (n_samples, n_features).

        Returns:
            numpy.ndarray: Predicted target values of shape (n_samples,).
        """
        return np.dot(X, self.coef_) + self.intercept_

    def score(self, X, y):
        """
        Calculate the coefficient of determination (R^2) of the prediction.

        Args:
            X (numpy.ndarray): Input features of shape (n_samples, n_features).
            y (numpy.ndarray): True target values of shape (n_samples,).

        Returns:
            float: ``1 - SS_res / SS_tot``. When ``y`` is constant
            (``SS_tot == 0``) the result is 1.0 for a perfect prediction
            and 0.0 otherwise, avoiding a division by zero.
        """
        y = np.asarray(y)
        residual_ss = np.sum((y - self.predict(X)) ** 2)
        total_ss = np.sum((y - np.mean(y)) ** 2)
        if total_ss == 0:
            return 1.0 if residual_ss == 0 else 0.0
        return float(1.0 - residual_ss / total_ss)



In [12]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [5]:
import plotly.express as px

In [6]:
X, y = make_regression(n_samples=1000, n_features=5, noise=10, random_state=42)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [8]:
model = LinearRegression()

In [9]:
model.fit(X_train, y_train)

In [10]:
y_pred = model.predict(X_test)

In [13]:
r2_score(y_test, y_pred)

0.9729749053149263

In [14]:
# Now comparing with the `sklearn` implementation.
# Imported under an alias so that it does not rebind the hand-written
# `LinearRegression` class defined earlier in this notebook; otherwise
# re-running the earlier cells would silently build the sklearn model.
from sklearn.linear_model import LinearRegression as SklearnLinearRegression
lin_reg = SklearnLinearRegression()

In [15]:
lin_reg.fit(X_train, y_train)
# Predict once; the result replaces the earlier `y_pred` produced by the
# hand-written model.
y_pred = lin_reg.predict(X_test)