# Linear regression

An implementation of linear regression via least squares approximation.

Sebastian Thomas

In [None]:
# data
import numpy as np

# machine learning
from sklearn.base import BaseEstimator, RegressorMixin

In [None]:
# linear regression via least squares approximation
class LinearRegression(BaseEstimator, RegressorMixin):
    """Linear regression fitted by least squares approximation.

    Parameters
    ----------
    fit_intercept : bool, default=True
        If true, a column of ones is prepended to the feature matrix so
        that the first fitted coefficient acts as the intercept.

    Attributes
    ----------
    coef_ : ndarray
        Least squares solution, set by ``fit``. When ``fit_intercept`` is
        true, entry 0 is the intercept and the remaining entries are the
        per-feature weights; otherwise it holds only the weights.
    """

    def __init__(self, fit_intercept=True):
        self.fit_intercept = fit_intercept

    def _add_intercept_entries(self, X):
        """Return a copy of the 2-D array ``X`` with a leading column of ones."""
        return np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)

    def fit(self, X_train, y_train):
        """Fit the coefficients to the training data.

        Parameters
        ----------
        X_train : array-like, 2-D
            Feature matrix, one row per sample.
        y_train : array-like
            Target values, one entry (or row) per sample.

        Returns
        -------
        self
            The fitted estimator.
        """
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)

        if self.fit_intercept:
            X_train = self._add_intercept_entries(X_train)

        # Solve the least squares problem on the design matrix itself rather
        # than through the normal equations (X^T X) w = X^T y: forming X^T X
        # squares the condition number and makes the solve fail outright when
        # features are collinear or constant. lstsq returns the same solution
        # for well-posed problems and the minimum-norm one otherwise.
        self.coef_, *_ = np.linalg.lstsq(X_train, y_train, rcond=None)

        return self

    def predict(self, X):
        """Predict targets for the samples in ``X``.

        Parameters
        ----------
        X : array-like, 2-D
            Feature matrix with the same number of columns as used in ``fit``.

        Returns
        -------
        ndarray
            The product of the (optionally intercept-augmented) feature
            matrix with ``coef_``.
        """
        X = np.asarray(X)

        if self.fit_intercept:
            X = self._add_intercept_entries(X)

        return np.matmul(X, self.coef_)

In [None]:
# example: fit the regressor on a bundled regression dataset and report
# R squared and root mean squared error on a held-out test split
import math

try:
    from sklearn.datasets import load_boston as load_example_data
except ImportError:
    # load_boston was removed in scikit-learn 1.2; fall back to another
    # bundled regression dataset so the example still runs (the printed
    # scores then refer to the diabetes data instead of the Boston data).
    from sklearn.datasets import load_diabetes as load_example_data
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

(X, y) = load_example_data(return_X_y=True)

(X_train, X_test, y_train, y_test) = train_test_split(X, y, random_state=0)

# standardize the features before the least squares fit
regressor = make_pipeline(StandardScaler(), LinearRegression())
regressor.fit(X_train, y_train)

# predict once and reuse the result for both metrics
y_pred = regressor.predict(X_test)

print('R squared:', r2_score(y_test, y_pred))
print('root mean squared error:', math.sqrt(mean_squared_error(y_test, y_pred)))