In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from scipy import stats
from sklearn.linear_model import LinearRegression

%matplotlib inline

$y = X \cdot \beta+ \epsilon$

$\hat{y} = X \cdot \hat{\beta}$

$\hat{\epsilon}  = y - \hat{y}$

$\hat{\beta} = (X^T \cdot X)^{-1} \cdot (X^T \cdot y) $

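$\hat{\sigma}^{2}  =  \frac{\sum \hat{\epsilon}^{2}}{n-k}$, where $n$ is the number of observations and $k$ is the number of estimated coefficients (columns of $X$, including the intercept)

$\widehat{Var}(\hat{\beta}) = \hat{\sigma}^{2}  (X^T \cdot X)^{-1} $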

In [2]:
class LinearRegressionOLS():
    '''
    Ordinary Least Squares Linear Regression

    Estimates beta_hat = (X^T X)^-1 X^T y together with the usual
    inference statistics (standard errors, t-statistics, two-sided p-values).

    intercept - (default) True - whether to fit an intercept; when True a
                column of ones is prepended to X, so coefs[0] is the intercept

    methods:
    fit() - trains the model
        X - array-like matrix of predictors, shape (n, features)
        y - array-like of targets, shape (n,) or (n, 1)

    predict() - makes predictions
        X - array-like matrix of predictors (same columns as used in fit)

    attributes set by fit() (k = number of design-matrix columns):
    coefs      - estimated coefficients (intercept first when fitted)
    yhat       - fitted values for the training data
    sigma_sq   - residual variance estimate, sum of squared residuals / (n - k)
    sigma_sqrt - same value as sigma_sq; legacy name kept for compatibility
                 (it is the variance, not a square root)
    var_beta   - estimated covariance matrix of the coefficients
    std_beta   - standard errors (sqrt of the diagonal of var_beta)
    t_stat     - t-statistics, coefs / std_beta, flattened to 1-D
    p_val      - two-sided p-values from a t distribution with n - k
                 degrees of freedom
    '''
    def __init__(self,intercept = True):
        self.name = "Ordinary Least Squares Linear Regression"
        self.intercept = intercept

    def __str__(self):
        return self.name

    def _design_matrix(self,X):
        '''Coerce X to a float ndarray and prepend the ones column if an intercept is fitted.'''
        # Float coercion makes list / integer input behave the same with and
        # without an intercept and avoids integer overflow in X^T X.
        X = np.asarray(X, dtype=float)
        if self.intercept:
            X = np.insert(X,0,1,axis =1)
        return X

    def fit(self,X,y):
        '''
        Estimate the coefficients and their inference statistics.

        Raises numpy.linalg.LinAlgError when X^T X is singular.
        '''
        X = self._design_matrix(X)
        # Coerce y too, so list / pandas targets reduce like plain ndarrays below.
        y = np.asarray(y, dtype=float)
        dof = X.shape[0] - X.shape[1]

        # (X^T X)^-1 is needed for both beta_hat and Var(beta_hat): invert once.
        xtx_inv = np.linalg.inv(np.dot(X.T,X))
        self.coefs = np.dot(xtx_inv,np.dot(X.T,y))
        self.yhat = self.predict_internal(X)

        self.sigma_sq = np.sum((y - self.yhat)**2)/dof
        self.sigma_sqrt = self.sigma_sq  # legacy (misleading) attribute name

        self.var_beta = self.sigma_sq * xtx_inv
        self.std_beta = np.sqrt(np.diagonal(self.var_beta))
        self.t_stat = (self.coefs.T/self.std_beta).flatten()
        # sf() keeps precision in the far tail, where 1 - cdf() rounds to 0.
        self.p_val = 2 * stats.t.sf(np.abs(self.t_stat),dof)

    def predict_internal(self,X):
        '''Predict from a design matrix that already contains the intercept column (if any).'''
        return np.dot(X,self.coefs)

    def predict(self,X):
        '''Predict targets for raw predictors X; the intercept column is added here when fitted.'''
        return self.predict_internal(self._design_matrix(X))


In [3]:
# Noise-free synthetic data: a single feature 0..99 with true intercept 5 and slope 2.
X = np.arange(100)[:, np.newaxis]
y = 2*X + 5

In [4]:
# Reference fit with scikit-learn; fit() returns the estimator, so it can be chained.
lin_reg = LinearRegression(fit_intercept=True).fit(X, y)
(lin_reg.intercept_, lin_reg.coef_)

(array([5.]), array([[2.]]))

In [5]:
# Fit the hand-written OLS on the same data; coefs holds the intercept first, then the slope.
myLinearRegression = LinearRegressionOLS(intercept=True)
myLinearRegression.fit(X, y)
myLinearRegression.coefs

array([[5.],
       [2.]])