In [146]:
import numpy as np
import pandas as pd
from numpy.linalg import inv
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from statsmodels.regression.linear_model import OLS

In [147]:
# Load the Boston housing dataset bundled with scikit-learn and split it into
# the raw feature matrix (X) and the regression target (y).
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn release to run.
boston = load_boston()
X, y = boston.data, boston.target

In [129]:
# Confirm the dimensions of the raw feature matrix before modifying it.
print(np.shape(X))

(506, 13)


In [130]:
# Keep the column labels of the raw feature matrix for labelling results.
feature_names = boston['feature_names']
print(feature_names)

['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
 'B' 'LSTAT']


In [131]:
int = np.ones(shape=X.shape[0])[..., None]

In [132]:
int.shape

(506, 1)

In [133]:
X = np.concatenate((int, X), 1)

In [134]:
# The design matrix should now have one more column than the raw features
# (the prepended intercept column).
X.shape

(506, 14)

In [135]:
# Inspect the first row: the leading 1.0 is the intercept term, followed by
# the original feature values.
X[0]

array([1.000e+00, 6.320e-03, 1.800e+01, 2.310e+00, 0.000e+00, 5.380e-01,
       6.575e+00, 6.520e+01, 4.090e+00, 1.000e+00, 2.960e+02, 1.530e+01,
       3.969e+02, 4.980e+00])

In [136]:
# Calculate the OLS coefficients with the closed-form normal equations,
#     (X^T X) beta = X^T y.
# The system is solved directly instead of forming inv(X^T X) explicitly:
# it gives the same estimate, but is numerically more stable and avoids
# materialising the inverse matrix.
gram = X.T.dot(X)        # X^T X, square in the number of columns of X
moment = X.T.dot(y)      # X^T y
coeffs = np.linalg.solve(gram, moment)

In [137]:
# Show the raw coefficient vector; the first entry belongs to the intercept
# column that was prepended to X.
coeffs

array([ 3.64594884e+01, -1.08011358e-01,  4.64204584e-02,  2.05586264e-02,
        2.68673382e+00, -1.77666112e+01,  3.80986521e+00,  6.92224640e-04,
       -1.47556685e+00,  3.06049479e-01, -1.23345939e-02, -9.52747232e-01,
        9.31168327e-03, -5.24758378e-01])

In [138]:
# Row labels: the intercept label followed by the original feature names,
# matching the column order of the design matrix.
feature_names = np.concatenate((['INT'], boston.feature_names))

# Tabulate the closed-form estimates so they print with readable labels.
results = pd.Series(coeffs, index=feature_names, name='coeffs').to_frame()

print(results.round(2))

         coeffs
INT       36.46
CRIM      -0.11
ZN         0.05
INDUS      0.02
CHAS       2.69
NOX      -17.77
RM         3.81
AGE        0.00
DIS       -1.48
RAD        0.31
TAX       -0.01
PTRATIO   -0.95
B          0.01
LSTAT     -0.52


In [139]:
# X already carries an explicit column of ones, so scikit-learn must not add
# its own intercept; the intercept is then estimated as an ordinary coefficient.
LR = LinearRegression(fit_intercept=False)

In [140]:
# Fit on the design matrix that includes the intercept column.
LR.fit(X,y)

LinearRegression(copy_X=True, fit_intercept=False, n_jobs=None, normalize=False)

In [141]:
# Because the model was fitted with fit_intercept=False on a matrix with a
# leading column of ones, coef_ holds the intercept first and then the slopes.
coeffs_lr = LR.coef_

In [142]:
# Put the scikit-learn estimates next to the closed-form ones for comparison.
results = results.assign(coeffs_lr=coeffs_lr)

print(results.round(2))

         coeffs  coeffs_lr
INT       36.46      36.46
CRIM      -0.11      -0.11
ZN         0.05       0.05
INDUS      0.02       0.02
CHAS       2.69       2.69
NOX      -17.77     -17.77
RM         3.81       3.81
AGE        0.00       0.00
DIS       -1.48      -1.48
RAD        0.31       0.31
TAX       -0.01      -0.01
PTRATIO   -0.95      -0.95
B          0.01       0.01
LSTAT     -0.52      -0.52


In [144]:
# Third estimate: statsmodels OLS. Note its argument order is the response
# (endog) first and the design matrix (exog) second; no constant is added
# here, so the intercept comes from the column of ones already in X.
coeffs_lm = OLS(endog=y, exog=X).fit().params

In [145]:
# Add the statsmodels estimates as a third column; all three methods should
# agree after rounding.
results = results.assign(coeffs_lm=coeffs_lm)

print(results.round(2))

         coeffs  coeffs_lr  coeffs_lm
INT       36.46      36.46      36.46
CRIM      -0.11      -0.11      -0.11
ZN         0.05       0.05       0.05
INDUS      0.02       0.02       0.02
CHAS       2.69       2.69       2.69
NOX      -17.77     -17.77     -17.77
RM         3.81       3.81       3.81
AGE        0.00       0.00       0.00
DIS       -1.48      -1.48      -1.48
RAD        0.31       0.31       0.31
TAX       -0.01      -0.01      -0.01
PTRATIO   -0.95      -0.95      -0.95
B          0.01       0.01       0.01
LSTAT     -0.52      -0.52      -0.52
