### The Boston Housing Dataset
https://archive.ics.uci.edu/ml/datasets/Housing

In [31]:
from sklearn.datasets import load_boston
import matplotlib.pyplot as plt
import numpy as np

In [37]:
boston = load_boston()
boston["data"]

array([[  6.32000000e-03,   1.80000000e+01,   2.31000000e+00, ...,
          1.53000000e+01,   3.96900000e+02,   4.98000000e+00],
       [  2.73100000e-02,   0.00000000e+00,   7.07000000e+00, ...,
          1.78000000e+01,   3.96900000e+02,   9.14000000e+00],
       [  2.72900000e-02,   0.00000000e+00,   7.07000000e+00, ...,
          1.78000000e+01,   3.92830000e+02,   4.03000000e+00],
       ..., 
       [  6.07600000e-02,   0.00000000e+00,   1.19300000e+01, ...,
          2.10000000e+01,   3.96900000e+02,   5.64000000e+00],
       [  1.09590000e-01,   0.00000000e+00,   1.19300000e+01, ...,
          2.10000000e+01,   3.93450000e+02,   6.48000000e+00],
       [  4.74100000e-02,   0.00000000e+00,   1.19300000e+01, ...,
          2.10000000e+01,   3.96900000e+02,   7.88000000e+00]])

In [38]:
x_data = boston.data
y_data = boston.target.reshape(boston.target.size,1)


In [40]:
y_data.shape

(506, 1)

In [43]:
from sklearn import preprocessing

minmax_scale = preprocessing.MinMaxScaler(feature_range=(0,5)).fit(x_data)
# standard_scale = preprocessing.StandardScaler().fit(x_data)
x_scaled_data = minmax_scale.transform(x_data)

x_scaled_data[:3]

array([[  0.00000000e+00,   9.00000000e-01,   3.39076246e-01,
          0.00000000e+00,   1.57407407e+00,   2.88752635e+00,
          3.20803296e+00,   1.34601570e+00,   0.00000000e+00,
          1.04007634e+00,   1.43617021e+00,   5.00000000e+00,
          4.48399558e-01],
       [  1.17961270e-03,   0.00000000e+00,   1.21151026e+00,
          0.00000000e+00,   8.64197531e-01,   2.73998850e+00,
          3.91349125e+00,   1.74480990e+00,   2.17391304e-01,
          5.24809160e-01,   2.76595745e+00,   5.00000000e+00,
          1.02235099e+00],
       [  1.17848872e-03,   0.00000000e+00,   1.21151026e+00,
          0.00000000e+00,   8.64197531e-01,   3.47192949e+00,
          2.99691040e+00,   1.74480990e+00,   2.17391304e-01,
          5.24809160e-01,   2.76595745e+00,   4.94868627e+00,
          3.17328918e-01]])

In [44]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(x_scaled_data, y_data, test_size=0.33)

In [45]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((339, 13), (167, 13), (339, 1), (167, 1))

In [49]:
from sklearn import  linear_model

regr = linear_model.LinearRegression(fit_intercept=True, 
                                     normalize=False, 
                                     copy_X=True, 
                                     n_jobs=8)
regr.fit(X_train, y_train)
regr

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=8, normalize=False)

In [50]:
regr.coef_ , regr.intercept_

(array([[-1.74870516,  0.81420611,  0.32023416,  0.45346266, -1.87298952,
          3.43891391,  0.40473249, -2.85783169,  1.68116717, -1.52859257,
         -2.16198668,  0.80534714, -4.13197108]]), array([ 27.62538012]))

In [51]:
# # The coefficients
print('Coefficients: ', regr.coef_)
print('intercept: ', regr.intercept_)   

Coefficients:  [[-1.74870516  0.81420611  0.32023416  0.45346266 -1.87298952  3.43891391
   0.40473249 -2.85783169  1.68116717 -1.52859257 -2.16198668  0.80534714
  -4.13197108]]
intercept:  [ 27.62538012]


In [55]:
regr.predict(x_data[:5])

array([[-105.48256773],
       [ -54.38159466],
       [ -41.12185837],
       [ -16.38047559],
       [ -20.5797399 ]])

In [56]:
x_data[:5].dot(regr.coef_.T) + regr.intercept_

array([[-105.48256773],
       [ -54.38159466],
       [ -41.12185837],
       [ -16.38047559],
       [ -20.5797399 ]])

In [57]:
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error


In [60]:
y_true = y_test
y_hat = regr.predict(X_test)

r2_score(y_true, y_hat), mean_absolute_error(y_true, y_hat), mean_squared_error(y_true, y_hat)

(0.76808552219172577, 3.2454326937895184, 19.12676574317053)

In [61]:
y_true = y_train
y_hat = regr.predict(X_train)

r2_score(y_true, y_hat), mean_absolute_error(y_true, y_hat), mean_squared_error(y_true, y_hat)

(0.72227931169968707, 3.4135692282097083, 23.704137714926716)