### The Boston Housing Dataset
https://archive.ics.uci.edu/ml/datasets/Housing

In [1]:
from sklearn.datasets import fetch_california_housing
import matplotlib.pyplot as plt
import numpy as np

In [3]:
boston = fetch_california_housing()
boston["data"]

array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
          37.88      , -122.23      ],
       [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
          37.86      , -122.22      ],
       [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
          37.85      , -122.24      ],
       ...,
       [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
          39.43      , -121.22      ],
       [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
          39.43      , -121.32      ],
       [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
          39.37      , -121.24      ]])

In [4]:
x_data = boston.data
y_data = boston.target.reshape(boston.target.size,1)


In [5]:
y_data.shape

(20640, 1)

In [6]:
from sklearn import preprocessing

minmax_scale = preprocessing.MinMaxScaler(feature_range=(0,5)).fit(x_data)
# standard_scale = preprocessing.StandardScaler().fit(x_data)
x_scaled_data = minmax_scale.transform(x_data)

x_scaled_data[:3]

array([[2.69834209, 3.92156863, 0.21756151, 0.10234331, 0.04470417,
        0.00749713, 2.83740701, 1.05577689],
       [2.69013531, 1.96078431, 0.19111977, 0.09464632, 0.33605202,
        0.00570372, 2.82678002, 1.06075697],
       [2.33014027, 5.        , 0.26378232, 0.10970054, 0.06908826,
        0.00848979, 2.82146652, 1.05079681]])

In [19]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(x_scaled_data, y_data, test_size=0.3)

In [20]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((14448, 8), (6192, 8), (14448, 1), (6192, 1))

In [21]:
from sklearn import  linear_model

regr = linear_model.LinearRegression(fit_intercept=True, 
                                     copy_X=True, 
                                     n_jobs=8)
regr.fit(X_train, y_train)
regr

In [22]:
regr.coef_ , regr.intercept_

(array([[ 1.28431537,  0.0964222 , -3.05750605,  4.82368765,  0.01510173,
         -0.8152756 , -0.79365409, -0.87396946]]),
 array([3.65139849]))

In [23]:
# # The coefficients
print('Coefficients: ', regr.coef_)
print('intercept: ', regr.intercept_)   

Coefficients:  [[ 1.28431537  0.0964222  -3.05750605  4.82368765  0.01510173 -0.8152756
  -0.79365409 -0.87396946]]
intercept:  [3.65139849]


In [24]:
regr.predict(x_data[:5])

array([[ 81.42236341],
       [113.26074369],
       [ 79.82315243],
       [ 86.45483611],
       [ 83.17002008]])

In [25]:
x_data[:5].dot(regr.coef_.T) + regr.intercept_

array([[ 81.42236341],
       [113.26074369],
       [ 79.82315243],
       [ 86.45483611],
       [ 83.17002008]])

In [26]:
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

In [27]:
y_true = y_test
y_hat = regr.predict(X_test)

r2_score(y_true, y_hat), mean_absolute_error(y_true, y_hat), mean_squared_error(y_true, y_hat)

(0.5851098673215536, 0.5391035673826492, 0.5587285464708694)

In [28]:
y_true = y_train
y_hat = regr.predict(X_train)

r2_score(y_true, y_hat), mean_absolute_error(y_true, y_hat), mean_squared_error(y_true, y_hat)

(0.6142578806997642, 0.5248485781873539, 0.5111265482210912)