# Quiz regression models

In [64]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_boston
from sklearn.metrics import r2_score

In [16]:
# Print the description text that ships with the dataset object
# (the returned Bunch exposes its fields both as attributes and as keys).
print(load_boston()["DESCR"])

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

In [3]:
# Load the feature matrix and the target together (return_X_y=True yields a
# (data, target) pair) and print the feature-matrix dimensions as a sanity check.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this notebook only runs on an older scikit-learn release.
X, y = load_boston(return_X_y=True)
print(X.shape)

(506, 13)


In [7]:
# Number of target values; should equal the number of rows in X.
y.shape[0]

506

In [13]:
# Fit ordinary least squares on the whole dataset (no train/test split).
# fit() returns the estimator itself, so construction and fitting are chained.
model = LinearRegression().fit(X, y)

# Show the learned weights (one per feature column) and then the bias term,
# each on its own line.
print(model.coef_, model.intercept_, sep="\n")

[-1.08011358e-01  4.64204584e-02  2.05586264e-02  2.68673382e+00
 -1.77666112e+01  3.80986521e+00  6.92224640e-04 -1.47556685e+00
  3.06049479e-01 -1.23345939e-02 -9.52747232e-01  9.31168327e-03
 -5.24758378e-01]
36.459488385089855


In [20]:
# Index 5 corresponds to RM (average number of rooms per dwelling) in the
# attribute order listed by the dataset description.
print("Coeficent for romms {:.2f}".format(model.coef_[5]))

Coeficent for romms 3.81


In [22]:
# The intercept is what the model outputs when every feature equals zero.
# It is scaled by 1000 for display — presumably because the target is recorded
# in thousands of dollars (TODO confirm against the full dataset description).
intercept_scaled = model.intercept_ * 1000
print("Price for house with all variables set at 0 (hypothetical) {:.2f}".format(intercept_scaled))

Price for house with all variables set at 0 (hypothetical) 36459.49


In [41]:
# Observed target for the first sample, kept for comparison with the value
# computed by hand from the fitted coefficients in a later cell.
y[0] # true price for house 0

24.0

In [43]:
# model.intercept_

In [44]:
# model.coef_ 

In [45]:
# X[0]

In [35]:
# np.array([1, 2, 3]) * np.array([1, 2, 4]) 

In [57]:
# Prediction for the first sample, computed by hand from the fitted parameters:
#   y_hat = intercept + coef . x
# np.dot performs the multiply-and-sum in one NumPy call instead of iterating
# over the element-wise product with Python's built-in sum().
# NOTE(review): evaluating this with the coefficients and X[0] shown in this
# notebook gives roughly 30.00, not the 20.47 stored as this cell's output.
# The saved output looks stale (execution counts are out of order) — re-run
# the notebook from a fresh kernel to confirm.
model.intercept_ + np.dot(model.coef_, X[0])  # predicted price from the model

20.468708469605996

In [53]:
# Feature vector of the first sample (one value per attribute, in the order
# given by the dataset description).
X[0]

array([6.320e-03, 1.800e+01, 2.310e+00, 0.000e+00, 5.380e-01, 6.575e+00,
       6.520e+01, 4.090e+00, 1.000e+00, 2.960e+02, 1.530e+01, 3.969e+02,
       4.980e+00])

In [61]:
# Alias for the observed targets so the scoring call reads naturally;
# this binds a second name to the same array, no copy is made.
y_true = y

In [58]:
# Predictions for every sample at once, using the fitted linear formula
#   y_hat = intercept + X @ coef
# The matrix-vector product yields one value per row of X, replacing the
# per-row Python loop (list append + built-in sum) with a single vectorised
# NumPy operation. The result is a 1-D array with X.shape[0] entries.
y_predicted = model.intercept_ + X @ model.coef_

In [68]:
# Coefficient of determination (R^2) of the hand-computed predictions against
# the observed targets, reported to two decimals.
r_squared = r2_score(y_true, y_predicted)
print("coeficent of determination: {:.2f}".format(r_squared))

coeficent of determination: 0.74
