# Load Boston Housing Dataset
https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_boston.html#sklearn.datasets.load_boston

In [25]:
from sklearn.datasets import load_boston
# Fetch the dataset bundle; later cells read its 'data', 'target',
# 'feature_names' and 'DESCR' entries.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn -- confirm the pinned version.
house_data = load_boston()

# Show the description text that ships with the dataset.
print(house_data.DESCR)

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

In [7]:
# N = number of houses, i.e. the number of rows in the data matrix
N = house_data['data'].shape[0]
print(N)

506


In [24]:
# n = number of attributes (also called features, signals, dimensions or columns)
# that describe one house. Taken together, a house's attributes form one row
# (or input, or x).
n = len(house_data['feature_names'])
print(house_data['feature_names'])

['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
 'B' 'LSTAT']


In [8]:
# What's the median value of the first house? (the target, or label, or output y)
print(house_data.target[0])

24.0


In [26]:
# What's the value of feature RM (average number of rooms per dwelling) for the last house?
# RM sits at column 5 of the feature list; row -1 selects the last house.
print(house_data['data'][-1, 5])

6.03


#### House 8 all features
$$
X^{8}
$$


In [18]:
# Full feature vector (one row of the data matrix) for the house at index 8
house_data.data[8]

array([2.1124e-01, 1.2500e+01, 7.8700e+00, 0.0000e+00, 5.2400e-01,
       5.6310e+00, 1.0000e+02, 6.0821e+00, 5.0000e+00, 3.1100e+02,
       1.5200e+01, 3.8663e+02, 2.9930e+01])

#### House 8,  feature 2
(INDUS proportion of non-retail business acres per town)

$$
X_{2}^{8}
$$


In [19]:
# Single entry: row 8 (the house), column 2 (INDUS)
house_data['data'][8, 2]

7.87

#### All houses feature 4
(NOX nitric oxides concentration)

$$
X_{4}
$$


In [23]:
# Column 4 (NOX) for every house, collected into a Python list
NOX = list(house_data['data'][:, 4])
print(NOX)

[0.538, 0.469, 0.469, 0.458, 0.458, 0.458, 0.524, 0.524, 0.524, 0.524, 0.524, 0.524, 0.524, 0.538, 0.538, 0.538, 0.538, 0.538, 0.538, 0.538, 0.538, 0.538, 0.538, 0.538, 0.538, 0.538, 0.538, 0.538, 0.538, 0.538, 0.538, 0.538, 0.538, 0.538, 0.538, 0.499, 0.499, 0.499, 0.499, 0.428, 0.428, 0.448, 0.448, 0.448, 0.448, 0.448, 0.448, 0.448, 0.448, 0.448, 0.439, 0.439, 0.439, 0.439, 0.41, 0.403, 0.41, 0.411, 0.453, 0.453, 0.453, 0.453, 0.453, 0.453, 0.4161, 0.398, 0.398, 0.409, 0.409, 0.409, 0.413, 0.413, 0.413, 0.413, 0.437, 0.437, 0.437, 0.437, 0.437, 0.437, 0.426, 0.426, 0.426, 0.426, 0.449, 0.449, 0.449, 0.449, 0.489, 0.489, 0.489, 0.489, 0.464, 0.464, 0.464, 0.445, 0.445, 0.445, 0.445, 0.445, 0.52, 0.52, 0.52, 0.52, 0.52, 0.52, 0.52, 0.52, 0.52, 0.52, 0.52, 0.547, 0.547, 0.547, 0.547, 0.547, 0.547, 0.547, 0.547, 0.547, 0.581, 0.581, 0.581, 0.581, 0.581, 0.581, 0.581, 0.624, 0.624, 0.624, 0.624, 0.624, 0.624, 0.624, 0.624, 0.624, 0.624, 0.624, 0.624, 0.624, 0.624, 0.624, 0.871, 0.871, 0.8

#### Target value for house 4
$
Y^{4}
$


In [20]:
# Target value for the house at index 4
house_data.target[4]

36.2

Lowercase *x* and *y* are used to represent a *feature* and the *target* of a single house.

#### Which of the following could be valid representations?
$
x^{4}\:, \: x_{2}\:,\:y^{4}\:,\:y_{2}
$



In [26]:
# x2 only