### Housing Data Description:
1. Title: Boston Housing Data

2. Sources:
   (a) Origin:  This dataset was taken from the StatLib library which is
                maintained at Carnegie Mellon University.
   (b) Creator:  Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the 
                 demand for clean air', J. Environ. Economics & Management,
                 vol.5, 81-102, 1978.
   (c) Date: July 7, 1993

3. Past Usage:
   -   Used in Belsley, Kuh & Welsch, 'Regression diagnostics ...', Wiley, 
       1980.   N.B. Various transformations are used in the table on
       pages 244-261.
    -  Quinlan, R. (1993). Combining Instance-Based and Model-Based Learning.
       In Proceedings of the Tenth International Conference on Machine 
       Learning, 236-243, University of Massachusetts, Amherst. Morgan
       Kaufmann.

4. Relevant Information:

   Concerns housing values in suburbs of Boston.

5. Number of Instances: 506

6. Number of Attributes: 13 continuous attributes (including "class"
                         attribute "MEDV"), 1 binary-valued attribute.

7. Attribute Information:

    1. CRIM      per capita crime rate by town
    2. ZN        proportion of residential land zoned for lots over 
                 25,000 sq.ft.
    3. INDUS     proportion of non-retail business acres per town
    4. CHAS      Charles River dummy variable (= 1 if tract bounds 
                 river; 0 otherwise)
    5. NOX       nitric oxides concentration (parts per 10 million)
    6. RM        average number of rooms per dwelling
    7. AGE       proportion of owner-occupied units built prior to 1940
    8. DIS       weighted distances to five Boston employment centres
    9. RAD       index of accessibility to radial highways
    10. TAX      full-value property-tax rate per $10,000
    11. PTRATIO  pupil-teacher ratio by town
    12. B        1000(Bk - 0.63)^2 where Bk is the proportion of blacks 
                 by town
    13. LSTAT    % lower status of the population
    14. MEDV     Median value of owner-occupied homes in $1000's

8. Missing Attribute Values:  None.




In [150]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, linear_model
import time
import csv
from sklearn.preprocessing import StandardScaler

def load_dataset(filename):
    '''Load a comma-separated file into a NumPy array.

    Each non-empty record of the file becomes one row of the result.
    Values are not converted: the array holds the fields as the strings
    produced by the csv reader, so numeric callers must cast the result
    themselves (for example with ``.astype(float)``).

    Args:
        filename: Path of the CSV file to read. Fields are separated by
            ``,`` and may be quoted with ``"``.

    Returns:
        A numpy array built from the list of parsed records (one row per
        record, one column per field).
    '''
    # newline='' lets the csv module do its own newline handling, which is
    # required for quoted fields that contain line breaks.
    with open(filename, 'r', newline='') as source:
        reader = csv.reader(source, delimiter=',', quotechar='"')
        # The reader yields [] for blank lines; keeping them would make the
        # rows ragged and break the conversion to a rectangular array.
        rows = [row for row in reader if row]

    return np.asarray(rows)


def linear_grad_func(theta, x, y):
    '''Return the gradient of the linear-regression cost with respect to theta.

    The result is the per-parameter average of ``(prediction - y) * feature``,
    where the feature matrix is ``x`` with a leading column of ones for the
    intercept.

    Args:
        theta: Parameters, intercept first; forwarded to ``linear_val_func``.
        x: Feature matrix without the bias column.
        y: Targets. The residual is broadcast against the feature matrix, so
            this presumably has to be a column of shape (m, 1), as passed by
            ``linear_regression`` -- TODO confirm for other callers.

    Returns:
        The gradient, one entry per parameter (intercept first).
    '''
    n_samples = y.size
    residual = linear_val_func(theta, x) - y
    # Prepend the bias column so the intercept gets its own gradient entry.
    design = np.c_[np.ones(x.shape[0]), x]
    weighted = residual * design
    return np.sum(weighted, axis = 0) * 1.0 / n_samples


def linear_val_func(theta, x):
    '''Evaluate the linear model for every row of ``x``.

    A column of ones is prepended to ``x`` so that the first entry of
    ``theta`` acts as the intercept.

    Args:
        theta: Parameters, intercept first. With a (1, n + 1) row vector the
            result is a column with one prediction per sample.
        x: Feature values without the bias column.

    Returns:
        The product of the bias-augmented features with ``theta.T``.
    '''
    bias = np.ones(x.shape[0])
    design = np.c_[bias, x]
    return design.dot(theta.T)


def linear_cost_func(theta, x, y):
    '''Return the halved mean squared error of the linear model.

    Computes ``sum((prediction - y) ** 2) / (2 * m)`` where ``m`` is the
    number of rows of ``x`` and predictions come from ``linear_val_func``.

    Args:
        theta: Parameters, intercept first.
        x: Feature matrix without the bias column.
        y: Targets, shaped to match the predictions.

    Returns:
        The scalar cost.
    '''
    residual = linear_val_func(theta, x) - y
    n_samples = x.shape[0]
    return np.sum(np.square(residual)) / (2 * n_samples)


def linear_grad_desc(theta, X_train, Y_train, lr=0.1, max_iter=1000, converge_change=.00005):
    '''Fit theta by batch gradient descent.

    Repeats ``theta = theta - lr * gradient`` until the absolute change in
    cost between two consecutive steps is no longer greater than
    ``converge_change``, or until the step counter reaches ``max_iter``
    (so at most ``max_iter - 1`` updates are performed).

    Args:
        theta: Initial parameters, intercept first.
        X_train: Training features without the bias column.
        Y_train: Training targets.
        lr: Learning rate applied to every gradient step.
        max_iter: Upper bound for the step counter.
        converge_change: Cost-change threshold that stops the iteration.

    Returns:
        A tuple ``(theta, cost_iter)`` with the final parameters and a list
        of ``[step, cost]`` pairs, starting with the initial cost at step 0.
    '''
    current_cost = linear_cost_func(theta, X_train, Y_train)
    cost_iter = [[0, current_cost]]

    # Start with a change of 1 so the loop is entered for the usual
    # (smaller than 1) thresholds.
    delta = 1
    step = 1
    while step < max_iter and delta > converge_change:
        theta = theta - lr * linear_grad_func(theta, X_train, Y_train)

        previous_cost = current_cost
        current_cost = linear_cost_func(theta, X_train, Y_train)
        cost_iter.append([step, current_cost])

        delta = abs(current_cost - previous_cost)
        step += 1

    return theta, cost_iter


def linear_regression(X, Y):
    '''Fit a linear model with gradient descent and print its results.

    The last 20 samples are held out for testing and the rest are used for
    training. Parameters start from uniform random values. The fitted
    coefficients, the intercept and the root-mean-squared error on the
    held-out samples are printed; nothing is returned.

    Args:
        X: 2-D feature matrix, one row per sample.
        Y: 1-D target vector; it is turned into a column internally.
    '''
    holdout = 20
    X_train, X_test = X[:-holdout, :], X[-holdout:, :]
    # Indexing with None adds a trailing axis, giving column vectors.
    Y_train, Y_test = Y[:-holdout, None], Y[-holdout:, None]

    # One parameter per feature plus the intercept, stored as a single row.
    n_params = X_train.shape[1] + 1
    initial_theta = np.random.rand(1, n_params)
    fitted_theta, _ = linear_grad_desc(initial_theta, X_train, Y_train, lr=0.1, max_iter=1000)

    print('Coefficients: {}'.format(fitted_theta[0,1:]))
    print('Intercept: {}'.format(fitted_theta[0,0]))

    residual = linear_val_func(fitted_theta, X_test) - Y_test
    rmse = np.sqrt(np.sum(residual**2) / Y_test.shape[0])
    print('RMSE: {}'.format(rmse))


def sklearn_linear_regression(X, Y):
    '''Fit scikit-learn's LinearRegression and print its results.

    Uses the same split as ``linear_regression``: the last 20 samples are
    held out for testing and the rest are used for training. The fitted
    coefficients, the intercept and the root-mean-squared error on the
    held-out samples are printed; nothing is returned.

    Args:
        X: 2-D feature matrix, one row per sample.
        Y: 1-D target vector; it is turned into a column internally.
    '''
    # split dataset into training and testing
    X_train = X[:-20, :]  # everything except the last 20 rows
    X_test = X[-20:, :]   # the last 20 rows

    # Indexing with None adds a trailing axis, giving column vectors.
    Y_train = Y[:-20, None]
    Y_test = Y[-20:, None]

    # Linear regression
    regressor = linear_model.LinearRegression()
    regressor.fit(X_train, Y_train)
    print('Coefficients: {}'.format(regressor.coef_))
    print('Intercept: {}'.format(regressor.intercept_))
    # The square root of the mean squared error is the RMSE, so label it as
    # such (matching the label printed by linear_regression).
    rmse = np.sqrt(np.mean((regressor.predict(X_test) - Y_test) ** 2))
    print('RMSE: {}'.format(rmse))




dataset = datasets.load_diabetes() <br>
X = dataset.data[:, 2] # select only the single feature at column index 2 <br>
Y = dataset.target <br>
<br>
#split dataset into training and testing <br>
X_train = X[:-20, None] # if there is a "None", then the shape is (422, 1); if no "None", the shape is (422,) <br>
X_train.shape <br>

In [151]:
# The CSV reader yields strings, so convert the whole table to floats.
dataset = load_dataset("housing.csv").astype(float)

# Every column but the last is a feature; the last column is the target
# (presumably MEDV, per the description above -- confirm against the file).
X, Y = dataset[:, :-1], dataset[:, -1]

In [152]:
# Standardise the features: learn the scaling on X and apply it in one step.
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [153]:
X

array([[-0.41978169,  0.28482986, -1.2879095 , ..., -1.45900038,
         0.44105193, -1.0755623 ],
       [-0.41733901, -0.48772236, -0.59338101, ..., -0.30309415,
         0.44105193, -0.49243937],
       [-0.41734134, -0.48772236, -0.59338101, ..., -0.30309415,
         0.39642699, -1.2087274 ],
       ..., 
       [-0.41344632, -0.48772236,  0.11573841, ...,  1.17646583,
         0.44105193, -0.98304761],
       [-0.40776382, -0.48772236,  0.11573841, ...,  1.17646583,
         0.4032249 , -0.86530163],
       [-0.41499991, -0.48772236,  0.11573841, ...,  1.17646583,
         0.44105193, -0.66905833]])

In [154]:
linear_regression(X, Y)

fit_theta.shape: (1, 14)
Coefficients: [-0.86997643  1.12646105  0.0869309   0.66987002 -1.94926057  2.66630715
  0.17462796 -3.08224541  2.74998974 -2.31249044 -1.94220404  0.81963946
 -3.92412486]
Intercept: 22.5296725975
MSE: 16.8481478038


In [155]:
sklearn_linear_regression(X, Y)

Coefficients: [[-0.88280743  1.15359385  0.15776754  0.65753378 -1.94403148  2.64605414
   0.19130621 -3.06590674  3.04944061 -2.64326581 -1.95317192  0.81788036
  -3.93888666]]
Intercept: [ 22.52074855]
MSE:17.8826861849


In [67]:
X.shape

(506, 13)

In [91]:
np.random.rand(1, 3)

array([[ 0.72207103,  0.90535399,  0.57333056]])

In [92]:
?np.c_

In [157]:
np.sqrt(9)

3.0