## Data

In [55]:
# Import library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [17]:
from sklearn.datasets import load_boston    # 506 samples, 13 feature

df = load_boston()
x = df.data
y = df.target

## Feature Selection

In [23]:
# pearson's correlation feature selection for numeric input and numeric output
from sklearn.datasets import make_regression
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression

In [42]:
# define feature selection
fs = SelectKBest(score_func = f_regression, k = 10)

In [43]:
# apply feature selection
x_selected = fs.fit_transform(x, y)
print(x_selected.shape)

(506, 10)


## Normalize Data

In [51]:
from sklearn.preprocessing import MinMaxScaler

In [52]:
scaler = MinMaxScaler()

In [53]:
x = scaler.fit_transform(x_selected)


## Divide into Train and Test Data

In [54]:
from sklearn.model_selection import train_test_split

train_X, test_X, train_y, test_y = train_test_split(x, y, test_size = 0.2, random_state = 87)

In [57]:
print("Size of data X train matrix %s"  % str(train_X.shape))
print("Size of data Y train array %s"  % str(train_y.shape))
print("Size of data X test matrix %s"  % str(test_X.shape))
print("Size of data Y test array %s"  % str(test_y.shape))

Size of data X train matrix (404, 10)
Size of data Y train array (404,)
Size of data X test matrix (102, 10)
Size of data Y test array (102,)


In [61]:
## Add bias

In [58]:
one_column_train = np.ones((train_X.shape[0], 1))
train_X_new = np.append(one_column_train, train_X, axis = 1) # Add bias

one_column_test = np.ones((test_X.shape[0], 1))
test_X_new = np.append(one_column_test, test_X, axis = 1)

In [59]:
print("Size of data X train matrix %s"  % str(train_X_new.shape))
print("Size of data Y train array %s"  % str(train_y.shape))
print("Size of data X test matrix %s"  % str(test_X_new.shape))
print("Size of data Y test array %s"  % str(test_y.shape))

Size of data X train matrix (404, 11)
Size of data Y train array (404,)
Size of data X test matrix (102, 11)
Size of data Y test array (102,)


## Build model

In [60]:
def np_cost_vectorized(w, X, y):
    '''
    Evaluate the cost function in a vectorized manner for 
    inputs `X` and targets `t`, at weights `w` and `b`.
    
    X: dataset matrix has (m, n) dimension. 
    y: targets vector has (n, ) dimension.
    w: weights vector has (n, ) dimension
    b: a scalar bias.
    
    Return a scalar cost value of `w`, `b`.
    '''
    
    m = X.shape[0] # number of samples in dataset
    w = np.array(w) # convert to numpy array
    y_hat = np.dot(X, w) # hypothesis
    
    return np.sum((y_hat - y)**2)/(2*m)

In [63]:
def np_grad_fn_vectorized(w, X, y):
    '''
    Given `w` - a current "Guess" of what our weights should be
          `X` - matrix of shape (m, n + 1) of input features
          `y` - target y values
    Return gradient of each weight evaluated at the current value
    '''
    
    #TODO: Complete the below followed the above expressions
    m, n = X.shape
    y_hat = np.dot(X, w)
    grad_w = np.dot(X.T, y_hat - y)/m
    
    return grad_w

In [64]:
def np_solve_via_gradient_descent(X, y, print_every=5000,
                                  niter=100000, alpha=0.005):
    '''
    Given `X` - matrix of shape (m, n+1) of input features
          `y` - target y values
    Solves for linear regression weights.
    Return weights after `niter` iterations.
    '''
    m, n = X.shape
    # initialize all the weights to zeros
    w = np.zeros((n,))
    for k in range(niter):
        
        dw = np_grad_fn_vectorized(w, X, y) 
        w = w - alpha*dw
        
        if k % print_every == 0:
            print('Weight after %d iteration: %s' % (k, str(w)))
    return w

In [65]:
opt_w = np_solve_via_gradient_descent(train_X_new, train_y, niter=500000)

Weight after 0 iteration: [0.11068564 0.00289013 0.01646994 0.03786181 0.03482141 0.06114533
 0.0696155  0.03603987 0.04115908 0.06437591 0.02720058]
Weight after 5000 iteration: [ 17.68349016  -2.91609326   5.81528695  -1.36067871  -0.63287267
  18.6971211    1.8358486   -0.85504806  -3.08287965  -2.31298171
 -10.56191088]
Weight after 10000 iteration: [ 18.68859219  -4.01120425   3.58410881  -0.61196256  -0.57619495
  22.1059845    1.71567547   1.53459542  -3.39120718  -5.56140081
 -15.26504738]
Weight after 15000 iteration: [ 19.37263669  -4.83799158   2.06603354   0.06771047  -0.99680849
  23.46085126   1.70269987   2.84668913  -3.88356093  -6.96536053
 -17.52861825]
Weight after 20000 iteration: [ 19.89194534  -5.48741957   1.19391933   0.63001217  -1.51797153
  23.92590907   1.71594898   3.69663492  -4.34803966  -7.69051108
 -18.65670543]
Weight after 25000 iteration: [ 20.3052759   -6.01931847   0.72056777   1.08845795  -2.00549639
  23.99173807   1.75114831   4.30858136  -4.754

In [66]:
opt_w

array([ 22.72993016,  -9.7357898 ,   0.35018249,   3.22646301,
        -4.37558198,  21.55030308,   2.33631453,   7.44400572,
        -7.39431132,  -9.74937851, -20.52830037])

In [67]:
print("Training cost:", np_cost_vectorized(opt_w, train_X_new, train_y))

Training cost: 12.316321777609623


In [68]:
print("Training cost:", np_cost_vectorized(opt_w, train_X_new, train_y))

Training cost: 12.316321777609623
