<h2>Assignment 1 - Linear Regression on Boston Housing Data</h2>

In [1]:
# The modules we're going to use
from __future__ import print_function
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error
from scipy import linalg
import numpy as np
import matplotlib.pyplot as plt

# When you execute a code to plot with a simple SHIFT-ENTER, the plot will be shown directly under the code cell
%matplotlib inline

In [2]:
# Load data from scikit-learn, which returns (data, target)
# note: if you call "boston = load_boston()", it returns a dictionary-like object
data, target = datasets.load_boston(True)

# Split the data into two parts: training data and testing data
train_data,test_data,train_target,test_target = train_test_split(data,(target[:, np.newaxis]), test_size=0.2, random_state=42)

<h4>Use scikit-learn library in the following cell</h4>

In [37]:
# Task 1-1: use linear regression in sklearn
model = linear_model.LinearRegression().fit(train_data, train_target)


# Task 1-2: show intercept and coefficents
print('W0 : {:2.5}'.format(model.intercept_[0]))
coefs = model.coef_[0]
for i in range(len(coefs)):
    print('W{:<2}: {:.5f}'.format(i + 1, coefs[i]))

# Task 1-3: show errors on training dataset and testing dataset
train_predict = model.predict(train_data)
test_predict = model.predict(test_data)
train_error = mean_squared_error(train_target, train_predict)
test_error = mean_squared_error(test_target, test_predict)

print('Training Set MSE: {:.5f}'.format(train_error))
print('Test Set MSE: {:.5f}'.format(test_error))


W0 : 30.247
W1 : -0.11306
W2 : 0.03011
W3 : 0.04038
W4 : 2.78444
W5 : -17.20263
W6 : 4.43884
W7 : -0.00630
W8 : -1.44787
W9 : 0.26243
W10: -0.01065
W11: -0.91546
W12: 0.01235
W13: -0.50857
Training Set MSE: 21.64141
Test Set MSE: 24.29112


<h4>Use analytical solution (normal equation) to perform linear regression in the following cell</h4>

In [39]:
# Task 2-1: Implement a function solving normal equation 
# Inputs: Training data and  training label
# Output: Weights
def myNormalEqualFun(X,y):
    return np.matmul(np.matmul(linalg.inv(np.matmul(np.transpose(X), X)), np.transpose(X)), y)  
    
# Task 2-2: Implement a function performing prediction
# Inputs: Testing data and weights
# Output: Predictions
def myPredictFun(X,w):
    return np.matmul(X, w)

# Here we insert a column of 1s into training_data and test_data (to be consistent with our lecture slides)
train_data_intercept = np.insert(train_data, 0, 1, axis=1)
test_data_intercept = np.insert(test_data, 0, 1, axis=1)

# Here we call myNormalEqual to train the model and get weights
w = myNormalEqualFun(train_data_intercept,train_target)

# Task 2-3: show intercept and coefficents
for i in range(len(w)):
    print("W{:<2}: {:.5f}".format(i, w[i][0]))

# Task 2-4: show errors on training dataset and testing dataset
train_predict = myPredictFun(train_data_intercept, w)
test_predict = myPredictFun(test_data_intercept, w)
train_error = mean_squared_error(train_predict, train_target)
test_error = mean_squared_error(test_predict, test_target)

print('Training Set MSE: {:.5f}'.format(train_error))
print('Test Set MSE: {:.5f}'.format(test_error))

W0 : 30.24675
W1 : -0.11306
W2 : 0.03011
W3 : 0.04038
W4 : 2.78444
W5 : -17.20263
W6 : 4.43884
W7 : -0.00630
W8 : -1.44787
W9 : 0.26243
W10: -0.01065
W11: -0.91546
W12: 0.01235
W13: -0.50857
Training Set MSE: 21.64141
Test Set MSE: 24.29112


<h4>Use numerical solution (baisc gradient descent) to perform linear regression in the following cell</h4>

In [7]:
# Feature scaling
scaler = preprocessing.StandardScaler().fit(train_data)
train_data = scaler.transform(train_data)
test_data = scaler.transform(test_data)


# Task 3-1: Implement a function performing gradient descent
# Inputs: Training data, training label, leaerning rate, number of iterations
# Output: the final Weights
#         the loss history along iterations
def myGradientDescentFun(X,y,learning_rate,numItrs):
    return 

# Task 3-2: Implement a function performing prediction
# Inputs: Testing data and weights
# Output: Predictions
def myPredictFun(X,w):
    return

# Here we insert a column of 1s into training_data and test_data (to be consistent with our lecture slides)
train_data_intercept = np.insert(train_data, 0, 1, axis=1)
test_data_intercept = np.insert(test_data, 0, 1, axis=1)

# Here we call myGradientDescentFun to train the model and get weights
# Note: you need to figure out good learning rate value and the number of iterations
w, loss = myGradientDescentFun(train_data_intercept,train_target,0,0)

# Task 3-3: show intercept and coefficents


# Task 3-4: show errors on training dataset and testing dataset


# Task 3-5: plot learning curves showing training errors and testing errors along iterations

TypeError: cannot unpack non-iterable NoneType object

<h4>Use numerical solution (stochastic gradient descent) to perform linear regression in the following cell</h4>

In [None]:
# Feature scaling
scaler = preprocessing.StandardScaler().fit(train_data)
train_data = scaler.transform(train_data)
test_data = scaler.transform(test_data)


# Task 4-1: Implement a function performing gradient descent
# Inputs: Training data, training label, leaerning rate, number of epoches, batch size
# Output: the final Weights
#         the loss history along batches
def myGradientDescentFun(X,y,learning_rate,epoches, batchsize):
    return 

# Task 4-2: Implement a function performing prediction
# Inputs: Testing data and weights
# Output: Predictions
def myPredictFun(X,w):
    return

# Here we insert a column of 1s into training_data and test_data (to be consistent with our lecture slides)
train_data_intercept = np.insert(train_data, 0, 1, axis=1)
test_data_intercept = np.insert(test_data, 0, 1, axis=1)

# Here we call myGradientDescentFun to train the model and get weights
# Note: you need to figure out good learning rate value and the number of iterations
w, loss = myGradientDescentFun(train_data_intercept,train_target,0,0,0)

# Task 4-3: show intercept and coefficents


# Task 4-4: show errors on training dataset and testing dataset


# Task 4-5: plot learning curves showing training errors and testing errors along bath