# ISLR Sec 6-6 Problem 11

In [None]:
from __future__ import print_function
import numpy as np
import pandas as pd
from sklearn.linear_model import RidgeCV,LassoCV,LinearRegression
from sklearn.cross_validation import train_test_split
from sklearn.metrics import mean_squared_error

# Load the Boston data set, which has already been cleaned
# (the path is relative to the current working directory)
df = pd.read_csv('../Data/Boston.csv')
# Print the frame's shape as a quick sanity check on the load
print(df.shape)
# Bare expression: previews the first three rows when run as a notebook cell
df.head(3)

### (a)

In [None]:
# Build the design matrix and the response, then hold out a test set.
# Every column except 'crim' is a predictor; 'crim' is the response.
predictors = df.drop('crim', axis=1)
col = predictors.columns
x = predictors.values
# Response as a single-column 2-D array (one row per observation)
y = np.reshape(df.crim.values, (-1, 1))
# 75/25 train/test split with a fixed seed so the split is repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=0)

In [None]:
# Ordinary least squares (no penalty term), fitted on the training split
LR = LinearRegression(normalize=True).fit(x_train, y_train)

# Error on the held-out split, reported as RMSE
y_pred = LR.predict(x_test)
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)
print('linear regression test RMSE = %0.3f' % RMSE)
print()

# Fitted parameters: the intercept, then the coefficients labelled by
# predictor name (transposed so each predictor is a row)
print('intercept = %0.3f' % LR.intercept_)
coef_table = pd.DataFrame(LR.coef_, columns=col).T
print(coef_table)

In [None]:
# Ridge regression: the penalty strength is picked by RidgeCV from a
# log-spaced grid of 100 candidates running from 1e5 down to 1e-5
alpha = 10 ** np.linspace(5, -5, 100)
LR_ridge = RidgeCV(alphas=alpha, normalize=True).fit(x_train, y_train)

# Error on the held-out split, reported as RMSE
y_pred = LR_ridge.predict(x_test)
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)
print('ridge regression test RMSE = %0.3f' % RMSE)
print('optimal alpha = %0.3f' % LR_ridge.alpha_)
print()

# Fitted parameters: the intercept, then the coefficients labelled by
# predictor name (transposed so each predictor is a row)
print('intercept = %0.3f' % LR_ridge.intercept_)
coef_table = pd.DataFrame(LR_ridge.coef_, columns=col).T
print(coef_table)

In [None]:
# Lasso regression: the penalty strength is picked by LassoCV from a
# log-spaced grid of 100 candidates running from 1e5 down to 1e-5
alpha = 10 ** np.linspace(5, -5, 100)
# The response is passed as a flattened 1-D copy of the column vector
LR_lasso = LassoCV(alphas=alpha, normalize=True).fit(x_train, y_train.flatten())

# Error on the held-out split, reported as RMSE
y_pred = LR_lasso.predict(x_test)
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)
print('lasso regression test RMSE = %0.3f' % RMSE)
print('optimal alpha = %0.3f' % LR_lasso.alpha_)
print()

# Fitted parameters: coef_ is reshaped into a single row so it can be
# labelled with the predictor names, then transposed to one row each
print('intercept = %0.3f' % LR_lasso.intercept_)
coef = np.reshape(LR_lasso.coef_, (1, -1))
coef_table = pd.DataFrame(coef, columns=col).T
print(coef_table)

In [None]:
# Lasso regression with a hand-picked penalty (suboptimal on purpose).
# The candidate grid holds a single value, so the "optimal alpha" printed
# below is just the 0.07 supplied here rather than a tuned choice.
LR_lasso = LassoCV(alphas=[0.07], normalize=True).fit(x_train, y_train.flatten())

# Error on the held-out split, reported as RMSE
y_pred = LR_lasso.predict(x_test)
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)
print('lasso regression test RMSE = %0.3f' % RMSE)
print('optimal alpha = %0.3f' % LR_lasso.alpha_)
print()

# Fitted parameters: coef_ is reshaped into a single row so it can be
# labelled with the predictor names, then transposed to one row each
print('intercept = %0.3f' % LR_lasso.intercept_)
coef = np.reshape(LR_lasso.coef_, (1, -1))
coef_table = pd.DataFrame(coef, columns=col).T
print(coef_table)

All models are comparable in RMSE, but, using trial and error, lasso with `alpha = 0.07` yields a dramatically simplified model (only three nonzero coefficients) with only a slight increase in RMSE. Since this increase was measured for only one train/test split, further testing is warranted.