# LOAD PACKAGES AND DATA

In [24]:
# LOAD DATASET
from sklearn.datasets import load_boston

# LOAD MODELS
from sklearn.linear_model import LinearRegression, BayesianRidge, LassoCV, ElasticNetCV
from sklearn.metrics import mean_squared_error

# LOAD EXTRAS
from sklearn.cross_validation import train_test_split, KFold
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the Boston housing bunch shipped with scikit-learn and unpack it
# into the predictor matrix (`features`) and the response vector (`target`).
data = load_boston()
features, target = data.data, data.target

## Linear Regression (Train test split)

In [14]:
# Hold out 33% of the samples for evaluation; the fixed seed keeps the
# split identical from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.33,
    random_state=42
)

In [26]:
# Ordinary least-squares baseline, fitted on the training split only.
# The fit call stays as the last expression so the cell still displays
# the fitted estimator's repr.
lr_tts = LinearRegression(normalize=True, fit_intercept=True)
lr_tts.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=True)

In [27]:
# Predict the held-out targets, then display the estimator's score
# (R^2 for scikit-learn regressors) on that same held-out split.
y_hat_lr_tts = lr_tts.predict(X=X_test)
lr_tts.score(X=X_test, y=y_test)

0.72585158182300646

In [31]:
# Mean squared error on the held-out split; its square root (RMSE) is
# printed so the error is in the same units as the target.
mse_lr_tts = mean_squared_error(y_true=y_test, y_pred=y_hat_lr_tts)
print(np.sqrt(mse_lr_tts))

4.55490322184


## Linear Regression (KFold)

In [34]:
# Five-fold splitter over every row of the feature matrix
# (no shuffling is requested).
kf = KFold(features.shape[0], n_folds=5)

In [35]:
# Estimator that is re-fitted on each fold, plus the per-fold
# accumulators that the cross-validation loop fills in.
lr_kf = LinearRegression(normalize=True, fit_intercept=True)
scores_lr_kf, mse_lr_kf = [], []

In [38]:
# Cross-validate the linear model one fold at a time.
#
# The accumulators are reset at the top of this cell so that re-running it
# replaces the previous results instead of appending a second copy of every
# fold (which would misalign the RMSE/score report that follows).
scores_lr_kf = []
mse_lr_kf = []  # NOTE: despite the name, this holds per-fold RMSE (sqrt of MSE)

for train, test in kf:
    # Fit on the training indices of this fold only.
    lr_kf.fit(features[train], target[train])
    y_hat_lr_kf = lr_kf.predict(features[test])
    scores_lr_kf.append(lr_kf.score(features[test], target[test]))
    # mean_squared_error is documented as (y_true, y_pred); pass the
    # arguments in that order (the value is unchanged, MSE is symmetric).
    mse_lr_kf.append(np.sqrt(mean_squared_error(target[test], y_hat_lr_kf)))

In [46]:
# One report line per fold: the fold's RMSE next to its score.
for fold, fold_rmse in enumerate(mse_lr_kf):
    print('RMSE: {0}\t| SCORE: {1}'.format(fold_rmse, scores_lr_kf[fold]))

RMSE: 3.53279637288	| SCORE: 0.638610693529
RMSE: 5.10844425141	| SCORE: 0.638610693529
RMSE: 5.75499399415	| SCORE: 0.713344317153
RMSE: 8.99072042601	| SCORE: 0.586451336256
RMSE: 5.79520108817	| SCORE: 0.0784249546783


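## Lasso

*TODO: this section is empty — `LassoCV` is imported at the top of the notebook, but no Lasso model is fitted here yet.*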

## ElasticNet

In [28]:
# Candidate L1/L2 mixing ratios handed to ElasticNetCV.
l1_ratio = [.01, .05, .25, .5, .75, .95, .99]

# Outer five-fold splitter and the estimator evaluated on it.
kf = KFold(features.shape[0], n_folds=5)
met = ElasticNetCV(l1_ratio=l1_ratio, n_jobs=4)

# Per-fold accumulators filled by the evaluation loop.
scores, mse = [], []

In [29]:
# Evaluate ElasticNetCV on each outer fold.
#
# The accumulators are reset at the top of this cell so that re-running it
# replaces the previous results instead of appending duplicate fold entries.
scores = []
mse = []  # per-fold mean squared error (the square root is taken when reporting)

for train, test in kf:
    # Fit on the training indices of this fold only.
    met.fit(features[train], target[train])
    y_hat = met.predict(features[test])
    mse.append(mean_squared_error(target[test], y_hat))
    scores.append(met.score(features[test], target[test]))

In [23]:
# One report line per fold: RMSE (square root of the stored MSE) next to
# the fold's score.
for fold, fold_mse in enumerate(mse):
    print('RMSE: {0}\t| SCORE: {1}'.format(np.sqrt(fold_mse), scores[fold]))

RMSE: 4.20186755552	| SCORE: 0.488762380111
RMSE: 5.82730997593	| SCORE: 0.626990854355
RMSE: 7.52467450392	| SCORE: 0.293011565935
RMSE: 7.78636743235	| SCORE: 0.308787620353
RMSE: 4.44216554715	| SCORE: 0.257838137208


## Bayesian Ridge

In [20]:
# Bayesian ridge regression, fitted on the same training split used for
# the train/test linear-regression model. The fit call stays last so the
# cell still displays the fitted estimator's repr.
clf = BayesianRidge(normalize=True)
clf.fit(X=X_train, y=y_train)

BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False, copy_X=True,
       fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300,
       normalize=True, tol=0.001, verbose=False)

In [21]:
# Predictions and displayed score for the Bayesian ridge model on the
# held-out split. NOTE: `y_hat` overwrites the variable of the same name
# left behind by the ElasticNet loop.
y_hat = clf.predict(X=X_test)
clf.score(X=X_test, y=y_test)

0.72344702672946914