In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso, LinearRegression, Ridge
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Load the housing table; 'price' is the target and every other column is a feature.
df = pd.read_csv('Bos_Housing.csv')
y = df['price'].values
X = df.drop(columns='price').values

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)
print('Training Data:', len(X_train))
print('Testing Data:', len(X_test))

In [None]:
# Simple (single-feature) linear regression: 'price' as a function of 'rm'.
y = df['price'].values
X_rm = df['rm'].values.reshape(-1, 1)  # one column, shape (n_samples, 1)

slr = LinearRegression().fit(X_rm, y)

# In-sample predictions over the full data set (no train/test split here).
y_train_pred = slr.predict(X_rm)

print(f'Slope (w_1): {slr.coef_[0]:.2f}')
print(f'Intercept/bias (w_0): {slr.intercept_:.2f}')

In [None]:
def lin_regplot(X, y, model):
    """Scatter the observations and overlay the model's predictions as a line.

    Draws on the current pyplot axes: ``(X, y)`` as blue points and
    ``model.predict(X)`` against ``X`` as a red line. Returns nothing.
    """
    fitted = model.predict(X)
    plt.scatter(X, y, c='blue')
    plt.plot(X, fitted, color='red', linewidth=2)

# Rooms-vs-price scatter with the fitted line of the single-feature model.
lin_regplot(X=X_rm, y=y, model=slr)
plt.ylabel('[PRICE]')
plt.xlabel('Average number of rooms [RM]')
plt.tight_layout()
plt.show()

In [None]:
# Standardize the features using statistics learned from the training split only.
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

# Multiple linear regression on all standardized features.
slr = LinearRegression().fit(X_train_std, y_train)

y_train_pred, y_test_pred = (
    slr.predict(split) for split in (X_train_std, X_test_std)
)

# Report error and goodness of fit on both splits.
mse_train = mean_squared_error(y_train, y_train_pred)
mse_test = mean_squared_error(y_test, y_test_pred)
r2_train = r2_score(y_train, y_train_pred)
r2_test = r2_score(y_test, y_test_pred)

print(f'(MSE) train: {mse_train:.2f}, test: {mse_test:.2f}')
print(f'(R^2) train: {r2_train:.2f}, test: {r2_test:.2f}')

In [None]:
# Residuals (prediction minus target) against the predicted value, per split.
residual_series = (
    (y_train_pred, y_train, dict(c='blue', marker='o', label='Training data')),
    (y_test_pred, y_test, dict(c='lightgreen', marker='s', label='Test data')),
)
for predicted, actual, style in residual_series:
    plt.scatter(predicted, predicted - actual, **style)

# Zero-residual reference line spanning the fixed x-range of the plot.
plt.hlines(y=0, xmin=-10, xmax=50, lw=2, color='red')
plt.xlim([-10, 50])

plt.xlabel('Predicted values')
plt.ylabel('Residuals')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()

In [None]:
# Compare plain linear regression with degree-2 and degree-3 polynomial
# expansions of the standardized features, using R^2 on both splits.
lin_regr = LinearRegression()
quad_regr = LinearRegression()
cubic_regr = LinearRegression()

quadratic = PolynomialFeatures(degree=2)
cubic = PolynomialFeatures(degree=3)

# Fit each expansion on the training split only and reuse the fitted
# transformer for the test split; a transformer must never be refitted on
# held-out data.
X_quad_train = quadratic.fit_transform(X_train_std)
X_quad_test = quadratic.transform(X_test_std)
X_cubic_train = cubic.fit_transform(X_train_std)
X_cubic_test = cubic.transform(X_test_std)

lin_regr.fit(X_train_std, y_train)
linear_r2_train = r2_score(y_train, lin_regr.predict(X_train_std))
linear_r2_test = r2_score(y_test, lin_regr.predict(X_test_std))

quad_regr.fit(X_quad_train, y_train)
quad_r2_train = r2_score(y_train, quad_regr.predict(X_quad_train))
quad_r2_test = r2_score(y_test, quad_regr.predict(X_quad_test))

cubic_regr.fit(X_cubic_train, y_train)
cubic_r2_train = r2_score(y_train, cubic_regr.predict(X_cubic_train))
cubic_r2_test = r2_score(y_test, cubic_regr.predict(X_cubic_test))

print('(R^2) train:\ndegree=1: %.2f, degree=2: %.2f, degree=3: %.2f'%
      (linear_r2_train, quad_r2_train, cubic_r2_train))
print('(R^2) test:\ndegree=1: %.2f, degree=2: %.2f, degree=3: %.2f'%
      (linear_r2_test, quad_r2_test, cubic_r2_test))

In [None]:
# Ridge regression on the cubic feature expansion.
#
# The `normalize=True` option of scikit-learn's linear models was deprecated in
# 1.0 and removed in 1.2, so `Ridge(alpha=0.05, normalize=True)` raises a
# TypeError on current releases. The replacement is an explicit scaling step in
# a pipeline. Scaling columns by their standard deviation instead of their L2
# norm changes the coefficient scale by sqrt(n_samples); multiplying the ridge
# penalty by n_samples compensates for that.
n_ridge_samples = X_cubic_train.shape[0]
ridgeReg = make_pipeline(
    StandardScaler(with_mean=False),
    Ridge(alpha=0.05 * n_ridge_samples),
)
# The fitted Ridge estimator itself is available as ridgeReg.named_steps['ridge'].
ridgeReg.fit(X_cubic_train, y_train)

y_train_pred = ridgeReg.predict(X_cubic_train)
y_test_pred = ridgeReg.predict(X_cubic_test)

print('(MSE) train: %.2f, test: %.2f'%
      (mean_squared_error(y_train,y_train_pred), 
       mean_squared_error(y_test,y_test_pred)))
print('(R^2) train: %.2f, test: %.2f'%
      (r2_score(y_train,y_train_pred), 
       r2_score(y_test,y_test_pred)))

In [None]:
# 10-fold cross-validated R^2 of ridge regression over a log-spaced alpha grid.
#
# `Ridge(normalize=True)` no longer exists (removed in scikit-learn 1.2), so
# the scaling is done by a StandardScaler step inside the pipeline, which is
# refitted on the training part of every fold. To keep the alpha grid on the
# same scale as before, each alpha is multiplied by the nominal number of rows
# seen per CV fit; actual fold sizes may differ from it by one row, so the
# scores match the legacy behaviour only approximately.
alpha_space = np.logspace(-4, 0, 50)
n_folds = 10
n_fit_rows = X_cubic_train.shape[0] * (n_folds - 1) / n_folds
ridge_scores = []
ridge_scores_std = []
for alpha in alpha_space:
    ridge = make_pipeline(
        StandardScaler(with_mean=False),
        Ridge(alpha=alpha * n_fit_rows),
    )
    ridge_cv_scores = cross_val_score(ridge, X_cubic_train, y_train, cv=n_folds)
    ridge_scores.append(np.mean(ridge_cv_scores))
    ridge_scores_std.append(np.std(ridge_cv_scores))

In [None]:
def display_plot(cv_scores, cv_scores_std, alphas=None, n_folds=10):
    """Plot mean cross-validated R^2 against the regularization strength.

    Draws the mean score per alpha on a log-scaled x axis, shades a band of
    +/- one standard error around it, marks the best mean score with a dashed
    horizontal line, shows the figure and prints the best mean score.

    Parameters
    ----------
    cv_scores : sequence of float
        Mean cross-validation score for each alpha.
    cv_scores_std : sequence of float
        Standard deviation of the cross-validation scores for each alpha.
    alphas : sequence of float, optional
        Alpha values the scores belong to. Defaults to the notebook-level
        ``alpha_space``.
    n_folds : int, default 10
        Number of folds behind each score; the standard error is computed as
        ``std / sqrt(n_folds)``.

    Raises
    ------
    ValueError
        If the three sequences do not have the same length.
    """
    if alphas is None:
        alphas = alpha_space
    # Convert explicitly so the arithmetic below does not depend on implicit
    # list/NumPy-scalar coercion.
    alphas = np.asarray(alphas, dtype=float)
    mean_scores = np.asarray(cv_scores, dtype=float)
    std_error = np.asarray(cv_scores_std, dtype=float) / np.sqrt(n_folds)
    if not (alphas.shape == mean_scores.shape == std_error.shape):
        raise ValueError(
            'alphas, cv_scores and cv_scores_std must have the same length'
        )

    best_score = np.max(mean_scores)

    fig, ax = plt.subplots()
    ax.plot(alphas, mean_scores)
    ax.fill_between(alphas, mean_scores + std_error, mean_scores - std_error,
                    color="c", alpha=0.2)
    ax.set_ylabel('CV R^2 Score +/- Std Error')
    ax.set_xlabel('Alpha')
    ax.axhline(best_score, linestyle='--', color='.5')
    ax.set_xlim([alphas[0], alphas[-1]])
    ax.set_xscale('log')
    plt.show()
    print('(R^2) max score:', best_score)
    
# Plot the mean ridge CV scores with their standard-error band over alpha_space.
display_plot(ridge_scores, ridge_scores_std)

In [None]:
# Lasso regression on the cubic feature expansion.
#
# The `normalize=True` option of scikit-learn's linear models was deprecated in
# 1.0 and removed in 1.2, so `Lasso(alpha=0.05, normalize=True)` raises a
# TypeError on current releases. The replacement is an explicit scaling step in
# a pipeline. Scaling columns by their standard deviation instead of their L2
# norm changes the coefficient scale by sqrt(n_samples); multiplying the L1
# penalty by sqrt(n_samples) compensates for that.
n_lasso_samples = X_cubic_train.shape[0]
lassoReg = make_pipeline(
    StandardScaler(with_mean=False),
    Lasso(alpha=0.05 * np.sqrt(n_lasso_samples)),
)
# The fitted Lasso estimator itself is available as lassoReg.named_steps['lasso'].
lassoReg.fit(X_cubic_train, y_train)

y_train_pred = lassoReg.predict(X_cubic_train)
y_test_pred = lassoReg.predict(X_cubic_test)

print('(MSE) train: %.2f, test: %.2f'%
      (mean_squared_error(y_train,y_train_pred), 
       mean_squared_error(y_test,y_test_pred)))
print('(R^2) train: %.2f, test: %.2f'%
      (r2_score(y_train,y_train_pred), 
       r2_score(y_test,y_test_pred)))

In [None]:
# 10-fold cross-validated R^2 of lasso regression over a log-spaced alpha grid.
#
# `Lasso(normalize=True)` no longer exists (removed in scikit-learn 1.2), so
# the scaling is done by a StandardScaler step inside the pipeline, which is
# refitted on the training part of every fold. To keep the alpha grid on the
# same scale as before, each alpha is multiplied by the square root of the
# nominal number of rows seen per CV fit; actual fold sizes may differ from it
# by one row, so the scores match the legacy behaviour only approximately.
alpha_space = np.logspace(-4, 0, 50)
n_folds = 10
n_fit_rows = X_cubic_train.shape[0] * (n_folds - 1) / n_folds
lasso_scores = []
lasso_scores_std = []
lasso = make_pipeline(StandardScaler(with_mean=False), Lasso())
for alpha in alpha_space:
    # make_pipeline names the step after the lower-cased class name.
    lasso.set_params(lasso__alpha=alpha * np.sqrt(n_fit_rows))
    lasso_cv_scores = cross_val_score(lasso, X_cubic_train, y_train, cv=n_folds)
    lasso_scores.append(np.mean(lasso_cv_scores))
    lasso_scores_std.append(np.std(lasso_cv_scores))

In [None]:
def display_plot(cv_scores, cv_scores_std, alphas=None, n_folds=10):
    """Plot mean cross-validated R^2 against the regularization strength.

    Draws the mean score per alpha on a log-scaled x axis, shades a band of
    +/- one standard error around it, marks the best mean score with a dashed
    horizontal line, shows the figure and prints the best mean score.

    Parameters
    ----------
    cv_scores : sequence of float
        Mean cross-validation score for each alpha.
    cv_scores_std : sequence of float
        Standard deviation of the cross-validation scores for each alpha.
    alphas : sequence of float, optional
        Alpha values the scores belong to. Defaults to the notebook-level
        ``alpha_space``.
    n_folds : int, default 10
        Number of folds behind each score; the standard error is computed as
        ``std / sqrt(n_folds)``.

    Raises
    ------
    ValueError
        If the three sequences do not have the same length.
    """
    if alphas is None:
        alphas = alpha_space
    # Convert explicitly so the arithmetic below does not depend on implicit
    # list/NumPy-scalar coercion.
    alphas = np.asarray(alphas, dtype=float)
    mean_scores = np.asarray(cv_scores, dtype=float)
    std_error = np.asarray(cv_scores_std, dtype=float) / np.sqrt(n_folds)
    if not (alphas.shape == mean_scores.shape == std_error.shape):
        raise ValueError(
            'alphas, cv_scores and cv_scores_std must have the same length'
        )

    best_score = np.max(mean_scores)

    fig, ax = plt.subplots()
    ax.plot(alphas, mean_scores)
    ax.fill_between(alphas, mean_scores + std_error, mean_scores - std_error,
                    color="c", alpha=0.2)
    ax.set_ylabel('CV R^2 Score +/- Std Error')
    ax.set_xlabel('Alpha')
    ax.axhline(best_score, linestyle='--', color='.5')
    ax.set_xlim([alphas[0], alphas[-1]])
    ax.set_xscale('log')
    plt.show()
    print('(R^2) max score:', best_score)
    
# Plot the mean lasso CV scores with their standard-error band over alpha_space.
display_plot(lasso_scores, lasso_scores_std)