In [None]:
import math
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler

# Apply seaborn's default styling to the matplotlib figures drawn in this notebook.
sns.set()
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

# Load the training split of the housing data, indexed by its 'ID' column.
# The data directory can be overridden through the BOSTON_HOUSING_DIR
# environment variable; the default is the original location so existing
# setups keep working unchanged.
DATA_DIR = Path(os.environ.get('BOSTON_HOUSING_DIR', '/home/johan/Desktop/boston-housing'))
train_df = pd.read_csv(DATA_DIR / 'train.csv', index_col='ID')

# Uncomment to list the columns, dtypes and non-null counts.
# train_df.info()

# Preview the first rows of the dataset.
train_df.head()

In [None]:
# Separate the target column ('medv') from the predictor columns.
y = train_df['medv']
X = train_df.drop(columns='medv')

print("X shape:", X.shape)
print("y shape:", y.shape)

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

In [None]:
# Baseline: ordinary least squares fitted directly on the unscaled features.
lr_model = LinearRegression().fit(X_train, y_train)

# Compare the model's score on the training data with its score on the held-out data.
lr_train_score = lr_model.score(X_train, y_train)
lr_test_score = lr_model.score(X_test, y_test)
print('Training score: {}'.format(lr_train_score))
print('Test score: {}'.format(lr_test_score))

# Root-mean-squared error of the predictions on the test split.
y_pred = lr_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = math.sqrt(mse)

print('RMSE: {}'.format(rmse))

In [None]:
# Degree-2 polynomial regression: standardize the inputs, expand them into
# polynomial and interaction terms, then fit ordinary least squares.
steps = [
    ('scalar', StandardScaler()),
    ('poly', PolynomialFeatures(degree=2)),
    ('model', LinearRegression()),
]
pipeline = Pipeline(steps).fit(X_train, y_train)

poly_train_score = pipeline.score(X_train, y_train)
poly_test_score = pipeline.score(X_test, y_test)
print('Training score: {}'.format(poly_train_score))
print('Test score: {}'.format(poly_test_score))

In [None]:
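`PolynomialFeatures` generates a new feature matrix consisting of all polynomial combinations of the input features with degree less than or equal to the specified degree. For example, if an input sample is two-dimensional and of the form [a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].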

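`StandardScaler` standardizes each feature. The standard score of a sample x is calculated as:

    z = (x - u) / s

where u is the mean of the training samples and s is their standard deviation.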

In [None]:
# Toy illustration of PolynomialFeatures on a small 3x2 integer array.
# The demo array gets its own name so that the housing feature matrix `X`
# created from train_df is not overwritten by this cell.
X_demo = np.arange(6).reshape(3, 2)
print('X:', X_demo)

# Each row [a, b] is expanded to [1, a, b, a^2, ab, b^2].
poly = PolynomialFeatures(2)
poly.fit_transform(X_demo)


In [None]:
# Show how many columns a degree-2 polynomial expansion of the training
# features produces. The expansion is computed a single time.
poly = PolynomialFeatures(2)
print(poly.fit_transform(X_train).shape)
print('-------------------')

# Ridge regression on standardized, degree-2 polynomial features.
# StandardScaler removes the mean and scales each feature to unit variance
# before the expansion; alpha is the regularization strength passed to Ridge.
steps = [
    ('scalar', StandardScaler()),
    ('poly', PolynomialFeatures(degree=2)),
    ('model', Ridge(alpha=10, fit_intercept=True)),
]

ridge_pipe = Pipeline(steps)
ridge_pipe.fit(X_train, y_train)
print('Training Score: {}'.format(ridge_pipe.score(X_train, y_train)))
print('Test Score: {}'.format(ridge_pipe.score(X_test, y_test)))