In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import mean_squared_error, r2_score
%matplotlib inline
import warnings
# Silences every warning category for the rest of the session, which also hides
# deprecation and input-validation warnings raised by the cells below.
# NOTE(review): consider narrowing this filter to specific categories.
warnings.filterwarnings("ignore")

In [16]:
# Load the Boston housing bundle; later cells read its .data, .feature_names
# and .target attributes.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell only runs on older releases — confirm the pinned version.
dataset = load_boston()

In [18]:
# Put the feature matrix and the target into one frame so every column,
# including PRICE, can be addressed by name.
bos = (
    pd.DataFrame(data=dataset.data, columns=dataset.feature_names)
    .assign(PRICE=dataset.target)
)
print("features: {}".format(bos.columns.tolist()))

features: ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'PRICE']


In [19]:
# Predictors used by both models, in the column order the fitted estimators
# will expect; the target is the PRICE column.
feature_cols = ['LSTAT', 'RM', 'CRIM', 'NOX']
X = pd.DataFrame(bos[feature_cols].to_numpy(), columns=feature_cols)
Y = bos['PRICE']

In [20]:
# Hold out 25% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=5,
)

In [21]:
# LINEAR REGRESSION
# Fit a linear model on the training split (fit() returns the estimator).
lin = LinearRegression().fit(X_train, Y_train)

print('LR coefficients: {}'.format(lin.coef_))
print('LR intercept: {}'.format(lin.intercept_))

# In-sample predictions, reused by the metrics cell below.
y_train_predict = lin.predict(X_train)

# Degree-1 fit of predicted vs. actual price. numpy's polynomial.polyfit returns
# coefficients lowest degree first, so b is the intercept and m the slope.
b, m = np.polynomial.polynomial.polyfit(x=Y_train, y=y_train_predict, deg=1)

LR coefficients: [-0.61126741  5.10745758 -0.12620017  1.48644349]
LR intercept: -2.2304479282335805


In [26]:
# Error metrics for the linear model, computed on the training split only.
mse_train = mean_squared_error(Y_train, y_train_predict)
rmse = np.sqrt(mse_train)
r2 = r2_score(Y_train, y_train_predict)

print('Root mean squared error is ' , round(rmse, 2))
print('R2 score is ', round(r2, 2))

Root mean squared error is  5.46
R2 score is  0.65


In [27]:
# Predict the price for one hand-written sample (not the held-out test split,
# despite the variable name, which is kept for any later cells that read it).
# The sample is keyed by feature name and reordered to the training column
# order, so it cannot be silently mis-assigned if the feature list changes and
# the model receives the same named columns it was fitted on.
# NOTE(review): NOX = 4 looks large relative to the other inputs — confirm it
# is within the range seen in training.
sample = pd.DataFrame([{'LSTAT': 20, 'RM': 5, 'CRIM': 2, 'NOX': 4}])
sample = sample[list(X_train.columns)]
y_test_predict = lin.predict(sample)
print("PRICE:",y_test_predict)

PRICE: [16.77486536]


In [28]:
# POLYNOMIAL REGRESSION
# Expand the predictors to all degree-2 terms, then fit an ordinary linear
# model on the expanded features.
poly_features = PolynomialFeatures(degree=2)

# Fit the transformer on the training split only.
X_train_poly = poly_features.fit_transform(X_train)

poly_model = LinearRegression()
poly_model.fit(X_train_poly, Y_train)

y_train_predicted = poly_model.predict(X_train_poly)

# Reuse the already-fitted transformer on the test split: transform(), not
# fit_transform(), so no preprocessing step is ever re-fitted on test data.
X_test_poly = poly_features.transform(X_test)
y_test_predicted = poly_model.predict(X_test_poly)

In [29]:
# Error metrics for the polynomial model on the held-out test split.
mse_test = mean_squared_error(Y_test, y_test_predicted)
rmse_test = np.sqrt(mse_test)
r2_test = r2_score(Y_test, y_test_predicted)

print('Root mean squared error of test set is ' , round(rmse_test, 2))
print('R2 score of test set is ', round(r2_test, 2))

Root mean squared error of test set is  4.49
R2 score of test set is  0.76
