In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

In [2]:
# Read the Boston housing table from the CSV file and preview its first
# five rows; the preview is this cell's displayed output.
boston = pd.read_csv(filepath_or_buffer="Boston.csv")
boston.head(n=5)

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [3]:
# Single-predictor setup: `lstat` is the only feature, `medv` is the target.
y = boston['medv']
X = boston[['lstat']]

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=24,
)

In [4]:
# Baseline: plain linear regression on the raw feature, scored with R^2 on the
# held-out split. `fit` returns the estimator, so fit and predict are chained.
lr = LinearRegression()
y_pred = lr.fit(X_train, y_train).predict(X_test)
r2_score(y_true=y_test, y_pred=y_pred)

0.5279301917445974

# Polynomial Features

In [6]:
# Degree-2 polynomial expansion (no bias column), emitted as a labelled DataFrame.
poly = PolynomialFeatures(degree=2, include_bias=False)
poly.set_output(transform='pandas')

# Learn the expansion on the training split only, then reuse it on the test split.
X_poly_trn = poly.fit_transform(X_train)
X_poly_tst = poly.transform(X_test)

# Refit the existing `lr` estimator on the expanded features and report test R^2.
y_pred = lr.fit(X_poly_trn, y_train).predict(X_poly_tst)
r2_score(y_true=y_test, y_pred=y_pred)

0.6431149312301052

In [7]:
# Degree-3 polynomial expansion (no bias column), emitted as a labelled DataFrame.
poly = PolynomialFeatures(degree=3, include_bias=False)
poly.set_output(transform='pandas')

# Learn the expansion on the training split only, then reuse it on the test split.
X_poly_trn = poly.fit_transform(X_train)
X_poly_tst = poly.transform(X_test)

# Refit the existing `lr` estimator on the expanded features and report test R^2.
y_pred = lr.fit(X_poly_trn, y_train).predict(X_poly_tst)
r2_score(y_true=y_test, y_pred=y_pred)

0.6464637609380752

In [8]:
# Degree-4 polynomial expansion (no bias column), emitted as a labelled DataFrame.
poly = PolynomialFeatures(degree=4, include_bias=False)
poly.set_output(transform='pandas')

# Learn the expansion on the training split only, then reuse it on the test split.
X_poly_trn = poly.fit_transform(X_train)
X_poly_tst = poly.transform(X_test)

# Refit the existing `lr` estimator on the expanded features and report test R^2.
y_pred = lr.fit(X_poly_trn, y_train).predict(X_poly_tst)
r2_score(y_true=y_test, y_pred=y_pred)

0.6397924587035033

## Polynomial features on two columns: 'lstat' and 'rad'

In [10]:
# Two-predictor setup: `lstat` and `rad` as features, `medv` as the target.
y = boston["medv"]
X = boston[["lstat", "rad"]]

# Same 30% hold-out and seed as the other splits in this notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=24,
)

In [11]:
# Baseline for the two-column feature set: linear regression on the raw
# features, scored with R^2 on the held-out split.
lr = LinearRegression()
y_pred = lr.fit(X_train, y_train).predict(X_test)
r2_score(y_true=y_test, y_pred=y_pred)

0.5260520682885452

In [12]:
# Degree-2 expansion of the two features: squares plus the pairwise
# interaction term, no bias column, returned as a labelled DataFrame.
poly = PolynomialFeatures(degree=2, include_bias=False)
poly.set_output(transform='pandas')

# Fit the transformer on training rows only; apply it as-is to the test rows.
X_poly_trn = poly.fit_transform(X_train)
X_poly_tst = poly.transform(X_test)

# Refit `lr` on the expanded design matrix and report test R^2.
y_pred = lr.fit(X_poly_trn, y_train).predict(X_poly_tst)
r2_score(y_true=y_test, y_pred=y_pred)

0.6833321681968951

In [13]:
# Degree-3 expansion of the two features (powers and interaction terms up to
# total degree 3), no bias column, returned as a labelled DataFrame.
poly = PolynomialFeatures(degree=3, include_bias=False)
poly.set_output(transform='pandas')

# Fit the transformer on training rows only; apply it as-is to the test rows.
X_poly_trn = poly.fit_transform(X_train)
X_poly_tst = poly.transform(X_test)

# Refit `lr` on the expanded design matrix and report test R^2.
y_pred = lr.fit(X_poly_trn, y_train).predict(X_poly_tst)
r2_score(y_true=y_test, y_pred=y_pred)

0.6883404673189563

In [14]:
# Degree-4 expansion of the two features (powers and interaction terms up to
# total degree 4), no bias column, returned as a labelled DataFrame.
poly = PolynomialFeatures(degree=4, include_bias=False)
poly.set_output(transform='pandas')

# Fit the transformer on training rows only; apply it as-is to the test rows.
X_poly_trn = poly.fit_transform(X_train)
X_poly_tst = poly.transform(X_test)

# Refit `lr` on the expanded design matrix and report test R^2.
y_pred = lr.fit(X_poly_trn, y_train).predict(X_poly_tst)
r2_score(y_true=y_test, y_pred=y_pred)

0.666965334431979

In [15]:
# Show the column labels of the current polynomial training frame, i.e. the
# names PolynomialFeatures generated for each power / interaction term.
X_poly_trn.columns

Index(['lstat', 'rad', 'lstat^2', 'lstat rad', 'rad^2', 'lstat^3',
       'lstat^2 rad', 'lstat rad^2', 'rad^3', 'lstat^4', 'lstat^3 rad',
       'lstat^2 rad^2', 'lstat rad^3', 'rad^4'],
      dtype='object')

## Polynomial features on all columns

In [33]:
# Full feature set: every column except the target `medv`.
y = boston['medv']
X = boston.drop(columns="medv")

# Same 30% hold-out and seed as the other splits in this notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=24,
)

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33
...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48


In [37]:
# Baseline for the full feature set: linear regression on the raw columns,
# scored with R^2 on the held-out split.
lr = LinearRegression()
y_pred = lr.fit(X_train, y_train).predict(X_test)
r2_score(y_true=y_test, y_pred=y_pred)

0.7133431144123461

In [39]:
# Degree-2 expansion of every feature column (squares and pairwise
# interactions), no bias column, returned as a labelled DataFrame.
poly = PolynomialFeatures(degree=2, include_bias=False)
poly.set_output(transform='pandas')

# Fit the transformer on training rows only; apply it as-is to the test rows.
X_poly_trn = poly.fit_transform(X_train)
X_poly_tst = poly.transform(X_test)

# Refit `lr` on the expanded design matrix and report test R^2.
y_pred = lr.fit(X_poly_trn, y_train).predict(X_poly_tst)
r2_score(y_true=y_test, y_pred=y_pred)

0.6950286611541465

In [41]:
# Degree-3 expansion of every feature column, no bias column, returned as a
# labelled DataFrame.
# NOTE(review): the recorded test R^2 for this cell is strongly negative, i.e.
# worse than always predicting the mean. Presumably the very wide, unscaled
# expansion overfits the training split -- confirm before relying on this model.
poly = PolynomialFeatures(degree=3, include_bias=False)
poly.set_output(transform='pandas')

# Fit the transformer on training rows only; apply it as-is to the test rows.
X_poly_trn = poly.fit_transform(X_train)
X_poly_tst = poly.transform(X_test)

# Refit `lr` on the expanded design matrix and report test R^2.
y_pred = lr.fit(X_poly_trn, y_train).predict(X_poly_tst)
r2_score(y_true=y_test, y_pred=y_pred)

-8442.445261153322

In [43]:
# Degree-4 expansion of every feature column, no bias column, returned as a
# labelled DataFrame.
# NOTE(review): the recorded test R^2 for this cell is strongly negative, i.e.
# worse than always predicting the mean. Presumably the very wide, unscaled
# expansion overfits the training split -- confirm before relying on this model.
poly = PolynomialFeatures(degree=4, include_bias=False)
poly.set_output(transform='pandas')

# Fit the transformer on training rows only; apply it as-is to the test rows.
X_poly_trn = poly.fit_transform(X_train)
X_poly_tst = poly.transform(X_test)

# Refit `lr` on the expanded design matrix and report test R^2.
y_pred = lr.fit(X_poly_trn, y_train).predict(X_poly_tst)
r2_score(y_true=y_test, y_pred=y_pred)

-1370.9553199802765

TypeError: 'numpy.float64' object is not callable