In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures

from sklearn.metrics import mean_absolute_error, mean_squared_error

In [2]:
# Read the advertising dataset from the working directory and preview
# its first five rows as the cell output.
data = pd.read_csv(filepath_or_buffer='Advertising.csv')
data.head(n=5)

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [3]:
# Show the column labels of the loaded frame (used below to pick the
# predictor columns and the 'sales' response).
data.columns

Index(['TV', 'radio', 'newspaper', 'sales'], dtype='object')

In [4]:
# Show (row count, column count) of the loaded frame.
data.shape

(200, 4)

In [5]:
# Model inputs (X): the three predictor columns, kept as a DataFrame.
features = data.loc[:, ['TV', 'newspaper', 'radio']]
# Model output (y): 'sales' selected with a list so the result stays a
# 2-D (n_rows, 1) array rather than a flat vector.
target = data[['sales']].to_numpy()

Convert Features to Polynomial Features

In [6]:

# Degree-2 expansion applied to one column at a time: each result holds
# [1, x, x**2] for that column, so no cross terms between columns are
# generated. The same transformer object is refit for every column, in
# the order TV, newspaper, radio.
# NOTE(review): every call emits its own constant 1.0 column, so the
# combined design matrix carries three identical bias columns alongside
# the regression intercept -- consider include_bias=False.
features_poly = PolynomialFeatures(degree=2)

TV_poly, newspaper_poly, radio_poly = (
    features_poly.fit_transform(features[[column]])
    for column in ('TV', 'newspaper', 'radio')
)

In [7]:
# Place the three per-column expansions side by side (TV, newspaper,
# radio). Each block keeps its default integer column labels, so the
# labels 0, 1, 2 repeat across the combined frame.
poly_features = pd.concat(
    [pd.DataFrame(block) for block in (TV_poly, newspaper_poly, radio_poly)],
    axis=1,
)

In [8]:
# Display the combined polynomial feature frame (pandas truncates the
# middle rows in the rendered output).
poly_features

Unnamed: 0,0,1,2,0.1,1.1,2.1,0.2,1.2,2.2
0,1.0,230.1,52946.01,1.0,69.2,4788.64,1.0,37.8,1428.84
1,1.0,44.5,1980.25,1.0,45.1,2034.01,1.0,39.3,1544.49
2,1.0,17.2,295.84,1.0,69.3,4802.49,1.0,45.9,2106.81
3,1.0,151.5,22952.25,1.0,58.5,3422.25,1.0,41.3,1705.69
4,1.0,180.8,32688.64,1.0,58.4,3410.56,1.0,10.8,116.64
...,...,...,...,...,...,...,...,...,...
195,1.0,38.2,1459.24,1.0,13.8,190.44,1.0,3.7,13.69
196,1.0,94.2,8873.64,1.0,8.1,65.61,1.0,4.9,24.01
197,1.0,177.0,31329.00,1.0,6.4,40.96,1.0,9.3,86.49
198,1.0,283.6,80428.96,1.0,66.2,4382.44,1.0,42.0,1764.00


In [13]:
from sklearn.model_selection import train_test_split

In [14]:
# Hold out part of the rows for evaluation using the library's default
# split proportion; the fixed random_state makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    poly_features,
    target,
    random_state=6,
)

In [12]:
# Sanity check: one line per split, in the order
# X_train, X_test, y_train, y_test.
print(
    *(part.shape for part in (X_train, X_test, y_train, y_test)),
    sep='\n',
)

(150, 9)
(50, 9)
(150, 1)
(50, 1)


In [16]:
from sklearn.linear_model import LinearRegression

In [17]:
# Fit a linear regression on the training split. fit() returns the
# estimator itself, so the trailing expression echoes the fitted model
# as the cell output.
my_model = LinearRegression().fit(X_train, y_train)
my_model

LinearRegression()

In [18]:
# Fitted coefficients, one per column of the polynomial feature frame
# (2-D because the target was passed as a column array).
my_model.coef_

array([[ 0.00000000e+00,  7.76366570e-02, -1.12220872e-04,
         8.49147141e-16,  2.13781336e-02, -2.07802344e-04,
        -2.77555756e-17,  1.46025928e-01,  9.36830450e-04]])

In [19]:
# Fitted intercept term of the regression.
my_model.intercept_

array([1.41776908])

#### Prediction and Evaluation on Train Data

In [20]:
# Score the fitted model on the rows it was trained on.
preds_train = my_model.predict(X_train)
train_mse = mean_squared_error(y_train, preds_train)  # reused for RMSE

print('MAE : ', mean_absolute_error(y_train, preds_train))
print('MSE : ', train_mse)
print('RMSE:', np.sqrt(train_mse))
# score() reports the coefficient of determination on the given data.
print('R2 Score : ', my_model.score(X_train, y_train))

MAE :  1.202893489619854
MSE :  2.488776665854576
RMSE: 1.5775857079266964
R2 Score :  0.9139135358655949


#### Prediction and Evaluation on Test Data

In [21]:
# Score the fitted model on the held-out rows.
preds = my_model.predict(X_test)
test_mse = mean_squared_error(y_test, preds)  # reused for RMSE

print('MAE : ', mean_absolute_error(y_test, preds))
print('MSE : ', test_mse)
print('RMSE:', np.sqrt(test_mse))
# score() reports the coefficient of determination on the given data.
print('R2 score : ', my_model.score(X_test, y_test))

MAE :  0.9912741510674818
MSE :  1.5270219257565474
RMSE: 1.2357272861584578
R2 score :  0.9293009924314467
