In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [2]:
# Display configuration: wide frames on one line, no scientific notation for arrays.
pd.set_option('display.width', 500)
np.set_printoptions(suppress=True)

# Small shift added to Radio before the log / reciprocal transforms below.
# NOTE(review): presumably this guards against zero Radio values — confirm against the data.
RADIO_OFFSET = 0.0001

data = pd.read_csv('Advertising.csv')
data['Radio'] = data['Radio'] + RADIO_OFFSET

# Derive the same four non-linear transforms (square, cube, log, reciprocal)
# for each channel; TV first, then Radio, so the column order stays stable.
for channel in ('TV', 'Radio'):
    spend = data[channel]
    data[channel + '2'] = spend ** 2
    data[channel + '3'] = spend ** 3
    data[channel + '_LOG'] = np.log(spend)
    data[channel + '_REV'] = 1 / spend

# NOTE(review): the correlation table includes an 'Unnamed: 0' column coming from
# the CSV; it looks like a row index — consider read_csv(..., index_col=0).
print('Pearson Corr = \n', data.corr())

Pearson Corr = 
             Unnamed: 0        TV     Radio  Newspaper     Sales       TV2       TV3    TV_LOG    TV_REV    Radio2    Radio3  Radio_LOG  Radio_REV
Unnamed: 0    1.000000  0.017715 -0.110680  -0.154944 -0.051616  0.017686  0.016399  0.005157  0.033130 -0.091132 -0.075833  -0.099016   0.033783
TV            0.017715  1.000000  0.054809   0.056648  0.782224  0.967662  0.912265  0.887969 -0.286755  0.050844  0.048734   0.092570  -0.055387
Radio        -0.110680  0.054809  1.000000   0.354104  0.576223  0.078644  0.085030 -0.010842  0.078869  0.966752  0.912164   0.758066  -0.111558
Newspaper    -0.154944  0.056648  0.354104   1.000000  0.228299  0.076442  0.087403  0.040322 -0.074944  0.361405  0.348591   0.260393  -0.069735
Sales        -0.051616  0.782224  0.576223   0.228299  1.000000  0.735582  0.683391  0.751694 -0.317623  0.561608  0.535404   0.446286  -0.071268
TV2           0.017686  0.967662  0.078644   0.076442  0.735582  1.000000  0.984478  0.769122 -0.208433  0.

In [3]:
def report_regression_metrics(label, y_true, y_pred):
    """Print and return error metrics for one data split.

    Prints three lines: a summary with MAE, MSE and RMSE, then RMSE divided by
    the mean of ``y_true``, then the mean absolute relative error.

    Parameters
    ----------
    label : str
        Prefix of the summary line (e.g. ``'Train'`` or ``'Test'``).
    y_true : array-like
        Observed target values. Used as a divisor for the relative error,
        so zero entries would produce infinite values.
    y_pred : array-like
        Predictions aligned element-by-element with ``y_true``.

    Returns
    -------
    tuple
        ``(mae, mse, rmse)`` for the split.
    """
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    print('%s Set MAE = %f, MSE = %f, RMSE = %f' % (label, mae, mse, rmse))
    print(rmse / np.mean(y_true))
    print(np.mean(np.abs(y_true - y_pred) / y_true))
    return mae, mse, rmse


x_cols = ['TV', 'Radio', 'TV2', 'TV3', 'TV_LOG', 'TV_REV', 'Radio2', 'Radio3', 'Radio_LOG', 'Radio_REV']
features = data[x_cols]
y = data['Sales']

# Split BEFORE scaling: fitting the scaler on all rows would let the test set's
# min/max influence preprocessing (data leakage).
features_train, features_test, y_train, y_test = train_test_split(
    features, y, test_size=0.3, random_state=0)

mms = MinMaxScaler()
x_train = mms.fit_transform(features_train)  # learn min/max from training rows only
x_test = mms.transform(features_test)        # reuse the training-set scaling
# Keep `x` defined as the scaled full feature matrix, as this cell did before,
# now expressed in the training-set scaling.
x = mms.transform(features)

model = LinearRegression()
model.fit(x_train, y_train)
print(model.coef_, model.intercept_)
for col_name, coef in zip(x_cols, model.coef_):
    print('\t', col_name, coef)

y_train_pred = model.predict(x_train)
mae_train, mse_train, rmse_train = report_regression_metrics('Train', y_train, y_train_pred)

y_test_pred = model.predict(x_test)
mae_test, mse_test, rmse_test = report_regression_metrics('Test', y_test, y_test_pred)

[ -6.89265823  50.37591023  12.67709229  -4.96382935  28.36037357
  36.97862675 -58.38373873  30.44410948 -41.30500977 -28.29657612] 15.652928784466871
	 TV -6.892658234817
	 Radio 50.37591022620066
	 TV2 12.677092293935786
	 TV3 -4.963829350595155
	 TV_LOG 28.36037356515398
	 TV_REV 36.97862675459254
	 Radio2 -58.38373873013242
	 Radio3 30.444109475743165
	 Radio_LOG -41.30500977221441
	 Radio_REV -28.296576124253427
Train Set MAE = 0.939474, MSE = 1.492113, RMSE = 1.221521
0.0850895263279687
0.07961306606684679
Test Set MAE = 1.558116, MSE = 16.062102, RMSE = 4.007755
0.30258627596171667
0.40598628463878605
