In [29]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
from IPython.display import Markdown as md


In [30]:
init_notebook_mode(connected=True)


In [31]:
def coefficient_of_determination(y, p_y):
    assert (len(y) == len(p_y)), "Vectors must be same length"
    y_m = np.mean(y)
    s_s_tot = np.sum((y - y_m)**2)
    residuals = np.diff(np.array([y, p_y]), axis=0)
    s_s_res = np.sum(residuals**2)
    return 1 - s_s_res / s_s_tot



In [32]:
df_adv = pd.read_csv('http://www-bcf.usc.edu/~gareth/ISL/Advertising.csv', index_col=0)
x = df_adv['TV']
y = df_adv['radio']
z = df_adv['sales']

feature_cols = ['TV', 'radio']

X = df_adv[feature_cols]

In [33]:
df_adv.head()

Unnamed: 0,TV,radio,newspaper,sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9


In [34]:
md("**Multiple Linear Regression Example** ")


**Multiple Linear Regression Example** 

In [35]:

md("__Sales = beta0 + beta1 * TV + beta2*Radio__")


__Sales = beta0 + beta1 * TV + beta2*Radio__

In [36]:
regressor = LinearRegression()
regressor.fit(X, z)


LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [37]:
b0 = regressor.intercept_
b1 = regressor.coef_[0]
b2 = regressor.coef_[1]

print(b0,b1,b2)


2.921099912405138 0.045754815101076145 0.18799422662030918


In [38]:
t1 = range(-150,450)
t2 = range(-30,80)
xv, yv = np.meshgrid(t1, t2)
xvf = xv.flatten()
yvf = yv.flatten()
pred=b0+b1*xvf+b2*yvf 


In [39]:
data_trace = go.Scatter3d(
    x=x,
    y=y,
    z=z,
    mode='markers',
    name='TV + Radio'
)


In [40]:
reg_trace=go.Mesh3d(x=xvf,y=yvf,z=pred,color='red',opacity=0.5, name='Predicted Sales')


In [41]:
rmse = np.sqrt(np.mean((b0+b1*x+b2*y-z)**2))
md("__Root-Mean Square Error = %r__"%(np.around(rmse, decimals=2)))


__Root-Mean Square Error = 1.67__

In [42]:
pred_c = b0+b1*x+b2*y
cd = coefficient_of_determination(y, pred_c)
md("__Coefficient of Determination = %r__"%(np.around(cd, decimals=3)))


__Coefficient of Determination = -0.095__

In [43]:
fig = go.Figure(data=[data_trace,reg_trace])
iplot(fig)