In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline

import plotly.graph_objects as go

In [2]:
plt.rcParams.update(plt.rcParamsDefault)
plt.rcParams.update({
  'font.size': 12,
  'grid.alpha': 0.25})

## Function declarations

### Read `ods` to `DataFrame`


In [3]:
def read_ods(filename, columns=None):
  if columns is None:
    df = pd.read_excel(filename, engine="odf", header=0)
  else:
    key_arr = (k for k in columns.keys())
    df = pd.read_excel(filename, engine="odf", header=0,
                      usecols=columns)
    df = df.rename(columns=columns)
  return df

### Get linear regression coefficients ($b_0$ and $b_1$)

In [4]:
def get_multiple_regress_coeff(df, n_vars=2):
  
  num_of_samples = len(df)
  X_arr = np.zeros((num_of_samples, n_vars))
  Y = df["Y"].to_numpy()

  for i in range(n_vars):
    key = f"X_{i+1}"
    X_arr[:, i] = df[key].to_numpy()

  regress = LinearRegression().fit(X_arr, Y)

  df["hat_Y"] = regress.intercept_ + X_arr.dot(regress.coef_) 

  return regress, df


## Load the data

In [5]:
filename = "06-table-04-milk-gallons.ods"

df = read_ods(filename)

df


Unnamed: 0,Week,"Sales (1,000s) Y",Price per Gallon ($) X_1,Advertising ($100s) X_2
0,1,10,1.3,9
1,2,6,2.0,7
2,3,5,1.7,5
3,4,12,1.5,14
4,5,10,1.6,15
5,6,15,1.2,12
6,7,5,1.6,6
7,8,12,1.4,10
8,9,17,1.0,15
9,10,20,1.1,21


## Fit multiple linear regression $b_0 + b_1 X_1 + b_2 X_2$

In [6]:
n_vars = 2

df_regress = df.copy()
df_regress = df_regress.rename(columns={
  "Sales (1,000s) Y": "Y",
  "Price per Gallon ($) X_1": "X_1",
  "Advertising ($100s) X_2": "X_2"})

regress, df_regress = get_multiple_regress_coeff(df_regress, n_vars=n_vars)

print(f"regress.intercept_ : {regress.intercept_}")
print(f"regress.coef_ : {regress.coef_}")
df_regress

regress.intercept_ : 16.40636514633729
regress.coef_ : [-8.24758014  0.5851009 ]


Unnamed: 0,Week,Y,X_1,X_2,hat_Y
0,1,10,1.3,9,10.950419
1,2,6,2.0,7,4.006911
2,3,5,1.7,5,5.310983
3,4,12,1.5,14,12.226408
4,5,10,1.6,15,11.98675
5,6,15,1.2,12,13.53048
6,7,5,1.6,6,6.720842
7,8,12,1.4,10,10.710762
8,9,17,1.0,15,16.935298
9,10,20,1.1,21,19.621146


In [7]:
x_min = df_regress['X_1'].min()
x_max = df_regress['X_1'].max()
x_data = np.linspace(0, x_max, 10)

y_min = df_regress['X_2'].min()
y_max = df_regress['X_2'].max()
y_data = np.linspace(0, y_max, 10)

z_data = regress.intercept_ + regress.coef_[0]*x_data.reshape((-1, 1)) \
  + regress.coef_[1]*y_data
#z_data

In [21]:
z_color = np.zeros_like(z_data)
# selected_point = [1, 8]
selected_point = list(range(10))
x_scatter_data = df_regress['X_1'].iloc[selected_point].to_numpy()
y_scatter_data = df_regress['X_2'].iloc[selected_point].to_numpy()
z_scatter_data = df_regress['Y'].iloc[selected_point].to_numpy()

fig = go.Figure(data=[
  go.Surface(z=z_data.T, x=x_data, y=y_data, 
    colorscale="Blues", surfacecolor=z_color,
    showscale=False, opacity=0.7),
  go.Scatter3d(x=x_scatter_data, y=y_scatter_data, z=z_scatter_data, 
    mode="markers", marker_size=4)])

#go.Figure(data=[go.Scatter3d(x=df_regress['X_1'], y=df_regress['X_2'], 
#  z=df_regress['Y'], mode="markers")])

theta = [10, 80, 70]
theta = np.deg2rad(theta)
a = 2.2
camera = {
  'center': {'x': 0, 'y': -0.1, 'z': -0.2},
  'eye': {'x': a*np.cos(theta[0]), 'y': a*np.cos(theta[1]), 'z': a*np.cos(theta[2])}}

fig.update_layout(
  title="Fitted regression plane", autosize=True,
  width=500, height=500,
  margin={'l':35, 'r': 30, 'b':15, 't':50,},
  scene={
    "xaxis_title": "Price per gallon ($)",
    "yaxis_title": "Advertising ($100s)",
    "zaxis_title": "Sales (1000s)"},
  scene_camera=camera)



fig.show()

## Forecast

In [10]:
X_1_forecast = 1.5
X_2_forecast = 10

b0 = np.round(regress.intercept_, 2)
b1, b2 = np.round(regress.coef_, 2)
print(f"(b0, b1, b2): {b0, b1, b2}")
# hat_Y = regress.intercept_ + regress.coef_.dot([X_1_forecast, X_2_forecast])
hat_Y = b0 + b1*X_1_forecast + b2*X_2_forecast

print(f"at (X_1, X_): {X_1_forecast, X_2_forecast}")
print(f"hat_Y: {hat_Y:.3f}")

(b0, b1, b2): (16.41, -8.25, 0.59)
at (X_1, X_): (1.5, 10)
hat_Y: 9.935


## Interpretation of regression coefficient

To illustrate the net effects of individual $X$'s on the response, consider the situtation in which price is to be \$1.00 per gallon and \$1,000 is to be spent on advertising. Then 

In [33]:
X_1_forecast = 1.00
X_2_forecast = 10

hat_Y = b0 + b1*X_1_forecast + b2*X_2_forecast
print(f"hat_Y: {hat_Y:.3f}")
print(f"Sales are forecast to be {int(round(hat_Y, 2)*1000):,d} gallons of milk.")

hat_Y_1 = hat_Y

hat_Y: 14.060
Sales are forecast to be 14,060 gallons of milk.


### Small increase in $X_1$
What is the effect on sales of a 1-cent price increase if \$1,000 is still spent on advertising?

In [34]:
X_1_forecast = 1.01
X_2_forecast = 10

hat_Y = b0 + b1*X_1_forecast + b2*X_2_forecast
print(f"hat_Y: {hat_Y:.3f}")
print(f"Sales are forecast to be {int(round(hat_Y, 2)*1000):,d} gallons of milk.")

hat_Y_2 = hat_Y

hat_Y: 13.977
Sales are forecast to be 13,980 gallons of milk.


In [40]:
print(f"Note that sales decrease by = {(hat_Y_1-hat_Y_2)*1000:.1f} gallons")

Note that sales decrease by = 82.5 gallons


### Small increase in $X_2$

What is the effect on sales of a \$100 increase in advertising if price remains constant at \$1.00?

In [41]:
X_1_forecast = 1.00
X_2_forecast = 11

hat_Y = b0 + b1*X_1_forecast + b2*X_2_forecast
print(f"hat_Y: {hat_Y:.3f}")
print(f"Sales are forecast to be {int(round(hat_Y, 2)*1000):,d} gallons of milk.")

hat_Y_3 = hat_Y


hat_Y: 14.650
Sales are forecast to be 14,650 gallons of milk.


In [43]:
print(f"Note that sales increase by = {(hat_Y_3-hat_Y_1)*1000:.0f} gallons")

Note that sales increase by = 590 gallons
