In [1]:
import numpy as np
import pandas as pd
from matplotlib.pyplot import subplots

In [2]:
import statsmodels.api as sm
from ISLP import load_data
from ISLP.models import (ModelSpec as MS, summarize, poly)

In [3]:
from ISLP import load_data
from ISLP.models import (ModelSpec as MS, summarize, poly)

In [4]:
# Load the "Boston" data set through ISLP's load_data helper.
Boston = load_data("Boston")

In [5]:
# List the column names available in the loaded data.
Boston.columns

Index(['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax',
       'ptratio', 'lstat', 'medv'],
      dtype='object')

In [6]:
# Build the model matrix by hand: a constant column of ones for the
# intercept, followed by the single predictor `lstat`.
X = pd.DataFrame(
    {
        'intercept': np.ones(len(Boston)),
        'lstat': Boston['lstat'],
    }
)
# Show only the first four rows.
X.head(4)

Unnamed: 0,intercept,lstat
0,1.0,4.98
1,1.0,9.14
2,1.0,4.03
3,1.0,2.94


In [7]:
# Response variable: the `medv` column.
y = Boston['medv']
# Specify the least-squares regression of y on the model matrix X ...
model = sm.OLS(endog=y, exog=X)
# ... and fit it; `results` holds the fitted model used in later cells.
results = model.fit()

In [8]:
# Compact coefficient table from ISLP's summarize helper
# (the output below lists coef, std err, t and P>|t| per term).
summarize(results)

Unnamed: 0,coef,std err,t,P>|t|
intercept,34.5538,0.563,61.415,0.0
lstat,-0.95,0.039,-24.528,0.0


In [12]:
# ModelSpec() from the ISLP library (imported as MS) describes the model and
# creates a transform object. Calling fit() on the data and then transform()
# constructs the corresponding model matrix.
design = MS(['lstat']).fit(Boston)
X = design.transform(Boston)
# Show only the first four rows.
X.head(4)

Unnamed: 0,intercept,lstat
0,1.0,4.98
1,1.0,9.14
2,1.0,4.03
3,1.0,2.94


In [14]:
# Same model matrix as before, built in one step:
# fit_transform() combines the separate fit() and transform() calls.
design = MS(['lstat'])
X = design.fit_transform(Boston)
# Show only the first four rows.
X.head(4)

Unnamed: 0,intercept,lstat
0,1.0,4.98
1,1.0,9.14
2,1.0,4.03
3,1.0,2.94


In [16]:
# Back to the fitted regression model: display its full summary report.
results.summary()

0,1,2,3
Dep. Variable:,medv,R-squared:,0.544
Model:,OLS,Adj. R-squared:,0.543
Method:,Least Squares,F-statistic:,601.6
Date:,"Sun, 07 Jan 2024",Prob (F-statistic):,5.08e-88
Time:,11:34:15,Log-Likelihood:,-1641.5
No. Observations:,506,AIC:,3287.0
Df Residuals:,504,BIC:,3295.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
intercept,34.5538,0.563,61.415,0.000,33.448,35.659
lstat,-0.9500,0.039,-24.528,0.000,-1.026,-0.874

0,1,2,3
Omnibus:,137.043,Durbin-Watson:,0.892
Prob(Omnibus):,0.0,Jarque-Bera (JB):,291.373
Skew:,1.453,Prob(JB):,5.36e-64
Kurtosis:,5.319,Cond. No.,29.7


In [17]:
# Retrieve the fitted coefficients.
results.params

intercept    34.553841
lstat        -0.950049
dtype: float64

In [19]:
# To try out the model, create a new data frame holding the `lstat`
# values at which we want predictions.
new_df = pd.DataFrame([5, 10, 15], columns=['lstat'])

# Use the transform method of the fitted design to build the matching
# model matrix for the new data.
newX = design.transform(new_df)
newX

Unnamed: 0,intercept,lstat
0,1.0,5
1,1.0,10
2,1.0,15


In [23]:
# Compute predictions from the fitted model at the new model matrix,
# then display the predicted means.
new_predictions = results.get_prediction(exog=newX)
new_predictions.predicted_mean

array([29.80359411, 25.05334734, 20.30310057])

In [25]:
# Confidence intervals for the predicted values, with alpha set to 0.05.
new_predictions.conf_int(alpha=0.05)

array([[29.00741194, 30.59977628],
       [24.47413202, 25.63256267],
       [19.73158815, 20.87461299]])

In [26]:
# Passing obs=True yields prediction intervals instead, with the same alpha.
new_predictions.conf_int(obs=True, alpha=0.05)

array([[17.56567478, 42.04151344],
       [12.82762635, 37.27906833],
       [ 8.0777421 , 32.52845905]])