In [1]:
import os
import numpy as np
import pandas as pd
from matplotlib.pyplot import subplots

import statsmodels.api as sm
from statsmodels.stats.outliers_influence \
import variance_inflation_factor as VIF
from statsmodels.stats.anova import anova_lm

from ISLP import load_data
from ISLP.models import (ModelSpec as MS, 
                            summarize, 
                            poly)

In [2]:
# Simulate a linear relationship through the origin: y = 2x + noise, where
# both x and the noise are standard-normal draws from a generator seeded
# with 1 so the numbers are reproducible.
rng = np.random.default_rng(seed=1)
x = rng.normal(size=100)
noise = rng.normal(size=100)  # drawn after x, from the same generator
y = 2 * x + noise

In [3]:
# ModelSpec works on data frames, so collect the simulated vectors into one.
data = pd.DataFrame(dict(x=x, y=y))

# Least-squares fit of y onto x with the intercept term switched off.
X = MS(["x"], intercept=False).fit_transform(data)
model1 = sm.OLS(endog=data["y"], exog=X)
results1 = model1.fit()

# Last expression of the cell: coefficient table for the fit.
summarize(results1)

Unnamed: 0,coef,std err,t,P>|t|
x,1.9762,0.117,16.898,0.0


In [4]:
# Reverse regression: least-squares fit of x onto y, again with the
# intercept term switched off.
Y = MS(["y"], intercept=False).fit_transform(data)
model2 = sm.OLS(endog=data["x"], exog=Y)
results2 = model2.fit()

# Last expression of the cell: coefficient table for the fit.
summarize(results2)

Unnamed: 0,coef,std err,t,P>|t|
y,0.3757,0.022,16.898,0.0


In [5]:
# Closed-form t-statistic computed directly from the data:
#   t = sqrt(n - 1) * sum(x*y) / sqrt(sum(x^2) * sum(y^2) - sum(x*y)^2)
# The expression is symmetric in x and y; it is printed next to the t-values
# of the y-onto-x fit (results1) and the x-onto-y fit (results2) so the three
# numbers can be compared.
n_obs = len(data)
sum_xy = (data["x"] * data["y"]).sum()  # vectorized; computed once, used twice
sum_xx = (data["x"] ** 2).sum()
sum_yy = (data["y"] ** 2).sum()
t_calculated = np.sqrt(n_obs - 1) * sum_xy / np.sqrt(sum_xx * sum_yy - sum_xy ** 2)

# Each model has a single predictor, so `tvalues` holds exactly one entry.
# Extract that scalar explicitly: %-formatting the container itself relies on
# the implicit float(Series) conversion, which pandas has deprecated.
print("t calculated: %.4f\nmodel1 t value: %.4f\nmodel2 t value: %.4f"
      % (t_calculated, results1.tvalues.iloc[0], results2.tvalues.iloc[0]))

t calculated: 16.8984
model1 t value: 16.8984
model2 t value: 16.8984


In [24]:
# Second simulation, this time with a non-zero intercept:
# y = 100 + 10x + noise, with unit-scale normal noise. x is drawn first and
# the noise second, both from the existing `rng` generator.
x = rng.normal(size=100)
y = 100 + 10 * x + rng.normal(scale=1, size=100)

# Collect both vectors in a frame with columns "x" and "y" (in that order).
data2 = pd.DataFrame(dict(x=x, y=y))

In [25]:
# Least-squares fit of y onto x on the second data set; MS is called with
# its default intercept setting here.
X = MS(["x"]).fit_transform(data2)
model3 = sm.OLS(endog=data2["y"], exog=X)

# Last expression of the cell: coefficient table for the fit.
summarize(model3.fit())

Unnamed: 0,coef,std err,t,P>|t|
intercept,99.9063,0.094,1058.338,0.0
x,10.0496,0.108,93.401,0.0


In [26]:
# Reverse regression on the second data set: x onto y; MS is called with
# its default intercept setting here.
Y = MS(["y"]).fit_transform(data2)
model4 = sm.OLS(endog=data2["x"], exog=Y)

# Last expression of the cell: coefficient table for the fit.
summarize(model4.fit())

Unnamed: 0,coef,std err,t,P>|t|
intercept,-9.8307,0.106,-92.939,0.0
y,0.0984,0.001,93.401,0.0
