In [1]:
import numpy as np
import pandas as pd
import matplotlib as mp
import statsmodels.api as sm

In [20]:
mu, sigma = 0, 5 # mean and standard deviation of normal distribution for the error term
x = np.random.uniform(1,8,100)
epsilon = np.random.normal(mu,sigma,100)
y = 3 + 4*x + epsilon

In [21]:
model_reg = sm.OLS(y,x).fit()
model_reg.summary()

0,1,2,3
Dep. Variable:,y,R-squared (uncentered):,0.944
Model:,OLS,Adj. R-squared (uncentered):,0.944
Method:,Least Squares,F-statistic:,1672.0
Date:,"Mon, 23 Sep 2024",Prob (F-statistic):,8.25e-64
Time:,13:55:14,Log-Likelihood:,-307.02
No. Observations:,100,AIC:,616.0
Df Residuals:,99,BIC:,618.6
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,4.6098,0.113,40.886,0.000,4.386,4.833

0,1,2,3
Omnibus:,3.429,Durbin-Watson:,2.033
Prob(Omnibus):,0.18,Jarque-Bera (JB):,2.568
Skew:,-0.245,Prob(JB):,0.277
Kurtosis:,2.387,Cond. No.,1.0


In [22]:
x_updated = sm.add_constant(x)
model_updated = sm.OLS(y,x_updated).fit()
model_updated.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.715
Model:,OLS,Adj. R-squared:,0.712
Method:,Least Squares,F-statistic:,245.8
Date:,"Mon, 23 Sep 2024",Prob (F-statistic):,1.84e-28
Time:,13:55:17,Log-Likelihood:,-304.18
No. Observations:,100,AIC:,612.4
Df Residuals:,98,BIC:,617.6
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,2.8714,1.201,2.391,0.019,0.489,5.254
x1,4.0509,0.258,15.680,0.000,3.538,4.564

0,1,2,3
Omnibus:,3.463,Durbin-Watson:,2.007
Prob(Omnibus):,0.177,Jarque-Bera (JB):,2.183
Skew:,-0.139,Prob(JB):,0.336
Kurtosis:,2.332,Cond. No.,11.3


When the model uses large values of x, the x-intercept becomes very far from the dataset, which makes the intercept insignificant

In [23]:
# We now generate autocorrelated error terms
epsilon[0] = np.random.normal(mu,sigma,1)
for i in range(0,99):
    epsilon[i+1]=0.4*epsilon[i]+0.6*np.random.normal(mu,sigma,1)

  epsilon[0] = np.random.normal(mu,sigma,1)
  epsilon[i+1]=0.4*epsilon[i]+0.6*np.random.normal(mu,sigma,1)


In [24]:
y = 3 + 4*x + epsilon

In [25]:
x_updated = sm.add_constant(x)
model_OLS = sm.OLS(y,x_updated).fit()
model_OLS.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.887
Model:,OLS,Adj. R-squared:,0.886
Method:,Least Squares,F-statistic:,768.0
Date:,"Mon, 23 Sep 2024",Prob (F-statistic):,3.64e-48
Time:,14:01:17,Log-Likelihood:,-248.03
No. Observations:,100,AIC:,500.1
Df Residuals:,98,BIC:,505.3
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,2.1861,0.685,3.192,0.002,0.827,3.545
x1,4.0836,0.147,27.713,0.000,3.791,4.376

0,1,2,3
Omnibus:,1.947,Durbin-Watson:,1.752
Prob(Omnibus):,0.378,Jarque-Bera (JB):,1.384
Skew:,-0.246,Prob(JB):,0.501
Kurtosis:,3.3,Cond. No.,11.3


In [26]:
from scipy.linalg import toeplitz
toeplitz(np.array([1,0.5,0,0,0,0,0,0]))

array([[1. , 0.5, 0. , 0. , 0. , 0. , 0. , 0. ],
       [0.5, 1. , 0.5, 0. , 0. , 0. , 0. , 0. ],
       [0. , 0.5, 1. , 0.5, 0. , 0. , 0. , 0. ],
       [0. , 0. , 0.5, 1. , 0.5, 0. , 0. , 0. ],
       [0. , 0. , 0. , 0.5, 1. , 0.5, 0. , 0. ],
       [0. , 0. , 0. , 0. , 0.5, 1. , 0.5, 0. ],
       [0. , 0. , 0. , 0. , 0. , 0.5, 1. , 0.5],
       [0. , 0. , 0. , 0. , 0. , 0. , 0.5, 1. ]])

In [27]:
rho = 0.4
cov_matrix = sigma**2*toeplitz(np.append([1,rho],np.zeros(98)))
sm.GLS(y,x_updated,cov_matrix).fit().summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.9
Model:,GLS,Adj. R-squared:,0.899
Method:,Least Squares,F-statistic:,880.0
Date:,"Mon, 23 Sep 2024",Prob (F-statistic):,9.37e-51
Time:,14:04:34,Log-Likelihood:,-257.9
No. Observations:,100,AIC:,519.8
Df Residuals:,98,BIC:,525.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,2.1500,0.755,2.848,0.005,0.652,3.648
x1,4.0974,0.138,29.664,0.000,3.823,4.372

0,1,2,3
Omnibus:,5.872,Durbin-Watson:,2.84
Prob(Omnibus):,0.053,Jarque-Bera (JB):,5.289
Skew:,-0.474,Prob(JB):,0.071
Kurtosis:,3.609,Cond. No.,8.75


In [None]:
sm.