In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col 
import scipy as sp
from linearmodels import OLS
from statsmodels.tsa.ar_model import AutoReg, ar_select_order

## Problem 4

In [2]:
## Read the FRED-QD Stata file into a DataFrame and preview its first five rows
df = pd.read_stata(filepath_or_buffer="FRED-QD.dta")
df.head(n=5)

Unnamed: 0,gdpc1,pcecc96,pcdgx,pcesvx,pcndx,gpdic1,fpix,y033rc1q027sbeax,pnfix,prfix,...,tlbsnnbbdix,tabsnnbx,tnwbsnnbx,tnwbsnnbbdix,cncfx,sp500,spindust,spdivyield,spperatio,time
0,3121.936035,1923.675049,76.6269,1228.252441,708.599426,340.604004,340.505005,49.2784,169.880295,229.2229,...,267125.59375,2292473.5,1977.079956,1674.5,118.069702,55.516701,59.139999,3.1765,18.621099,1959-01-01
1,3192.379883,1953.384033,79.510696,1246.623657,714.45752,367.098999,351.03479,50.753399,174.763901,237.236404,...,273793.59375,2300611.0,1973.300049,1650.650024,119.546799,57.506699,61.5867,3.1012,19.2899,1959-04-01
2,3194.653076,1973.791016,81.048103,1263.512207,717.215881,343.390991,354.755096,52.4063,179.470993,232.791794,...,275597.46875,2300263.75,1964.47998,1612.369995,121.838402,58.73,63.060001,3.072,18.954399,1959-07-01
3,3203.759033,1976.014038,77.176003,1278.822876,721.633179,354.566986,350.640411,52.300201,178.9702,226.201401,...,255408.9375,2294996.0,1964.310059,1517.150024,129.473297,57.763302,62.02,3.1572,17.8221,1959-10-01
4,3275.75708,1994.917969,79.658997,1290.439209,723.44397,390.209991,362.352295,54.119801,185.190506,233.161499,...,279155.40625,2316168.0,1972.790039,1603.829956,123.004799,56.276699,60.193298,3.385,16.8284,1960-01-01


In [6]:
## Count missing observations in the oil price column before transforming it
df["oilpricex"].isnull().sum()

0

In [3]:
## Print a summary of the loaded frame: index range, number of columns, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 236 entries, 0 to 235
Columns: 249 entries, gdpc1 to time
dtypes: datetime64[ns](1), float32(246), int16(1), int32(1)
memory usage: 231.9 KB


## (a)

In [4]:
## First-difference the oil price series.
## Differencing leaves a NaN in the first position, which is dropped so the
## resulting series starts at the second observation.

oilPrice = df.loc[:, "oilpricex"]
oilPrice_diff = oilPrice.diff(periods=1).dropna()
oilPrice_diff.head(n=5)

1   -0.213600
2   -0.117300
3   -0.100201
4   -0.054699
5   -0.065802
Name: oilpricex, dtype: float32

## (b)

In [5]:
## Build the AR(4) design matrix: a constant followed by lags 1-4 of the differenced oil price.
## The column names and their order are kept fixed because the Wald-test hypothesis string
## used later for the OLS fit refers to 'oilpricex_d1' ... 'oilpricex_d4' by name.

ar_order = 4
exog = pd.concat(
    {f"oilpricex_d{lag}": oilPrice_diff.shift(lag) for lag in range(1, ar_order + 1)},
    axis=1,
)
exog.insert(0, "const", 1)  # intercept goes first, matching the original column order

## Estimate the AR(4) by OLS with a HAC (heteroskedasticity- and autocorrelation-consistent)
## covariance matrix. The first rows of exog contain NaN lags; missing="drop" removes them.

mod = sm.OLS(endog=oilPrice_diff, exog=exog, missing="drop")
result1 = mod.fit(cov_type='HAC', cov_kwds={'maxlags': ar_order})
print(result1.summary())

                            OLS Regression Results                            
Dep. Variable:              oilpricex   R-squared:                       0.107
Model:                            OLS   Adj. R-squared:                  0.091
Method:                 Least Squares   F-statistic:                     4.368
Date:                Thu, 07 Apr 2022   Prob (F-statistic):            0.00202
Time:                        08:48:35   Log-Likelihood:                -773.31
No. Observations:                 231   AIC:                             1557.
Df Residuals:                     226   BIC:                             1574.
Df Model:                           4                                         
Covariance Type:                  HAC                                         
                   coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
const            0.1491      0.440      0.339   

In [6]:
## Fit the same AR(4) with statsmodels' AutoReg (conditional MLE),
## again using a HAC covariance matrix with 4 lags.

mod = AutoReg(endog=oilPrice_diff, lags=4, old_names=False)
result2 = mod.fit(cov_type="HAC", cov_kwds=dict(maxlags=4))
print(result2.summary())

                            AutoReg Model Results                             
Dep. Variable:              oilpricex   No. Observations:                  235
Model:                     AutoReg(4)   Log Likelihood                -773.314
Method:               Conditional MLE   S.D. of innovations              6.881
Date:                Thu, 07 Apr 2022   AIC                              3.909
Time:                        08:48:35   BIC                              3.999
Sample:                             4   HQIC                             3.945
                                  235                                         
                   coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
const            0.1491      0.440      0.339      0.735      -0.713       1.011
oilpricex.L1     0.2737      0.090      3.042      0.002       0.097       0.450
oilpricex.L2    -0.2589      0.177     -1.46



## (c)

In [7]:
## Joint Wald test that all four AR coefficients are zero, run on both fits.
## The lag regressors are named differently in the two models, so each fit
## gets its own restriction string.

ols_terms = [f"oilpricex_d{k}" for k in range(1, 5)]
mle_terms = [f"oilpricex.L{k}" for k in range(1, 5)]

# Each string has the form '<lag1> = <lag2> = <lag3> = <lag4> = 0'
hypothesis1 = " = ".join(ols_terms + ["0"])
hypothesis2 = " = ".join(mle_terms + ["0"])

test1 = result1.wald_test(hypothesis1)
test2 = result2.wald_test(hypothesis2)

In [8]:
## Wald test result for the OLS fit
print(test1)

<Wald test (chi2): statistic=[[17.47259673]], p-value=0.0015641114733183064, df_denom=4>


In [9]:
## Wald test result for the conditional MLE (AutoReg) fit
print(test2)

<Wald test (chi2): statistic=[[17.47259673]], p-value=0.0015641114733183162, df_denom=4>


## (d)

Based on the joint Wald test results above, we reject the null hypothesis that the four AR coefficients are all equal to zero at the 0.01 significance level.
Furthermore, the regression tables show that only the one-period lagged coefficient is individually significant at the 0.01 significance level.
That is, we may want to consider an AR(1) instead of an AR(4).
Notice that this conclusion is quite robust, since OLS and conditional MLE produce nearly identical regression tables.