# Chapter2 Demonstration5

Example 2.2

Data : ```q-gdp4708.txt```

Page 78

In [1]:
import pandas as pd
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.ar_model import AutoReg
import numpy as np

# Load the data
# same as q-gnp4791.txt
gdp = pd.read_csv('./data/q-gdp4708.txt', delim_whitespace=True,header=0)
gdp

Unnamed: 0,year,mon,day,gdp
0,1947,1,1,237.2
1,1947,4,1,240.5
2,1947,7,1,244.6
3,1947,10,1,254.4
4,1948,1,1,260.4
...,...,...,...,...
243,2007,10,1,14031.2
244,2008,1,1,14150.8
245,2008,4,1,14294.5
246,2008,7,1,14412.8


In [2]:
# Log transformation of the GDP data (assuming it's in the fourth column)
gdp_log = np.log(gdp.loc[:,['gdp']])
gdp_log

Unnamed: 0,gdp
0,5.468904
1,5.482720
2,5.499624
3,5.538908
4,5.562219
...,...
243,9.549039
244,9.557526
245,9.567630
246,9.575872


In [3]:
# Fit an AR model on the differenced log GDP to determine the order
# 'mle' stands for maximum likelihood estimation
m1 = AutoReg(np.diff(gdp_log.iloc[:,0]), lags=1).fit()
m1

<statsmodels.tsa.ar_model.AutoRegResultsWrapper at 0x13ec45110>

In [4]:
# Order of the AR model
ar_order = m1.model.ar_lags[-1] if m1.model.ar_lags else 0
ar_order

1

In [5]:
# Perform Augmented Dickey-Fuller test
adf_result = adfuller(gdp_log.iloc[:,0], maxlag=10, regression='c', autolag=None)
adf_result

(-1.6108742961612152,
 0.47751667869874703,
 10,
 237,
 {'1%': -3.4582467982399105,
  '5%': -2.8738137461081323,
  '10%': -2.5733111490323846})

In [6]:
# Extract the ADF statistic and p-value
adf_statistic = adf_result[0]
adf_statistic

-1.6108742961612152

In [7]:
p_value = adf_result[1]
p_value

0.47751667869874703

In [8]:
ar_order, adf_statistic, p_value

(1, -1.6108742961612152, 0.47751667869874703)