Add Augmented Dickey-Fuller unit root test (#92)

JuliaStats · Apr 18, 2017 · 43062d1 · 43062d1
1 parent e764c02
commit 43062d1
Show file tree

Hide file tree

Showing 3 changed files with 366 additions and 0 deletions.
diff --git a/src/HypothesisTests.jl b/src/HypothesisTests.jl
@@ -142,4 +142,5 @@ include("power_divergence.jl")
 include("anderson_darling.jl")
 include("box_test.jl")
 include("breusch_godfrey.jl")
+include("augmented_dickey_fuller.jl")
 end
diff --git a/src/augmented_dickey_fuller.jl b/src/augmented_dickey_fuller.jl
@@ -0,0 +1,218 @@
+# augmented_dickey_fuller.jl
+# (Augmented) Dickey-Fuller unit root test
+#
+# Copyright (C) 2017   Benjamin Born
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+export ADFTest
+
+immutable ADFTest <: HypothesisTest
+    n               ::Int               # number of observations
+    deterministic   ::Symbol            # deterministic terms included in regression
+    lag             ::Int               # number of lags in test statistic
+    stat            ::Float64           # test statistic
+    coef            ::Float64           # coefficient on lagged (non-differenced) variable
+    cv              ::Vector{Float64}   # critical values
+end
+
+"""
+    ADFTest(y, deterministic, lag)
+
+Compute the augmented Dickey-Fuller unit root test.
+
+`y` is the time series to be tested, `deterministic` determines the deterministic terms
+(options: `none`, `constant`, `trend`, `squared_trend`) and `lag` the number of lagged
+first-differences included in the test regression, respectively.
+
+Critical values and asymptotic p-values are computed based on response surface regressions
+following MacKinnon (2010) and MacKinnon (1994), respectively. These may differ slightly
+from those reported in other regression packages as different algorithms might be used.
+
+References
+
+  * James G. MacKinnon, 2010, "Critical values for cointegration tests,"
+    QED Working Paper No. 1227, 2010, [link](http://ideas.repec.org/p/qed/wpaper/1227.html).
+  * James G. MacKinnon, 1994, "Approximate Asymptotic Distribution Functions for
+    Unit-Root and Cointegration Tests", Journal of Business & Economic Statistics,
+    Vol. 12, No. 2, pp. 167-176, [link](http://www.jstor.org/stable/1391481).
+
+External links
+
+  * [Augmented Dickey-Fuller test on Wikipedia](https://en.wikipedia.org/wiki/Augmented_Dickey–Fuller_test)
+"""
+function ADFTest{T<:Real}(y::AbstractVector{T}, deterministic::Symbol, lag::Int)
+
+    nobs = length(y)
+    Δy = diff(y)
+
+    if deterministic == :none
+        Δyt   = Δy[lag+1:end]
+        xt    = y[lag+1:end-1,1:1]
+        dfexo = 0
+    elseif deterministic == :constant
+        Δyt   = Δy[lag+1:end]
+        xt    = [ y[lag+1:end-1] ones(nobs-lag-1, 1) ]
+        dfexo = 1
+    elseif deterministic == :trend
+        Δyt   = Δy[lag+1:end]
+        xt    = [ y[lag+1:end-1] ones(nobs-lag-1, 1) (1:nobs-lag-1) ]
+        dfexo = 2
+    elseif deterministic == :squared_trend
+        Δyt   = Δy[lag+1:end]
+        xt    = [ y[lag+1:end-1] ones(nobs-lag-1, 1) (1:nobs-lag-1) [
+                  i^2 for i = 1:nobs-lag-1] ]
+        dfexo   = 3
+    else
+        throw(ArgumentError("deterministic = $(deterministic) is invalid"))
+    end
+
+    if lag > 0
+        Δylag = hcat([Δy[lag-i+1:end-i] for i=1:lag]...)
+        xt = [ xt Δylag ]
+    end
+
+    adf_coef = xt \ Δyt
+    adf_res  = Δyt - xt * adf_coef
+    n        = length(adf_res)
+    k        = lag + 1 + dfexo
+    sigma2   = dot(adf_res, adf_res) / (n - k)
+    inv_xtxt = inv(xt' * xt)
+    adf_stat = adf_coef[1] / sqrt(sigma2 * inv_xtxt[1,1])
+
+    crit_val = adf_cv(nobs, deterministic)
+
+    ADFTest(n, deterministic, lag, adf_stat, adf_coef[1], crit_val)
+
+end
+
+function adf_cv(nobs::Int, deterministic::Symbol)
+    # computes critical values
+    #
+    # based on James G. MacKinnon, "Critical values for cointegration tests,"
+    # QED Working Paper No. 1227, 2010. http://ideas.repec.org/p/qed/wpaper/1227.html
+
+    cv_coeff = [
+        # no constant, rows denote values for 1%, 5%, and 10% level, respectively
+        -2.56574 -02.2358 -03.627    0.000
+        -1.94100 -00.2686 -03.365   31.223
+        -1.61682  00.2656 -02.714   25.364
+        # constant
+        -3.43035 -06.5393 -16.786 -079.433
+        -2.86154 -02.8903 -04.234 -040.040
+        -2.56677 -01.5384 -02.809    0.000
+        # constant and trend
+        -3.95877 -09.0531 -28.428 -134.155
+        -3.41049 -04.3904 -09.036 -045.374
+        -3.12705 -02.5856 -03.925 -022.380
+        # constant, trend, and trend squared
+        -4.37113 -11.5882 -35.819 -334.047
+        -3.83239 -05.9057 -12.490 -118.284
+        -3.55326 -03.6596 -05.293 -063.559
+    ]
+
+    # Equation A.1 in MacKinnon (2010)
+    if deterministic == :none
+        adf_crit_val = cv_coeff[1:3, 1] + cv_coeff[1:3, 2] ./ nobs +
+                          cv_coeff[1:3, 3] ./ (nobs^2) + cv_coeff[1:3, 4] ./ (nobs^3)
+    elseif deterministic == :constant
+        adf_crit_val = cv_coeff[4:6, 1] + cv_coeff[4:6, 2] ./ nobs +
+                          cv_coeff[4:6, 3] ./ (nobs^2) + cv_coeff[4:6, 4] ./ (nobs^3)
+    elseif deterministic == :trend
+        adf_crit_val = cv_coeff[7:9, 1] + cv_coeff[7:9, 2] ./ nobs +
+                          cv_coeff[7:9, 3] ./ (nobs^2) + cv_coeff[7:9, 4] ./ (nobs^3)
+    elseif deterministic == :squared_trend
+        adf_crit_val = cv_coeff[10:12, 1] + cv_coeff[10:12, 2] ./ nobs +
+                          cv_coeff[10:12, 3] ./ (nobs^2) + cv_coeff[10:12, 4] ./ (nobs^3)
+    else
+        throw(ArgumentError("deterministic = $(deterministic) is invalid"))
+    end
+
+end
+
+function adf_pv_aux(adf_stat::Float64, deterministic::Symbol)
+    # helper function for p-value computation
+    #
+    # based on James G. MacKinnon, "Approximate Asymptotic Distribution Functions for
+    # Unit-Root and Cointegration Tests", Journal of Business & Economic Statistics,
+    # Vol. 12, No. 2 (Apr., 1994), pp. 167-176, http://www.jstor.org/stable/1391481
+    #
+    # A slightly more accurate approach could be based on an algorithm described in
+    # James G. MacKinnon, "Numerical Distribution Functions for Unit Root and Cointegration
+    # Tests", Journal of Applied Econometrics, Vol. 11, No. 6 (Nov.-Dec., 1996),
+    # pp. 601-618, http://www.jstor.org/stable/2285154
+    # and made available under GPL as Fortran routines (used e.g. by R's urca package) at
+    # http://qed.econ.queensu.ca/pub/faculty/mackinnon/numdist/
+
+    pv_coeff_smallp = [
+        0.6344 1.2378 0.032496 -1.04 -19.04
+        2.1659 1.4412 0.038269 -1.61 -18.83
+        3.2512 1.6047 0.049588 -2.89 -16.18
+        4.0003 1.6580 0.048288 -3.21 -17.17
+    ]
+
+    pv_coeff_largep = [
+        0.4797 0.93557 -0.06999  0.033066 -1.04 Inf
+        1.7339 0.93202 -0.12745 -0.010368 -1.61 2.74
+        2.5261 0.61654 -0.37956 -0.060285 -2.89 0.70
+        3.0778 0.49529 -0.41477 -0.059359 -3.21 0.54
+    ]
+
+    if deterministic == :none
+        tab_row = 1
+    elseif deterministic == :constant
+        tab_row = 2
+    elseif deterministic == :trend
+        tab_row = 3
+    elseif deterministic == :squared_trend
+        tab_row = 4
+    else
+        throw(ArgumentError("deterministic = $(deterministic) is invalid"))
+    end
+
+    if adf_stat < pv_coeff_smallp[tab_row, 5]
+        aux_var = -Inf
+    elseif adf_stat > pv_coeff_largep[tab_row, 6]
+        aux_var = Inf
+    else
+        if adf_stat < pv_coeff_smallp[tab_row, 4]
+            aux_var = pv_coeff_smallp[tab_row, 1] + pv_coeff_smallp[tab_row, 2] * adf_stat +
+                        pv_coeff_smallp[tab_row, 3] * (adf_stat^2)
+        else
+            aux_var = pv_coeff_largep[tab_row, 1] + pv_coeff_largep[tab_row, 2] * adf_stat +
+                        pv_coeff_largep[tab_row, 3] * (adf_stat^2) +
+                        pv_coeff_largep[tab_row, 4] * (adf_stat^3)
+        end
+
+    end
+end
+
+testname(::ADFTest) = "Augmented Dickey-Fuller unit root test"
+population_param_of_interest(x::ADFTest) =
+    ("coefficient on lagged non-differenced variable", 0, x.coef)
+
+function show_params(io::IO, x::ADFTest, ident)
+    println(io, ident, "sample size in regression:          ", x.n)
+    println(io, ident, "number of lags:                     ", x.lag)
+    println(io, ident, "ADF statistic:                      ", x.stat)
+    println(io, ident, "Critical values at 1%, 5%, and 10%: ", x.cv')
+end
+
+pvalue(x::ADFTest) = HypothesisTests.pvalue(Normal(0, 1), adf_pv_aux(x.stat, x.deterministic); tail=:left)
diff --git a/test/augmented_dickey_fuller.jl b/test/augmented_dickey_fuller.jl
@@ -0,0 +1,147 @@
+using HypothesisTests, Base.Test
+
+sim_data_h0 = [
+    0.3823959677906078, -0.2152385089376233, -0.22568375357499895, -1.064710607963763,
+    -0.7535992694654291, 1.5414885543718815, -0.7255977944286491, -0.19563221826190302,
+    0.23578930816100901, 0.8194975957297876, 1.7827692007679783, 2.2415601558216953,
+    1.719223398400187, 2.1276192367249394, 2.077168006791274, 1.3835143629873885,
+    -0.39031889042066714, -0.26955292548322785, -1.0271859632037557, -2.7569421445944418,
+    -1.9609935225898258, -1.2909315413337634, -0.7400791638330025, -0.8034537562625632,
+    0.5334890259883591, 0.46034039265061977, -0.28512392399013586, -1.5051790525247886,
+    -1.558356390052481, -1.7234927478553148, -3.838862996282626, -3.9056309827892153,
+    -2.6831691343211537, -2.1154736745827103, -1.351974746259323, -0.973375866405973,
+    -1.61897292304257, -2.2836191674336015, -4.086645491644496, -5.37569309549424,
+    -5.710963175012358, -5.6404955736512, -5.298701806271377, -3.563536140216995,
+    -2.263620175906803, -2.057256094504186, -3.0661160500417894, -3.9161717203571875,
+    -2.786762284472861, -4.135295740964688, -2.8950048405356283, -2.9506519337429524,
+    -2.1912617197974926, -2.221528606444255, -2.5826112742003486, -4.589914476732911,
+    -5.158773604079549, -6.308401128476547, -4.427778953845362, -5.898403495232742,
+    -6.433614465398474, -7.397158425223975, -8.78226649289989, -8.64796625533862,
+    -9.26408292134744, -10.984072277667181, -10.66330299767977, -12.11067589556128,
+    -12.602947083630847, -13.016034467614029, -12.014105955158076, -10.839424629182965,
+    -12.166394974245012, -13.759609842289079, -14.198319818047874, -12.972522320050007,
+    -11.483432041980393, -9.32399885487954, -10.233477872917042, -9.085657572304541,
+    -9.590729768826915, -9.857796804045044, -8.358714410114315, -7.561410249938924,
+    -7.733050693246021, -8.202130303176117, -7.984506281967517, -7.625359874365167,
+    -7.30533515865711, -7.046118830601424, -6.586422845559796, -6.376436485517654,
+    -4.8532610943403895, -3.5934664907281935, -3.758749591842454, -3.602555096954818,
+    -4.416870391331517, -6.126738597336108, -4.626800721222075, -4.755716359752092,
+    -5.205900331311935
+]
+
+t = ADFTest(sim_data_h0, :none, 0)
+
+@test t.n == 100
+@test t.deterministic == :none
+@test t.lag == 0
+@test t.stat ≈ -0.6050597112208628
+@test t.coef ≈ -0.009972855420884913
+@test t.cv ≈ [ -2.5882321870404863, -1.9439589708250309, -1.614431731329 ]
+@test pvalue(t) ≈ 0.4525095990064268
+show(IOBuffer(), t)
+
+t = ADFTest(sim_data_h0, :constant, 4)
+
+@test t.n == 96
+@test t.deterministic == :constant
+@test t.lag == 4
+@test t.stat ≈ -1.3432252172180439
+@test t.coef ≈ -0.034891024784937275
+@test t.cv ≈ [ -3.49681816639021, -2.8906107514600103, -2.5822770483285953 ]
+@test pvalue(t) ≈ 0.6091721848538273
+show(IOBuffer(), t)
+
+t = ADFTest(sim_data_h0, :trend, 1)
+
+@test t.n == 99
+@test t.deterministic == :trend
+@test t.lag == 1
+@test t.stat ≈ -1.643937136091079
+@test t.coef ≈ -0.06200340824254233
+@test t.cv ≈ [ -4.051321648595895, -3.4548891419983088, -3.1530564880069027 ]
+@test pvalue(t) ≈ 0.7747580288456625
+show(IOBuffer(), t)
+
+t = ADFTest(sim_data_h0, :squared_trend, 10)
+
+@test t.n == 90
+@test t.deterministic == :squared_trend
+@test t.lag == 10
+@test t.stat ≈ -2.0185041960365777
+@test t.coef ≈ -0.11370011315231617
+@test t.cv ≈ [ -4.489700198611862, -3.8922014722784897, -3.5900742238045 ]
+@test pvalue(t) ≈ 0.8095687207005545
+show(IOBuffer(), t)
+
+sim_data_h1 = [
+    0.3823959677906078, -0.4064364928329272, -0.21366349105383925, -0.9458585999156837,
+    -0.161817961459508, 2.2141788431075566, -1.1599969272467523, -0.05003288745663004,
+    0.406405082694597, 0.7869108289160771, 1.3567270194962293, 1.1371544648018315,
+    0.04624047497940731, 0.4315160758144559, 0.16530680797356267, -0.6110002398171043,
+    -2.079333373316608, -0.9189007217208647, -1.21708339858096, -2.338297880681166,
+    -0.3732003183359671, 0.48346182208807886, 0.7925832885448003, 0.33291705184283954,
+    1.503401308172342, 0.6785520207484317, -0.4061883062665398, -1.4231492816679225,
+    -0.7647519783616538, -0.5475123469836606, -2.3891264219191415, -1.2613311974661598,
+    0.5917962497349819, 0.8635935846059344, 1.1952957206263544, 0.9762467401665272,
+    -0.15747368655333344, -0.7433830876676981, -2.174717868044743, -2.376406537872116,
+    -1.5234733484541754, -0.6912690728659298, -0.003840769053141846, 1.7332452815278112,
+    2.1665386050740976, 1.2896333839396656, -0.3640432635677706, -1.032077302099283,
+    0.6133707848346852, -1.0418480640744843, 0.7193668683918175, 0.3040363409885845,
+    0.911408384439752, 0.4254373055731135, -0.1483640149695369, -2.081485210017331,
+    -1.6096017323553033, -1.9544283905746491, 0.9034079793438605, -1.0189205517154498,
+    -1.0446712460234568, -1.4858795828372295, -2.1280478590945284, -0.9297236919859942,
+    -1.0809785120018163, -2.26047861232065, -0.8094700261729145, -1.8521079109679686,
+    -1.4183251435535507, -1.1222499557599581, 0.4408035345759739, 1.395083093263098,
+    -0.6294287984304979, -1.907929267259316, -1.3926746093884548, 0.5294601933036396,
+    1.7538203747214345, 3.0363433744615707, 0.6086926691932826, 1.4521666352091431,
+    0.22101112108219845, -0.15656147467703047, 1.4208016565922137, 1.5077049884714973,
+    0.5822120509286517, -0.1779735844657705, 0.12863722897571492, 0.42346502209020714,
+    0.5317572267531613, 0.5250949414322668, 0.7222434557577614, 0.5711080879210234,
+    1.8087294351377763, 2.1641593211810846, 0.9167965594762817, 0.6145927746257767,
+    -0.5070189070638104, -1.9633776595364965, 0.5182490463457843, 0.13020888464287564,
+    -0.3850795292384054
+]
+
+t = ADFTest(sim_data_h1, :none, 2)
+
+@test t.n == 98
+@test t.deterministic == :none
+@test t.lag == 2
+@test t.stat ≈ -4.856134193950414
+@test t.coef ≈ -0.5258695197499671
+@test t.cv ≈ [ -2.5882321870404863, -1.9439589708250309, -1.614431731329 ]
+@test pvalue(t) ≈ 2.011399607967822e-6
+show(IOBuffer(), t)
+
+t = ADFTest(sim_data_h1, :constant, 0)
+
+@test t.n == 100
+@test t.deterministic == :constant
+@test t.lag == 0
+@test t.stat ≈ -5.371126891727986
+@test t.coef ≈ -0.45433645147215723
+@test t.cv ≈ [ -3.49681816639021, -2.8906107514600103, -2.5822770483285953 ]
+@test pvalue(t) ≈ 3.8937264616617045e-6
+show(IOBuffer(), t)
+
+t = ADFTest(sim_data_h1, :trend, 7)
+
+@test t.n == 93
+@test t.deterministic == :trend
+@test t.lag == 7
+@test t.stat ≈ -2.636305455046
+@test t.coef ≈ -0.4407260538688699
+@test t.cv ≈ [ -4.051321648595895, -3.4548891419983088, -3.1530564880069027 ]
+@test pvalue(t) ≈ 0.2634673926249714
+show(IOBuffer(), t)
+
+t = ADFTest(sim_data_h1, :squared_trend, 4)
+
+@test t.n == 96
+@test t.deterministic == :squared_trend
+@test t.lag == 4
+@test t.stat ≈ -4.300112549464304
+@test t.coef ≈ -0.6759438045021552
+@test t.cv ≈ [ -4.489700198611862, -3.8922014722784897, -3.5900742238045 ]
+@test pvalue(t) ≈ 0.012662962461719612
+show(IOBuffer(), t)