Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ddof to PowerDivergenceTest and ChisqTest to allow changes to degrees of freedom #288

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
45 changes: 25 additions & 20 deletions src/power_divergence.jl
Expand Up @@ -237,14 +237,17 @@ end
# Under regularity conditions, their asymptotic distributions are all the same (Drost 1989)
# Chi-squared null approximation works best for lambda near 2/3
"""
PowerDivergenceTest(x[, y]; lambda = 1.0, theta0 = ones(length(x))/length(x))
PowerDivergenceTest(x[, y]; lambda = 1.0, theta0 = ones(length(x))/length(x), ddof = 0)

Perform a Power Divergence test.

If `y` is not given and `x` is a matrix with one row or column, or `x` is a vector, then
a goodness-of-fit test is performed (`x` is treated as a one-dimensional contingency
table). In this case, the hypothesis tested is whether the population probabilities equal
those in `theta0`, or are all equal if `theta0` is not given.
those in `theta0`, or are all equal if `theta0` is not given. The `ddof` parameter is the
"delta degrees of freedom" adjustment to the number of degrees of freedom used for
calculation of p-values. The number of degrees of freedom is decreased by `ddof`.
sprague252 marked this conversation as resolved.
Show resolved Hide resolved


If `x` is a matrix with at least two rows and columns, it is taken as a two-dimensional
contingency table. Otherwise, `x` and `y` must be vectors of the same length. The contingency
Expand Down Expand Up @@ -284,7 +287,7 @@ Implements: [`pvalue`](@ref), [`confint(::PowerDivergenceTest)`](@ref)

* Agresti, Alan. Categorical Data Analysis, 3rd Edition. Wiley, 2013.
"""
function PowerDivergenceTest(x::AbstractMatrix{T}; lambda::U=1.0, theta0::Vector{U} = ones(length(x))/length(x)) where {T<:Integer,U<:AbstractFloat}
function PowerDivergenceTest(x::AbstractMatrix{T}; lambda::U=1.0, theta0::Vector{U} = ones(length(x))/length(x), ddof::Int64=0) where {T<:Integer,U<:AbstractFloat}
sprague252 marked this conversation as resolved.
Show resolved Hide resolved

nrows, ncols = size(x)
n = sum(x)
Expand All @@ -297,13 +300,13 @@ function PowerDivergenceTest(x::AbstractMatrix{T}; lambda::U=1.0, theta0::Vector
if nrows > 1 && ncols > 1
rowsums = sum(x, dims=2)
colsums = sum(x, dims=1)
df = (nrows - 1) * (ncols - 1)
df = (nrows - 1) * (ncols - 1) - ddof
thetahat = x ./ n
xhat = rowsums * colsums / n
theta0 = xhat / n
V = Float64[(colsums[j]/n) * (rowsums[i]/n) * (1 - rowsums[i]/n) * (n - colsums[j]) for i in 1:nrows, j in 1:ncols]
elseif nrows == 1 || ncols == 1
df = length(x) - 1
df = length(x) - 1 - ddof
xhat = reshape(n * theta0, size(x))
thetahat = x / n
V = reshape(n .* theta0 .* (1 .- theta0), size(x))
Expand Down Expand Up @@ -337,18 +340,18 @@ end
#convenience functions

#PDT
function PowerDivergenceTest(x::AbstractVector{T}, y::AbstractVector{T}, levels::Levels{T}; lambda::U=1.0) where {T<:Integer,U<:AbstractFloat}
function PowerDivergenceTest(x::AbstractVector{T}, y::AbstractVector{T}, levels::Levels{T}; lambda::U=1.0, ddof::Int64=0) where {T<:Integer,U<:AbstractFloat}
sprague252 marked this conversation as resolved.
Show resolved Hide resolved
d = counts(x, y, levels)
PowerDivergenceTest(d, lambda=lambda)
PowerDivergenceTest(d, lambda=lambda, ddof=ddof)
end

function PowerDivergenceTest(x::AbstractVector{T}, y::AbstractVector{T}, k::T; lambda::U=1.0) where {T<:Integer,U<:AbstractFloat}
function PowerDivergenceTest(x::AbstractVector{T}, y::AbstractVector{T}, k::T; lambda::U=1.0, ddof::Int64=0) where {T<:Integer,U<:AbstractFloat}
d = counts(x, y, k)
PowerDivergenceTest(d, lambda=lambda)
PowerDivergenceTest(d, lambda=lambda, ddof=ddof)
end

PowerDivergenceTest(x::AbstractVector{T}; lambda::U=1.0, theta0::Vector{U} = ones(length(x))/length(x)) where {T<:Integer,U<:AbstractFloat} =
PowerDivergenceTest(reshape(x, length(x), 1), lambda=lambda, theta0=theta0)
# Goodness-of-fit convenience method: treat the vector `x` as a one-dimensional
# contingency table by reshaping it into a single-column matrix and delegating
# to the matrix method. `ddof` ("delta degrees of freedom") is forwarded and
# subtracted from the computed degrees of freedom used for the p-value.
# NOTE: `ddof::Integer` instead of `ddof::Int64` — keyword annotations convert,
# so this accepts any integer type without over-constraining the interface.
PowerDivergenceTest(x::AbstractVector{T}; lambda::U=1.0, theta0::Vector{U} = ones(length(x))/length(x), ddof::Integer=0) where {T<:Integer,U<:AbstractFloat} =
    PowerDivergenceTest(reshape(x, length(x), 1), lambda=lambda, theta0=theta0, ddof=ddof)

#ChisqTest
"""
Expand All @@ -360,7 +363,9 @@ with ``λ = 1``).
If `y` is not given and `x` is a matrix with one row or column, or `x` is a vector, then
a goodness-of-fit test is performed (`x` is treated as a one-dimensional contingency
table). In this case, the hypothesis tested is whether the population probabilities equal
those in `theta0`, or are all equal if `theta0` is not given.
those in `theta0`, or are all equal if `theta0` is not given. The `ddof` parameter is the
"delta degrees of freedom" adjustment to the number of degrees of freedom used for
calculation of p-values. The number of degrees of freedom is decreased by `ddof`.

If `x` is a matrix with at least two rows and columns, it is taken as a two-dimensional
contingency table. Otherwise, `x` and `y` must be vectors of the same length. The contingency
Expand All @@ -373,22 +378,22 @@ Note that the entries of `x` (and `y` if provided) must be non-negative integers

Implements: [`pvalue`](@ref), [`confint`](@ref)
"""
function ChisqTest(x::AbstractMatrix{T}) where T<:Integer
PowerDivergenceTest(x, lambda=1.0)
# Pearson chi-squared test on a contingency-table matrix: a power divergence
# test with λ = 1. `ddof` ("delta degrees of freedom") is forwarded and
# subtracted from the computed degrees of freedom used for the p-value.
# NOTE: `ddof::Integer` instead of `ddof::Int64` — keyword annotations convert,
# so this accepts any integer type without over-constraining the interface.
function ChisqTest(x::AbstractMatrix{T}; ddof::Integer=0) where T<:Integer
    return PowerDivergenceTest(x, lambda=1.0, ddof=ddof)
end

function ChisqTest(x::AbstractVector{T}, y::AbstractVector{T}, levels::Levels{T}) where T<:Integer
function ChisqTest(x::AbstractVector{T}, y::AbstractVector{T}, levels::Levels{T}; ddof::Int64=0) where T<:Integer
d = counts(x, y, levels)
PowerDivergenceTest(d, lambda=1.0)
PowerDivergenceTest(d, lambda=1.0, ddof=ddof)
end

function ChisqTest(x::AbstractVector{T}, y::AbstractVector{T}, k::T) where T<:Integer
function ChisqTest(x::AbstractVector{T}, y::AbstractVector{T}, k::T; ddof::Int64=0) where T<:Integer
d = counts(x, y, k)
PowerDivergenceTest(d, lambda=1.0)
PowerDivergenceTest(d, lambda=1.0, ddof=ddof)
end

ChisqTest(x::AbstractVector{T}, theta0::Vector{U} = ones(length(x))/length(x)) where {T<:Integer,U<:AbstractFloat} =
PowerDivergenceTest(reshape(x, length(x), 1), lambda=1.0, theta0=theta0)
# Goodness-of-fit chi-squared test on a vector of counts against the null
# probabilities `theta0` (uniform by default): reshape to a one-column matrix
# and delegate to the power divergence test with λ = 1. `ddof` is forwarded
# and subtracted from the computed degrees of freedom used for the p-value.
# NOTE: `ddof::Integer` instead of `ddof::Int64` — keyword annotations convert,
# so this accepts any integer type without over-constraining the interface.
ChisqTest(x::AbstractVector{T}, theta0::Vector{U} = ones(length(x))/length(x); ddof::Integer=0) where {T<:Integer,U<:AbstractFloat} =
    PowerDivergenceTest(reshape(x, length(x), 1), lambda=1.0, theta0=theta0, ddof=ddof)

#MultinomialLRTest
"""
Expand Down
6 changes: 6 additions & 0 deletions test/power_divergence.jl
Expand Up @@ -198,4 +198,10 @@ MultinomialLRTest(x,y,(1:3,1:3))
d = [113997 1031298
334453 37471]
PowerDivergenceTest(d)

# Test ddof for the ChisqTest
sprague252 marked this conversation as resolved.
Show resolved Hide resolved
# Four observed category counts; with fully specified null probabilities the
# unadjusted test would have df = 3, and ddof = 2 reduces it to df = 1
# (as when two parameters were estimated from the data before testing).
x = [8,10,16,6]
# Null probabilities — these match standard-normal bin probabilities with
# breakpoints at -1, 0, 1 (Φ(-1) ≈ 0.1587, Φ(0)-Φ(-1) ≈ 0.3413) — presumably
# modelling counts binned against a fitted normal; TODO confirm intent.
probs = [0.15865525393145702, 0.341344746068543, 0.34134474606854304, 0.15865525393145702]
m = ChisqTest(x, probs, ddof=2)
# Expected p-value computed with df = length(x) - 1 - ddof = 1.
@test pvalue(m) ≈ 0.17603510054227095
end