# Improving tests for statistics

In [None]:
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}
import Control.Monad
import Control.Monad.Primitive

import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U

import Numeric.SpecFunctions
import Numeric.MathFunctions.Constants (m_epsilon)
import Numeric.MathFunctions.Comparison

import Statistics.Distribution
import Statistics.Distribution.Beta
import Statistics.Distribution.ChiSquared
import Statistics.Distribution.Gamma
import Statistics.Distribution.StudentT
import Statistics.Distribution.FDistribution
import Statistics.Distribution.Binomial

import IHaskell.Display
import Graphics.Rendering.Chart.Backend.Cairo
import Graphics.Rendering.Chart.Easy

:l NB/Plot

In [None]:
-- Round trip a probability through a distribution: take the quantile
-- of @p@ and feed it straight back into the CDF. The returned value is
-- the probability recovered that way, to be compared against the
-- original @p@.
roudtripQCDF :: ContDistr d => d -> Double -> Double
roudtripQCDF d p = cumulative d (quantile d p)

-- Estimate of the relative error expected from the quantile/CDF round
-- trip at probability @p@: half of 'm_epsilon' scaled by
-- @1 + |(x / p) * density d x|@, where @x@ is the quantile of @p@.
-- The second term describes how a relative perturbation of @x@ is
-- amplified when it is mapped back to a probability.
roundtripError :: ContDistr d => d -> Double -> Double
roundtripError d p = halfEps * (1 + abs amplification)
  where
    halfEps       = m_epsilon/2
    q             = quantile d p
    amplification = (q / p) * density d q

In [None]:
-- Plot, for p in (0.9, 1), the base-10 logarithm of two quantities:
-- the observed relative error of the quantile/CDF round trip and the
-- estimate produced by 'roundtripError'.
plotErrorEstimate d
  = toRenderable
  $ layout_title .~ "Error estimate"
  $ plotFunctions [observed, estimated] (0.9,1)
  where
    -- log10 of the measured round-trip error
    observed p  = logBase 10 (relativeError p (roudtripQCDF d p))
    -- log10 of the predicted error
    estimated p = logBase 10 (roundtripError d p)

-- Same pair of curves as 'plotErrorEstimate' (observed round-trip
-- error and its estimate, both as log10), but drawn with
-- 'plotFunctionsLog' over p in (1e-10, 1).
plotErrorEstimateLog d
  = toRenderable
  $ layout_title .~ "Error estimate"
  $ plotFunctionsLog [observed, estimated] (1e-10,1)
  where
    -- log10 of the measured round-trip error
    observed p  = logBase 10 (relativeError p (roudtripQCDF d p))
    -- log10 of the predicted error
    estimated p = logBase 10 (roundtripError d p)

# Investigate beta distribution

In [None]:
-- See math-functions#35
-- Beta distribution used as the problematic example in this section.
badBeta = betaDistr 7.799818553844234 0.22149966858935127
--quantile badBeta 0.99
-- Plot the quantile function of badBeta over the upper tail, p in (0.99, 1).
-- An alternative curve that was tried here is the log-density evaluated
-- at the quantile:  \p -> logDensity badBeta (quantile badBeta p)
toRenderable $ plotFunctions [quantile badBeta] (0.99,1)
--plotErrorEstimate    badBeta
---plotErrorEstimateLog badBeta
--quantile badBeta 0.9997097405300279
--cumulative badBeta $ quantile badBeta 0.9997097405300279

In [None]:
-- CDF and complementary CDF of Student's t with 12 degrees of freedom,
-- drawn together over x in (-10, 10).
toRenderable $
  let tDist  = studentT 12
      curves = [cumulative tDist, complCumulative tDist]
  in plotFunctions curves (-10,10)

In [None]:
-- Inverse incomplete beta for a = 0.02, b = 3 at p = 0.6.
invIncompleteBeta 0.02 3 0.6
-- Feed the result back through incompleteBeta (round trip).
incompleteBeta 0.02 3 (invIncompleteBeta 0.02 3 0.6)
-- Relative error of the round-tripped probability against 0.6.
relativeError 0.6 $ incompleteBeta 0.02 3 (invIncompleteBeta 0.02 3 0.6)
-- Relative error of the inverse itself against a reference value
-- (the same constant is bound as x_exact in later cells).
relativeError 1.82593469974181427417233e-12 (invIncompleteBeta 0.02 3 0.6)

In [None]:
-- incompleteBeta for a = 0.02, b = 3 over the whole unit interval ...
toRenderable (plotFunctions [incompleteBeta 0.02 3] (0,1))
-- ... and zoomed in on the tiny interval (0, 1e-11) next to zero.
toRenderable (plotFunctions [incompleteBeta 0.02 3] (0,1e-11))

In [None]:
-- Ratio of a hand-written closed-form guess to the library's
-- invIncompleteBeta for a = 0.02, b = 3, plotted over p in (0, 0.8).
-- A ratio of 1 means the guess coincides with the library result.
toRenderable $
  let a      = 0.02
      b      = 3
      lnBeta = logBeta a b
      -- expression used while p**(1/a) stays below 0.5
      lowerGuess p = (p * a * exp lnBeta) ** (1/a)
      -- expression used otherwise
      -- NOTE(review): the exponent is applied to p rather than to
      -- (1 - p) here; confirm this is the intended formula.
      upperGuess p = 1 - (1 - p ** (b * exp lnBeta))**(1/b)
      guess p
        | p**(1/a) < 0.5 = lowerGuess p
        | otherwise      = upperGuess p
  in plotFunctions [ \p -> guess p / invIncompleteBeta a b p ] (0,0.8)

In [None]:
-- Compare the library's invIncompleteBeta with the closed-form guess
-- (p * a * B(a,b)) ** (1/a) at a = 0.02, b = 3, p = 0.6. Prints both
-- values, then the relative error of each against the reference x_exact.
do
  let a       = 0.02
      b       = 3
      p       = 0.6
      x_exact = 1.82593469974181427417233e-12
      -- value returned by the library
      xLib    = invIncompleteBeta a b p
      -- closed-form guess
      xGuess  = (p * a * exp (logBeta a b)) ** (1/a)
  print xLib
  print xGuess
  print (relativeError x_exact xLib)
  -- Original author's remark on the next result: WHAT?!!!
  print (relativeError x_exact xGuess)

In [None]:
-- Perform by hand a single Halley iteration for solving
-- incompleteBeta a b x = p (a = 0.02, b = 3, p = 0.6), starting from the
-- closed-form guess x0, and print diagnostics along the way:
-- error of x0, the Newton step u, the clamped correction, the residual
-- before and after the step, the new point x1, and finally the relative
-- error of x1 and of the library's invIncompleteBeta against x_exact.
do
  let a       = 0.02
      b       = 3
      a1      = a - 1
      b1      = b - 1
      beta    = logBeta a b
      p       = 0.6
      x_exact = 1.82593469974181427417233e-12
      -- starting point: closed-form guess
      x0      = (p * a * exp beta) ** (1/a)
  print (relativeError x0 x_exact)
  let -- residual at x0
      f0  = incompleteBeta a b x0 - p
      -- derivative at x0, evaluated through logarithms
      f0' = exp $ a1 * log x0 + b1 * log1p (-x0) - beta
      -- plain Newton step
      u   = f0 /f0'
      -- second-order term, clamped to [-1, 1] and zeroed when NaN
      d   = u * (a1 / x0 - b1 / (1 - x0))
      corr
        | isNaN d   = 0
        | d > 1     = 1
        | d < -1    = -1
        | otherwise = d
      -- Halley update
      x1  = x0 - u / (1 - 0.5 * corr)
  print ("u=",u)
  print ("corr=",corr)
  print f0
  print (incompleteBeta a b x1 - p)
  print x1
  print (relativeError x_exact x1)
  print (relativeError x_exact (invIncompleteBeta a b p))

# Chi squared

In [None]:
-- Overall bad precision (even shape is wrong)
-- Chi-squared distribution with 60 degrees of freedom.
badChi = chiSquared 60

-- Density from 0 up to twice the mean, followed by the two
-- round-trip error plots (linear and log range).
toRenderable (plotFunctions [density badChi] (0, 2 * mean badChi))
plotErrorEstimate badChi
plotErrorEstimateLog badChi

# Gamma distribution

In [None]:
-- Gamma distribution used as the problematic example in this section.
badGamma = gammaDistr 9.671961397067255 9.41872452467439

-- Density from 0 up to twice the mean, followed by the two
-- round-trip error plots (linear and log range).
toRenderable (plotFunctions [density badGamma] (0, 2 * mean badGamma))
plotErrorEstimate badGamma
plotErrorEstimateLog badGamma

# StudentT

In [None]:
-- N.B. horrible loss of precision for NDF=30!
-- Student's t distribution with 30 degrees of freedom.
badStudentT = studentT 30

-- Density over x in (-10, 10), followed by the two round-trip
-- error plots (linear and log range).
toRenderable (plotFunctions [density badStudentT] (-10,10))
plotErrorEstimate badStudentT
plotErrorEstimateLog badStudentT

Let's ignore the horrible peak near 0 for now. Maybe it will go away once the issue with the bad initial guess for the inverse beta is resolved.

So why does Student T perform so poorly? Notice the peak near `p=0.5`. Let's look at the code. `cumulative` doesn't look too bad.

```.haskell
cumulative :: StudentT -> Double -> Double
cumulative (StudentT ndf) x
  | x > 0     = 1 - 0.5 * ibeta
  | otherwise = 0.5 * ibeta
  where
    ibeta = incompleteBeta (0.5 * ndf) 0.5 (ndf / (ndf + x*x))
```

But quantile... Just look at all these `1-p`s!

```
quantile :: StudentT -> Double -> Double
quantile (StudentT ndf) p
  | p >= 0 && p <= 1 =
    let x = invIncompleteBeta (0.5 * ndf) 0.5 (2 * min p (1 - p))
    in case sqrt $ ndf * (1 - x) / x of
         r | p < 0.5   -> -r
           | otherwise -> r
  | otherwise = modErr "quantile" $ "p must be in [0,1] range. Got: "++show p
```

Here we try to exploit the fact that `quantile d 0.5 = 0`, and lose about 2 significant digits in the process.