# Mandatory imports and utils

In [None]:
{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
import Control.Monad
import Control.Monad.Primitive

import qualified Data.Vector.Unboxed as U

import Numeric.SpecFunctions
import Numeric.MathFunctions.Constants
import Numeric.MathFunctions.Comparison
import Numeric.Polynomial.Chebyshev

import Text.Printf(printf)

import IHaskell.Display
import Graphics.Rendering.Chart.Backend.Cairo
import Graphics.Rendering.Chart.Easy

:l NB/Plot
:l NB/Heatmap

# Incomplete beta

A quick reminder about the beta function and the (regularized) incomplete beta function:

Beta function:
$$B(a,b) = \int_0^1 t^{a-1}(1 - t)^{b-1} \,dt $$

Incomplete beta:
$$B(x; a,b) = \int_0^x t^{a-1}(1 - t)^{b-1} \,dt \qquad x \in [0,1]$$

Regularized incomplete beta (from now on referred to simply as incomplete beta):
$$I(x; a,b) = \frac{B(x; a,b)}{B(a,b)}$$


## Debugging of [math-functions#35](https://github.com/bos/math-functions/issues/35)

Originally uncovered when plotting the round-trip error of `cumulative . quantile` for the beta distribution on a log scale.

In [None]:
-- Round trip: apply the inverse incomplete beta and then the incomplete
-- beta itself (both with parameters 7 and 0.07). An exact inverse would
-- give back the argument unchanged.
let fun x = incompleteBeta 7 0.07 (invIncompleteBeta 7 0.07 x)
-- Decimal logarithm of the relative round-trip error over (1e-10, 1).
toRenderable
  (plotFunctionsLog [\x -> logBase 10 (relativeError (fun x) x)] (1e-10, 1))

Oh! That looks bad. Let's plot what the inverse incomplete beta itself looks like.

In [None]:
-- Overview: the inverse incomplete beta on the interval (1e-7, 1e-6)
toRenderable
  (plotFunctions [\a -> invIncompleteBeta 7 0.07 a] (1e-7, 1e-6))
-- Zoom: the same function on the narrow interval (2.8e-7, 3e-7)
toRenderable
  (plotFunctions [\a -> invIncompleteBeta 7 0.07 a] (2.8e-7, 3e-7))

That is a strange dip in a smooth and _monotonic_ function! After adding a couple of `traceShow`s to the `invIncompleteBeta` implementation it becomes clear that the problem is lack of convergence: the initial guess is bad and 10 iterations are not enough. Let's modify `invIncompleteBeta` a little so that it returns the number of iterations as well.


In [None]:
-- | Compute inverse of regularized incomplete beta function and report
-- how many iterations the solver used. Uses initial approximation from
-- AS109, AS64 and Halley method to solve equation.
--
-- The result is a pair @(iterations, x)@. For the trivial arguments
-- /a/ = 0 and /a/ = 1 no iterations are performed and /a/ itself is
-- returned.
--
-- The worker expects a probability not exceeding 0.5. For /a/ > 0.5 the
-- problem is therefore reflected using the symmetry
-- I(x; p,q) = 1 - I(1-x; q,p): the worker is called with the shape
-- parameters swapped and probability @1 - a@, and its answer is
-- subtracted from 1. The iteration count is passed through unchanged.
--
-- Calls 'error' when /p/ or /q/ is not positive or when /a/ lies
-- outside of [0,1].
invIncompleteBeta' :: Double     -- ^ /p/ > 0
                  -> Double     -- ^ /q/ > 0
                  -> Double     -- ^ /a/ ∈ [0,1]
                  -> (Int,Double)
invIncompleteBeta' p q a
  | p <= 0 || q <= 0 =
      error $ printf "invIncompleteBeta p <= 0 || q <= 0.  p=%g q=%g a=%g" p q a
  | a <  0 || a >  1 =
      error $ printf "invIncompleteBeta x must be in [0,1].  p=%g q=%g a=%g" p q a
  | a == 0 || a == 1 = (0,a)
  | a > 0.5          = reflect $ invIncompleteBetaWorker lnBeta q p (1 - a)
  | otherwise        = invIncompleteBetaWorker lnBeta p q a
  where
    -- logBeta is symmetric in its arguments, so the same value serves
    -- both the direct and the reflected call.
    lnBeta = logBeta p q
    -- Map the solution of the reflected problem back, keeping the
    -- iteration count.
    reflect (n, x) = (n, 1 - x)


-- Halley iteration for the inverse of the regularized incomplete beta
-- function. Arguments, in order: the value the caller computed with
-- `logBeta', the two shape parameters and the target probability.
-- The starting point is the numeric part of `guessIIBeta'. Returns the
-- iteration counter together with the final approximation.
--
-- NOTE: p <= 0.5.
invIncompleteBetaWorker :: Double -> Double -> Double -> Double -> (Int,Double)
invIncompleteBetaWorker beta a b p = go (0::Int) x0
  where
    (_, x0) = guessIIBeta a b p
    am1 = a - 1
    bm1 = b - 1
    -- One step of the iteration; `n' counts completed steps.
    -- (For debugging, a guard like `traceShow (n,x) False = undefined'
    -- can be placed first.)
    go !n !x
      -- Sitting exactly on a boundary: nothing more can be done, so
      -- `x' is returned as is.
      | x == 0 || x == 1                  = (n, x)
      -- Infinite derivative: the iteration cannot proceed. This only
      -- happens close to 0 or 1; a good answer is hardly attainable
      -- there, but `x' is already reasonable.
      | isInfinite slope                  = (n, x)
      -- Iteration budget exhausted. Usually the solution converges
      -- because of discreteness of Double, and the precision is good
      -- already.
      | n >= 100                          = (n, x)
      -- Converged: the step is negligible relative to `x'.
      | abs step <= 16 * m_epsilon * x    = (n, next)
      | otherwise                         = go (n+1) next
      where
        -- Ingredients of the Halley step.
        resid  = incompleteBeta_ beta a b x - p
        slope  = exp $ am1 * log x + bm1 * log1p (-x) - beta
        newton = resid / slope
        step   = newton / (1 - 0.5 * min 1 (newton * (am1 / x - bm1 / (1 - x))))
        -- Candidate for the next approximation. Should it fall outside
        -- of [0,1], bisect towards the violated boundary instead.
        cand   = x - step
        next | cand < 0  = x / 2
             | cand > 1  = (x + 1) / 2
             | otherwise = cand
            
-- Initial guess for the inverse of the regularized incomplete beta
-- function. The result pairs a tag naming the approximation that was
-- selected with the guess itself. Approximations from AS64, AS109 and
-- Numerical Recipes are used.
--
-- Equations are cited by paper name and number, e.g. [AS64 2]. The
-- equations in the AS64 paper are not numbered, so they are referred to
-- by order of appearance, counting from the definition of the
-- incomplete beta.
guessIIBeta :: (Ord r, Floating r) => r -> r -> r -> (String, r)
guessIIBeta a b p
  -- Both parameters above 1: approximation from AS109 (Carter
  -- approximation). It is reasonably good (2 iterations on average).
  | a > 1 && b > 1        = ("AS109", carter)
  -- Otherwise the approximation derived from [AS64 2] is used whenever
  -- it is applicable. It slightly reduces the average number of
  -- iterations when `a' and `b' have different magnitudes.
  | chi2 > 0 && ratio > 1 = ("AS64 2", 1 - 2 / (ratio + 1))
  -- Last resort: approximation from "Numerical Recipes". It is very
  -- similar to approximations [AS64 4,5] but never leaves the [0,1]
  -- interval.
  | p < termA / termSum   = ("NR1", (a * p * termSum) ** (1/a))
  | otherwise             = ("NR2", 1 - (b * (1 - p) * termSum) ** (1/b))
  where
    -- Carter approximation (AS109).
    carter = a / (a + b * exp(2 * w))
      where
        lam = (y*y - 3) / 6
        sa  = 1 / (2*a - 1)
        sb  = 1 / (2*b - 1)
        h   = 2 / (sa + sb)
        w   = y * sqrt(h + lam) / h - (sb - sa) * (lam + 5/6 - 2 / (3 * h))
    -- Formula [2]
    ratio = (4*a + 2*b - 2) / chi2
    -- Quantile of chi-squared distribution. Formula [3].
    chi2 = 2 * b * (1 - c + y * sqrt c) ** 3
      where
        c = 1 / (9 * b)
    -- Pieces of the "Numerical Recipes" approximation.
    lnA     = log $ a / (a+b)
    lnB     = log $ b / (a+b)
    termA   = exp( a * lnA ) / a
    termB   = exp( b * lnB ) / b
    termSum = termA + termB
    -- `y' is Hasting's approximation of p'th quantile of standard
    -- normal distribution.
    y = q - ( 2.30753 + 0.27061 * q )
          / ( 1.0 + ( 0.99229 + 0.04481 * q ) * q )
      where
        q = sqrt $ - 2 * log p

-- Show (iteration count, result) for two nearby arguments, both inside
-- the close-up interval (2.8e-7, 3e-7) plotted earlier.
invIncompleteBeta' 7 0.07 2.8e-7
invIncompleteBeta' 7 0.07 2.88e-7

Pretty bad. Let's plot the number of iterations:

In [None]:
-- Number of iterations reported by invIncompleteBeta' for arguments in
-- (1e-7, 1e-6). The constant 10 is drawn alongside as a reference level
-- (presumably the iteration limit mentioned in the text).
toRenderable
  (plotFunctions
     [ \x -> fromIntegral (fst (invIncompleteBeta' 7 0.07 x))
     , \_ -> 10
     ]
     (1e-7, 1e-6))

That's pretty bad. The initial guess is very, very poor.

In [None]:
-- Show which approximation is selected (the tag) and the guess it
-- produces for the same two arguments that were passed to
-- invIncompleteBeta' earlier.
guessIIBeta 7 0.07 2.8e-7
guessIIBeta 7 0.07 2.88e-7

So it is the initial guess from AS64 that fails us.

In [None]:
-- Data for a heat map of the worst-case iteration count over a 4x4 grid
-- of parameter pairs (a,b). The grid points are the centres
-- d + k/4, k = 0..3, along each axis, with d = 1/8. Every entry is
-- ((a-d,b-d), (a+d,b+d), value): two opposite corners of the square
-- around (a,b) and the largest iteration count that invIncompleteBeta'
-- reports over the points of `linspace (0,0.5) 100'.
nIter :: [((Double, Double), (Double, Double), Double)]
nIter =
  [ ((a - half, b - half), (a + half, b + half), worst)
  | a <- centres
  , b <- centres
  , let counts = map (fst . invIncompleteBeta' a b) $ linspace (0,0.5) 100
        worst  = fromIntegral (maximum counts) :: Double
  ]
  where
    cells   = 4 :: Int
    cells'  = fromIntegral cells :: Double
    -- Half of the cell width
    half    = 1 / (2 * cells')
    -- Cell centres along one axis
    centres = [ half + fromIntegral k / cells' | k <- [0 .. cells-1] ]
    
    
-- Render nIter: a default layout whose only plot is a default heat map
-- with nIter set as its values.
toRenderable
  (layout_plots .~ [toPlot (heat_map_values .~ nIter $ def)] $ def)