<a href="https://colab.research.google.com/github/Jinzhao-Yu/BioStat615/blob/main/BIOSTAT615_Lecture_18_Fall_2022.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# BIOSTAT615 Lecture 18 - R

## 1. Sum of multiple Bernoulli variables

In [1]:
#' multiBinomPMF.v1() - pmf for sum of bernoulli (quadratic)
#' @param   p A vector of binomial probability
#' @return  A vector of length(p)+1 containing Pr(\sum x_i = k)
multiBinomPMF.v1 = function(p) {
    n = length(p)
    if ( n == 1 ) { # terminating condition
        return( c(1-p[1], p[1]) )
    } else {
        mid = ceiling(n/2)
        pmf1 = multiBinomPMF.v1(p[1:mid])
        pmf2 = multiBinomPMF.v1(p[(mid+1):n])
        n1 = length(pmf1)
        n2 = length(pmf2)
        pmf12 = matrix(pmf1,n1,n2) * matrix(pmf2,n1,n2,byrow=TRUE)
        r = rep(0, n1+n2-1)
        for(i in 1:n1) {
            for(j in 1:n2) {
                r[i+j-1] = r[i+j-1] + pmf12[i,j]
            }
        }
        return(r)
    }
}

In [2]:
## test with an example data
p0 = c(0.1, 0.2, 0.3, 0.4)
print(multiBinomPMF.v1(p0))

[1] 0.3024 0.4404 0.2144 0.0404 0.0024


In [3]:
## for large data
p1 = (1:4999)/5000
print(system.time(a1 <- multiBinomPMF.v1(p1)))

   user  system elapsed 
  2.573   0.071   2.817 


In [4]:
## even larger data
p2 = (1:9999)/10000
print(system.time(a2 <- multiBinomPMF.v1(p2)))

   user  system elapsed 
  9.388   0.161   9.914 


## 2. A much faster implementation based on FFT

In [5]:
#' multiBinomPMF.v1() - pmf for sum of bernoulli (n log n)
#' @param  p A vector of binomial probability
#' @return   A vector of length(p)+1 containing Pr(\sum x_i = k)
multiBinomPMF.v2 = function(p) {
    n = length(p)
    if ( n == 1 ) { # terminating condition
        return( c(1-p[1], p[1]) )
    } else {
        mid = ceiling(n/2)
        pmf1 = multiBinomPMF.v2(p[1:mid])
        pmf2 = multiBinomPMF.v2(p[(mid+1):n])
        ## use convolution for polynomial multiplication
        ## need rev() to convert convolution to polynomial multiplication
        r = convolve(pmf1, rev(pmf2), type="open") 
        return(r)
    }
}

In [6]:
## verify the original 
print(multiBinomPMF.v2(p0))

[1] 0.3024 0.4404 0.2144 0.0404 0.0024


In [7]:
## repeat the large example
p1 = (1:4999)/5000
print(system.time(b1 <- multiBinomPMF.v2(p1)))

   user  system elapsed 
  0.098   0.001   0.100 


In [8]:
## repeat the larger example
p2 = (1:9999)/10000
print(system.time(b2 <- multiBinomPMF.v2(p2)))

   user  system elapsed 
  0.211   0.009   0.219 


In [9]:
## check the similarity between the solutions
sum(abs(a1 - b1))

In [10]:
## check the similarity between the solutions
sum(abs(a2 - b2))