-
Notifications
You must be signed in to change notification settings - Fork 0
/
iupmstat.R
131 lines (105 loc) · 4.17 KB
/
iupmstat.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# examine the effect of partitioning positive cells
iupm.ll <- function(rate, positives, cell.counts) {
# log likelihood for single-hit Poisson model
pr <- exp(-1e-6*rate*cell.counts)
pr[pr==0] <- 1e-320
sum( positives*log(1-pr) + (1-positives)*log(pr) )
}
iupm <- function(positives, n.cells, lower=1e-3, upper=1e3, hessian=FALSE) {
# Implementation of IUPMStats calculator
#
# @arg positives: a binary vector indicating if i-th well was positive
# @arg n.cells: an integer vector for number of cells in i-th well
# @arg lower: IUPM lower bound for minimization
# @arg upper: IUPM upper bound for minimization
# @arg hessian: optim() optional return Hessian matrix
if (!is.vector(n.cells) | !is.vector(positives)) {
stop("all arguments must be vectors\n")
}
n <- length(n.cells) # number of wells
if (length(positives)!=n) {
stop("n.cells must have same length as positives\n")
}
if (any(n.cells<0) | any(positives<0)) {
stop("n.cells and positives must be non-negative\n")
}
# are all wells negative?
if (all(positives==0)) {
# return posterior median estimate
res <- list(par=log(2)/sum(n.cells) * 1e6, lo.95=NA, hi.95=NA)
return(res)
}
# reverse sign for objective function minimization
obj.f <- function(rate) -iupm.ll(rate, positives, n.cells)
res <- optim(par=1, fn=obj.f, method='Brent', lower=lower, upper=upper, hessian=hessian)
# # calculate 95% confidence interval
# l1 <- iupm.ll(res$par, positives, n.cells)
# obj.f <- function(x) exp(iupm.ll(x, positives, n.cells) - l1) - 0.147
# res$lo.95 <- NA
# res$hi.95 <- NA
# try({
# res$lo.95 <- uniroot(obj.f, c(1e-3, res$par))$root
# res$hi.95 <- uniroot(obj.f, c(res$par, 1e2))$root
# })
# asymptotic normal confidence interval
lmle <- log(res$par*1e-6)
term1 <- exp(-n.cells*res$par*1e-6) / (exp(-n.cells*res$par*1e-6)-1)
d2 <- sum(n.cells^2 * positives * term1 * (1-term1))
shift <- 1.96*1/sqrt(-d2)
return(res)
}
# example, IUPM = 8.148 (8.147718 {1.862907, 35.635330})
run.example <- function() {
cell.counts <- rep(c(1e6, 2e5, 4e4, 8000, 1600, 320), times=c(8, 2, 2, 2, 2, 2))
positives <- c(rep(1,8), rep(1,2), rep(0,2), rep(0,2), rep(0,2), rep(0,2))
iupm(positives, cell.counts)
}
# Is it possible to use this variant information in another way?
# For example, observing two variants implies that at least two cells are infected.
# The likelihood of the positive outcome changes from:
# 1 - exp(-rN)
# where r=IUPM and N is the number of cells in the well, to
# 1 - exp(-rN) - rN exp(-rN) = 1 - ppois(1,8)
# where ppois() is the Poisson cumulative distribution function.
# In other words, we've removed the case where only one cell is infected.
lcup <- function(k,L) {
# log cumulative distribution function of Poisson
# e.g., lcup(1,8) = log(1-ppois(1,8))
if (k==0) {
return(-L)
} else {
pgamma(L, floor(k), log=T) - lgamma(k)
}
}
iupm.ll2 <- function(rate, variants, cells) {
# log-likelihood for multi-hit Poisson model
n <- length(variants)
L <- 1e-6*rate*cells
#sum(sapply(1:n, function(i) lcup(variants[i], L[i])))
sum(sapply(1:n, function(i) {
k <- variants[i]
ifelse( k==0, -L[i], log(1-ppois(k-1, L[i])) )
}))
}
iupm.2 <- function(n.variants, n.cells, upper=1e3, hessian=FALSE) {
# n.variants: a vector of number of variants observed per well
# n.cells: a vector of number of cells per well
if (length(n.variants) != length(n.cells)) {
stop("Length of n.variants must match length of n.cells")
}
obj.f <- function(rate) { -iupm.ll2(rate, n.variants, n.cells) }
res <- optim(par=1, fn=obj.f, method='Brent', lower=0, upper=upper, hessian=hessian)
# calculate 95% confidence interval
l1 <- iupm.ll2(res$par, n.variants, n.cells)
obj.f <- function(x) exp(iupm.ll2(x, positives, n.cells) - l1) - 0.147
try({
res$lo.95 <- uniroot(obj.f, c(1e-3, res$par))$root
res$hi.95 <- uniroot(obj.f, c(res$par, 1e2))$root
})
return(res)
}
# TODO: write function to coerce data frame into this list format
run.example2 <- function() {
variants <- list('1e6'=c(1,2,1,1,1,1,2,2), '2e5'=c(1,1), '4e4'=c(0,0), '8000'=c(0,0), '1600'=c(0,0), '320'=c(0,0))
iupm.2(variants)
}