In [2]:
import pandas as pd
import random
import scipy.stats as stats
import numpy
import copy
from scipy.stats import poisson
import math

def multinomCDF_log(G, k, p, tau_p):
    """Return the natural log of an approximate multinomial CDF value.

    The result is assembled from three pieces:

    * minus the log-pmf at ``k`` of a Poisson with rate ``float(k)``;
    * for each of the first ``G`` cells, the log-cdf at ``tau_p[i]`` of a
      Poisson with rate ``k * p[i]``;
    * the log of an Edgeworth-corrected normal density (skewness and excess
      kurtosis terms) for the sum of those Poissons, each truncated to
      values ``<= tau_p[i]``, evaluated at ``k``.

    NOTE(review): this has the shape of Levin's (1981) representation of the
    multinomial CDF, i.e. log P(N_i <= tau_p[i] for all i) -- confirm against
    the intended reference. Being an approximation, ``exp`` of the result is
    not guaranteed to lie in [0, 1].

    Parameters
    ----------
    G : int
        Number of cells to include; ``p`` and ``tau_p`` are indexed 0..G-1.
    k : number
        Total count; also used as the overall Poisson rate.
    p : sequence of float
        Per-cell probabilities.
    tau_p : sequence of number
        Per-cell upper bounds.

    Returns
    -------
    float
        The log of the approximated probability.
    """
    rate = float(k)
    log_total = -poisson.logpmf(k, rate)

    third_sum = 0.0
    fourth_sum = 0.0
    var_sum = 0.0
    mean_sum = 0.0

    for idx in range(G):
        bound = tau_p[idx]
        lam = rate * p[idx]

        mass_below = poisson.cdf(bound, lam)
        log_total += numpy.log(mass_below)

        # Mean and variance of Poisson(lam) truncated to values <= bound.
        mean = lam * (1 - poisson.pmf(bound, lam) / mass_below)
        slack = lam - mean
        var = mean - (bound - mean) * slack

        # Factorial moments of order 2..4 by recurrence; `falling` walks
        # through bound, bound*(bound-1), bound*(bound-1)*(bound-2).
        falling = bound
        fact2 = lam * mean - falling * slack

        falling = falling * (bound - 1)
        fact3 = lam * fact2 - falling * slack

        falling = falling * (bound - 2)
        fact4 = lam * fact3 - falling * slack

        # Convert factorial moments to central moments of order 3 and 4.
        central3 = fact3 + fact2 * (3 - 3 * mean) + mean * (1 + mean * (-3 + 2 * mean))
        central4 = (fact4 + fact3 * (6 - 4 * mean)
                    + fact2 * (7 + mean * (-12 + 6 * mean))
                    + mean * (1 + mean * (-4 + mean * (6 - 3 * mean))))

        third_sum += central3
        fourth_sum += central4 - 3 * var * var  # fourth cumulant of this cell
        mean_sum += mean
        var_sum += var

    sd = numpy.sqrt(var_sum)
    skew = third_sum / (var_sum * sd)
    excess_kurt = fourth_sum / (var_sum * var_sum)

    # Standardised distance of k from the mean of the truncated sum.
    z = (k - mean_sum) / sd
    z2 = z * z

    correction = (1 + skew / 6 * z * (z2 - 3)
                  + excess_kurt / 24 * (z2 * z2 - 6 * z2 + 3)
                  + skew * skew / 72 * (((z2 - 15) * z2 + 45) * z2 - 15))
    log_density = (-z2 / 2 + numpy.log(correction)
                   - numpy.log(2 * math.pi) / 2 - numpy.log(sd))

    log_total += log_density
    return log_total

def multinomCDF(G, k, p, tau_p):
    """Return ``exp(multinomCDF_log(G, k, p, tau_p))``.

    Arguments are forwarded unchanged to ``multinomCDF_log``; see that
    function for their meaning.
    """
    log_value = multinomCDF_log(G, k, p, tau_p)
    return numpy.exp(log_value)

def multinomial_icdf_most_likely(G, k, p, a, tau):
    """Single-pass search for bounds whose approximate CDF reaches ``a``.

    Starts from ``[k] + list(tau)``. If the CDF of that vector already
    exceeds ``a`` it is returned unchanged. Otherwise each position after the
    first is visited once, left to right: the bound there is raised by one
    and the candidate is adopted when its CDF is at least ``a`` and at least
    as large as the CDF of the currently adopted vector.

    Parameters
    ----------
    G, k, p : forwarded to ``multinomCDF``.
    a : float
        CDF level to reach.
    tau : sequence of number
        Starting bounds for positions 1..len(tau); not modified.

    Returns
    -------
    list
        Bounds vector of length ``len(tau) + 1`` whose first entry is ``k``.
    """
    tau_p = [k] + list(tau)
    cdf = multinomCDF(G, k, p, tau_p)
    if cdf > a:
        return tau_p

    temp = copy.copy(tau_p)
    new_cdf = 0
    first_step = True
    not_fulfilled = False  # True when the previous position left the level unmet

    for pos in range(1, len(tau_p)):
        temp[pos] = temp[pos] + 1
        if first_step:
            # The very first increment is adopted unconditionally.
            tau_p = copy.copy(temp)
            cdf = multinomCDF(G, k, p, tau_p)
            first_step = False
        else:
            new_cdf = multinomCDF(G, k, p, temp)
            if new_cdf >= a and new_cdf >= cdf:
                tau_p = copy.copy(temp)
                # tau_p now equals temp, so its CDF is the value just computed.
                cdf = new_cdf

        if new_cdf >= a or cdf >= a:
            if not_fulfilled:
                # Carries the increment kept from the previous position.
                # `cdf` is deliberately left as-is here, matching the
                # long-standing behaviour. NOTE(review): confirm that the
                # stale value is intended.
                tau_p = copy.copy(temp)
                not_fulfilled = False
            # Undo the trial increment before moving to the next position.
            temp[pos] = temp[pos] - 1
        else:
            not_fulfilled = True
    return tau_p


def multinomial_icdf_most_unlikely(G, k, p, a, tau):
    """Single-pass search for bounds that reach ``a`` with the smallest CDF.

    Starts from ``[k] + list(tau)``. If the CDF of that vector already
    exceeds ``a`` it is returned unchanged. Otherwise each position after the
    first is visited once, left to right: the bound there is raised by one
    and the candidate is adopted when its CDF is at least ``a`` but no larger
    than the CDF of the currently adopted vector.

    Parameters
    ----------
    G, k, p : forwarded to ``multinomCDF``.
    a : float
        CDF level to reach.
    tau : sequence of number
        Starting bounds for positions 1..len(tau); not modified.

    Returns
    -------
    list
        Bounds vector of length ``len(tau) + 1`` whose first entry is ``k``.
    """
    tau_p = [k] + list(tau)
    cdf = multinomCDF(G, k, p, tau_p)
    if cdf > a:
        return tau_p

    temp = copy.copy(tau_p)
    new_cdf = 0
    first_step = True
    not_fulfilled = False  # True when the previous position left the level unmet

    for pos in range(1, len(tau_p)):
        temp[pos] = temp[pos] + 1
        if first_step:
            # The very first increment is adopted unconditionally.
            tau_p = copy.copy(temp)
            cdf = multinomCDF(G, k, p, tau_p)
            first_step = False
        else:
            new_cdf = multinomCDF(G, k, p, temp)
            if new_cdf >= a and new_cdf <= cdf:
                tau_p = copy.copy(temp)
                # tau_p now equals temp, so its CDF is the value just computed.
                cdf = new_cdf

        if new_cdf >= a or cdf >= a:
            if not_fulfilled:
                # Adopt the candidate and keep this position's increment in
                # `temp`; `cdf` is not refreshed on this path.
                # NOTE(review): confirm that keeping the increment (rather
                # than reverting it) is intended.
                tau_p = copy.copy(temp)
            else:
                # Undo the trial increment before the next position.
                temp[pos] = temp[pos] - 1
            not_fulfilled = False
        else:
            not_fulfilled = True
    return tau_p

def multinomial_icdf_between(G, k, p, tau, alpha, most_unlike, most_like):
    """Search for bounds whose approximate CDF lies between two reference CDFs.

    The references are the CDFs of ``[k] + most_unlike`` and
    ``[k] + most_like``; the smaller one is the lower limit ``a`` and the
    larger one the upper limit ``upper_a``. Starting from ``[k] + list(tau)``
    (returned unchanged if its CDF already exceeds ``alpha``), each position
    after the first is visited once with its bound raised by one. After the
    first position, the first candidate whose CDF falls in ``(a, upper_a]``
    is returned immediately.

    Parameters
    ----------
    G, k, p : forwarded to ``multinomCDF``.
    tau : sequence of number
        Starting bounds for positions 1..len(tau); not modified.
    alpha : float
        Level used only for the initial early-return check.
    most_unlike, most_like : sequence of number
        Reference bounds (without the leading ``k``).

    Returns
    -------
    list
        Bounds vector of length ``len(tau) + 1`` whose first entry is ``k``.
    """
    tau_p = [k] + list(tau)
    temp = copy.copy(tau_p)
    cdf = multinomCDF(G, k, p, tau_p)
    new_cdf = 0
    first_step = True
    not_fulfilled = False

    like = numpy.append([k], most_like)
    unlike = numpy.append([k], most_unlike)

    # Evaluate each reference CDF once and order them into [a, upper_a].
    cdf_unlike = multinomCDF(G, k, p, unlike)
    cdf_like = multinomCDF(G, k, p, like)
    if cdf_unlike < cdf_like:
        a = cdf_unlike
        upper_a = cdf_like
    else:
        a = cdf_like
        upper_a = cdf_unlike

    if cdf > alpha:
        return tau_p

    for pos in range(1, len(tau_p)):
        temp[pos] = temp[pos] + 1
        if first_step:
            # The very first increment is adopted unconditionally; new_cdf
            # keeps its initial value 0 for the band check below.
            tau_p = copy.copy(temp)
            cdf = multinomCDF(G, k, p, tau_p)
            first_step = False
        else:
            new_cdf = multinomCDF(G, k, p, temp)
            if new_cdf > a and new_cdf <= upper_a:
                return copy.copy(temp)

        reaches_lower = new_cdf >= a or cdf >= a
        within_upper = new_cdf <= upper_a or cdf <= upper_a
        if reaches_lower and within_upper:
            if not_fulfilled:
                tau_p = copy.copy(temp)
            else:
                temp[pos] = temp[pos] - 1
            not_fulfilled = False
        else:
            not_fulfilled = True
            # Overshot the upper limit: drop the trial increment.
            if new_cdf > upper_a or cdf > upper_a:
                temp[pos] = temp[pos] - 1
    return tau_p

def get_minimum_targets_most_unlike(categories, p, alpha, k):
    """Tabulate ``multinomial_icdf_most_unlikely`` bounds for totals 1..k.

    For each total ``i`` in 1..k the search is started from the bounds found
    for ``i - 1`` (all zeros for the first total). Each result is printed
    together with its approximate CDF, and the full table is written as HTML
    to the relative path ``"most_unlikely" + str(p) + ".html"``.

    Parameters
    ----------
    categories : sequence
        Only its length is used (number of cells).
    p : list of float
        Cell probabilities; ``p[1:]`` become the table's column labels.
    alpha : float
        CDF level passed to the search.
    k : int
        Largest total to tabulate.

    Returns
    -------
    list of numpy.ndarray
        One bounds array (without the leading total) per total 1..k.
    """
    positions = numpy.arange(1, k + 1)
    minimum_targets = []
    tau = numpy.zeros(len(categories) - 1)

    for i in positions:
        tau_p = multinomial_icdf_most_unlikely(len(p), i, p, alpha, tau)[1:]
        cdf = multinomCDF(len(categories), i, p, [i] + tau_p)
        # Formatted so the text is identical under Python 2 and Python 3.
        print("Minimum Target:  {}  CDF:  {}".format(tau_p, cdf))
        minimum_targets.append(numpy.array(tau_p))
        tau = copy.copy(tau_p)

    df = pd.DataFrame(data=numpy.array(minimum_targets).astype(int))
    df.columns = p[1:]
    df.index = numpy.arange(1, k + 1)
    df.to_html("most_unlikely" + str(p) + ".html")
    return minimum_targets

def get_minimum_targets_most_like(categories, p, alpha, k):
    """Tabulate ``multinomial_icdf_most_likely`` bounds for totals 1..k.

    For each total ``i`` in 1..k the search is started from the bounds found
    for ``i - 1`` (all zeros for the first total). Each result is printed
    together with its approximate CDF, and the full table is written as HTML
    to the relative path ``"most_likely_" + str(p) + ".html"``.

    Parameters
    ----------
    categories : sequence
        Only its length is used (number of cells).
    p : list of float
        Cell probabilities; ``p[1:]`` become the table's column labels.
    alpha : float
        CDF level passed to the search.
    k : int
        Largest total to tabulate.

    Returns
    -------
    list of numpy.ndarray
        One bounds array (without the leading total) per total 1..k.
    """
    positions = numpy.arange(1, k + 1)
    minimum_targets = []
    tau = numpy.zeros(len(categories) - 1)

    for i in positions:
        tau_p = multinomial_icdf_most_likely(len(p), i, p, alpha, tau)[1:]
        cdf = multinomCDF(len(categories), i, p, [i] + tau_p)
        # Formatted so the text is identical under Python 2 and Python 3.
        print("Minimum Target:  {}  CDF:  {}".format(tau_p, cdf))
        minimum_targets.append(numpy.array(tau_p))
        tau = copy.copy(tau_p)

    df = pd.DataFrame(data=numpy.array(minimum_targets).astype(int))
    df.columns = p[1:]
    df.index = numpy.arange(1, k + 1)
    df.to_html("most_likely_" + str(p) + ".html")
    return minimum_targets

In [4]:
# Two-cell example: cell probabilities, CDF level, and largest total to tabulate.
p = [0.9, 0.1]
a = 0.1
k = 100
# Only the number of entries in `categories` is used downstream.
categories = [(0,), (1,)]

# Prints one line per total and writes the HTML table as a side effect;
# the returned list is the cell's displayed value.
get_minimum_targets_most_like(categories, p, a, k)
# get_minimum_targets_most_unlike(categories, p, a, k)

Minimum Target:  [0.0]  CDF:  1.01848912904
Minimum Target:  [0.0]  CDF:  0.764127292399
Minimum Target:  [0.0]  CDF:  0.627065168155
Minimum Target:  [0.0]  CDF:  0.531712850742
Minimum Target:  [0.0]  CDF:  0.458473357425
Minimum Target:  [0.0]  CDF:  0.399308368696
Minimum Target:  [0.0]  CDF:  0.350085427574
Minimum Target:  [0.0]  CDF:  0.308359509827
Minimum Target:  [0.0]  CDF:  0.272537420858
Minimum Target:  [0.0]  CDF:  0.241506064192
Minimum Target:  [0.0]  CDF:  0.214446376391
Minimum Target:  [0.0]  CDF:  0.19073150047
Minimum Target:  [0.0]  CDF:  0.169866978686
Minimum Target:  [0.0]  CDF:  0.151453534352
Minimum Target:  [0.0]  CDF:  0.135162762134
Minimum Target:  [0.0]  CDF:  0.120720581912
Minimum Target:  [0.0]  CDF:  0.107895570433
Minimum Target:  [1.0]  CDF:  0.364241848156
Minimum Target:  [1.0]  CDF:  0.33802121679
Minimum Target:  [1.0]  CDF:  0.31334327338
Minimum Target:  [1.0]  CDF:  0.290171318549
Minimum Target:  [1.0]  CDF:  0.268458946098
Minimum Target

[array([ 0.]),
 array([ 0.]),
 array([ 0.]),
 array([ 0.]),
 array([ 0.]),
 array([ 0.]),
 array([ 0.]),
 array([ 0.]),
 array([ 0.]),
 array([ 0.]),
 array([ 0.]),
 array([ 0.]),
 array([ 0.]),
 array([ 0.]),
 array([ 0.]),
 array([ 0.]),
 array([ 0.]),
 array([ 1.]),
 array([ 1.]),
 array([ 1.]),
 array([ 1.]),
 array([ 1.]),
 array([ 1.]),
 array([ 1.]),
 array([ 1.]),
 array([ 1.]),
 array([ 1.]),
 array([ 1.]),
 array([ 1.]),
 array([ 1.]),
 array([ 1.]),
 array([ 1.]),
 array([ 1.]),
 array([ 2.]),
 array([ 2.]),
 array([ 2.]),
 array([ 2.]),
 array([ 2.]),
 array([ 2.]),
 array([ 2.]),
 array([ 2.]),
 array([ 2.]),
 array([ 2.]),
 array([ 2.]),
 array([ 2.]),
 array([ 2.]),
 array([ 2.]),
 array([ 2.]),
 array([ 3.]),
 array([ 3.]),
 array([ 3.]),
 array([ 3.]),
 array([ 3.]),
 array([ 3.]),
 array([ 3.]),
 array([ 3.]),
 array([ 3.]),
 array([ 3.]),
 array([ 3.]),
 array([ 3.]),
 array([ 3.]),
 array([ 3.]),
 array([ 4.]),
 array([ 4.]),
 array([ 4.]),
 array([ 4.]),
 array([ 4

In [26]:
# Spot-check the approximate CDF for a total of 18 with bounds [18, 1].
j = 18
multinomCDF(2, j, [0.9, 0.1], [j, 1])

0.3642418481561529