In [1]:
import pandas as pd
import random
import scipy.stats as stats
import numpy
import copy
from scipy.stats import poisson
import math

def multinomCDF_log(G, k, p, tau_p):
    """Return the log of an approximate multinomial CDF value.

    The quantity approximated is presumably
    log P(N_0 <= tau_p[0], ..., N_{G-1} <= tau_p[G-1]) for the counts of
    ``k`` trials spread over ``G`` categories with cell probabilities ``p``.
    The result is the sum of three terms:

    * ``-log P(Poisson(k) = k)``;
    * the sum over categories of ``log P(Poisson(k * p[i]) <= tau_p[i])``;
    * the log of an Edgeworth-corrected normal density, evaluated at ``k``,
      for the sum of the Poisson variables truncated at their bounds.

    NOTE(review): this looks like Levin's (1981) representation of the
    multinomial CDF -- confirm against the paper before citing it.

    Args:
        G: Number of categories; only the first ``G`` entries of ``p`` and
            ``tau_p`` are read.
        k: Total count (number of trials).
        p: Sequence of cell probabilities.
        tau_p: Sequence of per-category upper bounds.

    Returns:
        The log of the approximated probability.
    """
    s = float(k)
    # Start from -log P(Poisson(s) = k).
    log_cdf = -poisson.logpmf(k, s)
    gamma1 = 0.0
    gamma2 = 0.0
    sum_s2 = 0.0
    sum_mu = 0.0

    for i in range(0, G):
        sp = s * p[i]

        # Probability that an untruncated Poisson(sp) respects the bound.
        pcdf = poisson.cdf(tau_p[i], sp)
        log_cdf += numpy.log(pcdf)

        # Mean and variance of Poisson(sp) truncated to [0, tau_p[i]].
        mu = sp * (1 - poisson.pmf(tau_p[i], sp) / pcdf)
        s2 = mu - (tau_p[i] - mu) * (sp - mu)

        # 2nd to 4th factorial moments of the truncated variable; ``mr`` is
        # the running falling factorial tau * (tau - 1) * (tau - 2).
        mr = tau_p[i]
        mf2 = sp * mu - mr * (sp - mu)

        mr *= tau_p[i] - 1
        mf3 = sp * mf2 - mr * (sp - mu)

        mr *= tau_p[i] - 2
        mf4 = sp * mf3 - mr * (sp - mu)

        # 3rd and 4th central moments rebuilt from the factorial moments.
        mu3 = mf3 + mf2 * (3 - 3 * mu) + mu * (1 + mu * (-3 + 2 * mu))
        mu4 = (mf4 + mf3 * (6 - 4 * mu) + mf2 * (7 + mu * (-12 + 6 * mu))
               + mu * (1 + mu * (-4 + mu * (6 - 3 * mu))))

        gamma1 += mu3
        gamma2 += mu4 - 3 * s2 * s2
        sum_mu += mu
        sum_s2 += s2

    # Turn the accumulated sums into skewness / excess kurtosis of the total.
    sp = numpy.sqrt(sum_s2)
    gamma1 /= sum_s2 * sp
    gamma2 /= sum_s2 * sum_s2

    # Standardised distance between k and the mean of the truncated sum.
    x = (k - sum_mu) / sp
    x2 = x * x

    # Log of the normal density at x with Edgeworth correction terms
    # (Hermite polynomials of order 3, 4 and 6), rescaled by 1 / sp.
    PWN = (-x2 / 2
           + numpy.log(1 + gamma1 / 6 * x * (x2 - 3)
                       + gamma2 / 24 * (x2 * x2 - 6 * x2 + 3)
                       + gamma1 * gamma1 / 72
                       * (((x2 - 15) * x2 + 45) * x2 - 15))
           - numpy.log(2 * math.pi) / 2 - numpy.log(sp))

    log_cdf += PWN
    return log_cdf

def multinomCDF(G, k, p, tau_p):
    """Return the approximate multinomial CDF value on the probability scale.

    Calls ``multinomCDF_log`` with the same arguments and exponentiates the
    log-scale result.
    """
    log_value = multinomCDF_log(G, k, p, tau_p)
    return numpy.exp(log_value)

def multinomial_icdf_most_likely(G, k, p, a, tau):
    """Greedily raise per-category bounds until the CDF reaches ``a``.

    The bound vector is ``[k] + tau``: the first category is bounded by
    ``k`` and the remaining ones start from ``tau``.  If the CDF of that
    vector already exceeds ``a`` it is returned unchanged.  Otherwise a
    single pass visits the remaining categories in order, tentatively adds
    one to the current bound and evaluates the CDF of the candidate:

    * the first candidate is always adopted;
    * later candidates are adopted when their CDF is at least ``a`` and at
      least the CDF of the currently adopted vector.

    When neither the candidate nor the adopted CDF reaches ``a`` the
    increment is kept in the working vector and carried into the next
    candidate; otherwise the increment is undone.

    Args:
        G: Number of categories passed through to ``multinomCDF``.
        k: Total count; also used as the bound of the first category.
        p: Sequence of cell probabilities.
        a: CDF level to reach.
        tau: Starting bounds for categories 1..G-1.

    Returns:
        A list ``[k, bound_1, ...]`` holding the selected bounds.
    """
    tau_p = [k] + list(tau)
    temp = copy.copy(tau_p)
    cdf = multinomCDF(G, k, p, tau_p)
    new_cdf = 0
    is_first = True
    not_fulfilled = False

    if cdf > a:
        return tau_p

    for i in range(len(tau_p) - 1):
        temp[i + 1] = temp[i + 1] + 1
        if is_first:
            tau_p = copy.copy(temp)
            cdf = multinomCDF(G, k, p, tau_p)
            is_first = False
        else:
            new_cdf = multinomCDF(G, k, p, temp)
            if new_cdf >= a and new_cdf >= cdf:
                tau_p = copy.copy(temp)
                # tau_p now equals temp, so its CDF is exactly new_cdf;
                # reuse it instead of evaluating the approximation again.
                cdf = new_cdf

        if new_cdf >= a or cdf >= a:
            if not_fulfilled:
                # NOTE(review): cdf is not refreshed after this adoption --
                # confirm that comparing against the older value is intended.
                tau_p = copy.copy(temp)
            not_fulfilled = False
            # Undo the tentative increment in either case.
            temp[i + 1] = temp[i + 1] - 1
        else:
            not_fulfilled = True
    return tau_p


def multinomial_icdf_most_unlikely(G, k, p, a, tau):
    """Greedily raise per-category bounds, preferring the smallest CDF >= ``a``.

    The bound vector is ``[k] + tau``: the first category is bounded by
    ``k`` and the remaining ones start from ``tau``.  If the CDF of that
    vector already exceeds ``a`` it is returned unchanged.  Otherwise a
    single pass visits the remaining categories in order, tentatively adds
    one to the current bound and evaluates the CDF of the candidate:

    * the first candidate is always adopted;
    * later candidates are adopted when their CDF is at least ``a`` and at
      most the CDF of the currently adopted vector.

    When neither the candidate nor the adopted CDF reaches ``a`` the
    increment is kept in the working vector and carried into the next
    candidate.  When one of them does, the working vector is adopted if the
    previous step was unfulfilled; otherwise the increment is undone.

    Args:
        G: Number of categories passed through to ``multinomCDF``.
        k: Total count; also used as the bound of the first category.
        p: Sequence of cell probabilities.
        a: CDF level to reach.
        tau: Starting bounds for categories 1..G-1.

    Returns:
        A list ``[k, bound_1, ...]`` holding the selected bounds.
    """
    tau_p = [k] + list(tau)
    temp = copy.copy(tau_p)
    cdf = multinomCDF(G, k, p, tau_p)
    new_cdf = 0
    is_first = True
    not_fulfilled = False

    if cdf > a:
        return tau_p

    for i in range(len(tau_p) - 1):
        temp[i + 1] = temp[i + 1] + 1
        if is_first:
            tau_p = copy.copy(temp)
            cdf = multinomCDF(G, k, p, tau_p)
            is_first = False
        else:
            new_cdf = multinomCDF(G, k, p, temp)
            if new_cdf >= a and new_cdf <= cdf:
                tau_p = copy.copy(temp)
                # tau_p now equals temp, so its CDF is exactly new_cdf;
                # reuse it instead of evaluating the approximation again.
                cdf = new_cdf

        if new_cdf >= a or cdf >= a:
            if not_fulfilled:
                # The increment stays in temp on this path (unlike the
                # fulfilled path below, which undoes it).
                # NOTE(review): cdf is not refreshed after this adoption --
                # confirm that comparing against the older value is intended.
                tau_p = copy.copy(temp)
            else:
                temp[i + 1] = temp[i + 1] - 1
            not_fulfilled = False
        else:
            not_fulfilled = True
    return tau_p

def multinomial_icdf_between(G, k, p, tau, alpha, most_unlike, most_like):
    """Search for bounds whose CDF lies between two reference CDF values.

    The reference values are the CDFs of ``[k] + most_unlike`` and
    ``[k] + most_like``; the smaller one becomes the lower limit ``a`` and
    the larger one the upper limit ``upper_a``.  Starting from
    ``[k] + tau``, the function returns immediately if the starting CDF
    exceeds ``alpha``.  Otherwise it makes one pass over the remaining
    categories, tentatively adding one to the current bound:

    * the first candidate is always adopted;
    * a later candidate whose CDF satisfies ``a < cdf <= upper_a`` is
      returned straight away.

    Args:
        G: Number of categories passed through to ``multinomCDF``.
        k: Total count; also used as the bound of the first category.
        p: Sequence of cell probabilities.
        tau: Starting bounds for categories 1..G-1.
        alpha: Level used only for the early-return check on the start
            vector.
        most_unlike: Bounds for categories 1..G-1 of one reference vector.
        most_like: Bounds for categories 1..G-1 of the other reference
            vector.

    Returns:
        A list ``[k, bound_1, ...]`` holding the selected bounds.
    """
    tau_p = [k] + list(tau)
    temp = copy.copy(tau_p)
    cdf = multinomCDF(G, k, p, tau_p)
    new_cdf = 0
    is_first = True
    not_fulfilled = False
    like = numpy.append([k], most_like)
    unlike = numpy.append([k], most_unlike)

    # Evaluate each reference CDF once and order them into [a, upper_a].
    unlike_cdf = multinomCDF(G, k, p, unlike)
    like_cdf = multinomCDF(G, k, p, like)
    if unlike_cdf < like_cdf:
        a = unlike_cdf
        upper_a = like_cdf
    else:
        a = like_cdf
        upper_a = unlike_cdf

    if cdf > alpha:
        return tau_p

    for i in range(len(tau_p) - 1):
        temp[i + 1] = temp[i + 1] + 1
        if is_first:
            tau_p = copy.copy(temp)
            cdf = multinomCDF(G, k, p, tau_p)
            is_first = False
        else:
            new_cdf = multinomCDF(G, k, p, temp)
            if new_cdf > a and new_cdf <= upper_a:
                # Candidate falls inside the window: return a copy of it.
                return copy.copy(temp)

        reaches_lower = new_cdf >= a or cdf >= a
        within_upper = new_cdf <= upper_a or cdf <= upper_a
        if reaches_lower and within_upper:
            if not_fulfilled:
                tau_p = copy.copy(temp)
            else:
                temp[i + 1] = temp[i + 1] - 1
            not_fulfilled = False
        else:
            not_fulfilled = True
            # Undo the increment only when the window was overshot.
            if new_cdf > upper_a or cdf > upper_a:
                temp[i + 1] = temp[i + 1] - 1
    return tau_p

def get_minimum_targets_most_unlike(categories, p, alpha, k):
    """Compute bounds for every total 1..k with the "most unlikely" search.

    For each total ``i`` from 1 to ``k`` the bounds returned by
    ``multinomial_icdf_most_unlikely`` (without the leading entry) are
    printed together with their CDF, stored, and used as the starting point
    for the next total.  The collected bounds are also written as an HTML
    table to ``"most_unlikely" + str(p) + ".html"`` in the working
    directory.

    Args:
        categories: Sequence with one entry per category; only its length
            is used.
        p: List of cell probabilities; ``p[1:]`` labels the table columns.
        alpha: CDF level handed to the search.
        k: Largest total to process.

    Returns:
        A list with one numpy array of bounds per total.
    """
    positions = numpy.arange(1, k + 1)
    minimum_targets = []
    tau = numpy.zeros(len(categories) - 1)

    for i in positions:
        # NOTE(review): the search receives len(p) categories while the
        # printed CDF uses len(categories) -- presumably equal; confirm.
        tau_p = multinomial_icdf_most_unlikely(len(p), i, p, alpha, tau)[1:]
        cdf = multinomCDF(len(categories), i, p, [i] + tau_p)
        # Single-argument print: same text as the former Python 2 print
        # statement, and also valid syntax on Python 3.
        print("Minimum Target:  %s  CDF:  %s" % (tau_p, cdf))
        minimum_targets.append(numpy.array(tau_p))
        tau = copy.copy(tau_p)

    df = pd.DataFrame(data=(numpy.array(minimum_targets)).astype(int))
    df.columns = p[1:]
    df.index = positions
    df.to_html("most_unlikely" + str(p) + ".html")
    return minimum_targets

def get_minimum_targets_most_like(categories, p, alpha, k):
    """Compute bounds for every total 1..k with the "most likely" search.

    For each total ``i`` from 1 to ``k`` the bounds returned by
    ``multinomial_icdf_most_likely`` (without the leading entry) are printed
    together with their CDF, stored, and used as the starting point for the
    next total.  The collected bounds are also written as an HTML table to
    ``"most_likely_" + str(p) + ".html"`` in the working directory.

    Args:
        categories: Sequence with one entry per category; only its length
            is used.
        p: List of cell probabilities; ``p[1:]`` labels the table columns.
        alpha: CDF level handed to the search.
        k: Largest total to process.

    Returns:
        A list with one numpy array of bounds per total.
    """
    positions = numpy.arange(1, k + 1)
    minimum_targets = []
    tau = numpy.zeros(len(categories) - 1)

    for i in positions:
        # NOTE(review): the search receives len(p) categories while the
        # printed CDF uses len(categories) -- presumably equal; confirm.
        tau_p = multinomial_icdf_most_likely(len(p), i, p, alpha, tau)[1:]
        cdf = multinomCDF(len(categories), i, p, [i] + tau_p)
        # Single-argument print: same text as the former Python 2 print
        # statement, and also valid syntax on Python 3.
        print("Minimum Target:  %s  CDF:  %s" % (tau_p, cdf))
        minimum_targets.append(numpy.array(tau_p))
        tau = copy.copy(tau_p)

    df = pd.DataFrame(data=(numpy.array(minimum_targets)).astype(int))
    df.columns = p[1:]
    df.index = positions
    df.to_html("most_likely_" + str(p) + ".html")
    return minimum_targets

In [2]:
# Example run: five categories, CDF level 0.1, totals 1..100.
p = [0.3, 0.2, 0.2, 0.2, 0.1]
a = 0.1
k = 100
# Only the number of entries in `categories` is used by the functions called.
categories = [(0,), (1,), (2,), (3,), (4,)]

# Each call prints one line per total and writes an HTML table; the value of
# the last expression is what the notebook displays as the cell result.
get_minimum_targets_most_like(categories, p, a, k)
get_minimum_targets_most_unlike(categories, p, a, k)

Minimum Target:  [0.0, 0.0, 0.0, 0.0]  CDF:  0.311873823302
Minimum Target:  [1.0, 0.0, 0.0, 0.0]  CDF:  0.226944734973
Minimum Target:  [1.0, 0.0, 1.0, 0.0]  CDF:  0.217898295277
Minimum Target:  [1.0, 1.0, 1.0, 0.0]  CDF:  0.269585921563
Minimum Target:  [1.0, 1.0, 1.0, 0.0]  CDF:  0.139803642066
Minimum Target:  [1.0, 1.0, 1.0, 1.0]  CDF:  0.149617818631
Minimum Target:  [1.0, 1.0, 2.0, 1.0]  CDF:  0.1508451977
Minimum Target:  [1.0, 2.0, 2.0, 1.0]  CDF:  0.170381073432
Minimum Target:  [2.0, 2.0, 2.0, 1.0]  CDF:  0.213027091087
Minimum Target:  [2.0, 2.0, 2.0, 1.0]  CDF:  0.131863289895
Minimum Target:  [2.0, 3.0, 2.0, 1.0]  CDF:  0.133278685757
Minimum Target:  [3.0, 3.0, 2.0, 1.0]  CDF:  0.144910940737
Minimum Target:  [3.0, 3.0, 3.0, 1.0]  CDF:  0.168621456748
Minimum Target:  [3.0, 3.0, 3.0, 1.0]  CDF:  0.111386762687
Minimum Target:  [3.0, 3.0, 3.0, 2.0]  CDF:  0.128408377798
Minimum Target:  [3.0, 4.0, 3.0, 2.0]  CDF:  0.135701497433
Minimum Target:  [3.0, 4.0, 4.0, 2.0]  CDF

Minimum Target:  [7.0, 10.0, 10.0, 6.0]  CDF:  0.140270987409
Minimum Target:  [7.0, 10.0, 10.0, 6.0]  CDF:  0.111875374468
Minimum Target:  [7.0, 10.0, 10.0, 7.0]  CDF:  0.103465431386
Minimum Target:  [7.0, 10.0, 11.0, 7.0]  CDF:  0.103880820353
Minimum Target:  [7.0, 11.0, 11.0, 7.0]  CDF:  0.106133603317
Minimum Target:  [7.0, 11.0, 12.0, 7.0]  CDF:  0.103460330692
Minimum Target:  [7.0, 12.0, 12.0, 7.0]  CDF:  0.102448750767
Minimum Target:  [8.0, 12.0, 12.0, 7.0]  CDF:  0.144139937728
Minimum Target:  [8.0, 12.0, 12.0, 7.0]  CDF:  0.118290837561
Minimum Target:  [8.0, 12.0, 12.0, 8.0]  CDF:  0.110208268618
Minimum Target:  [8.0, 12.0, 13.0, 8.0]  CDF:  0.108525323938
Minimum Target:  [8.0, 12.0, 14.0, 8.0]  CDF:  0.103023487172
Minimum Target:  [8.0, 13.0, 14.0, 8.0]  CDF:  0.104326851583
Minimum Target:  [8.0, 14.0, 14.0, 8.0]  CDF:  0.101694086603
Minimum Target:  [9.0, 14.0, 14.0, 8.0]  CDF:  0.143967053408
Minimum Target:  [9.0, 14.0, 14.0, 8.0]  CDF:  0.120725113973
Minimum 

[array([ 0.,  0.,  0.,  0.]),
 array([ 0.,  0.,  0.,  1.]),
 array([ 0.,  0.,  1.,  1.]),
 array([ 0.,  0.,  2.,  1.]),
 array([ 0.,  1.,  2.,  1.]),
 array([ 0.,  1.,  2.,  2.]),
 array([ 0.,  2.,  2.,  2.]),
 array([ 1.,  2.,  2.,  2.]),
 array([ 1.,  2.,  2.,  2.]),
 array([ 1.,  2.,  2.,  3.]),
 array([ 1.,  2.,  3.,  3.]),
 array([ 1.,  3.,  3.,  3.]),
 array([ 1.,  3.,  4.,  3.]),
 array([ 1.,  4.,  4.,  3.]),
 array([ 2.,  4.,  4.,  3.]),
 array([ 2.,  4.,  4.,  3.]),
 array([ 2.,  4.,  4.,  4.]),
 array([ 2.,  4.,  5.,  4.]),
 array([ 2.,  5.,  5.,  4.]),
 array([ 2.,  5.,  6.,  4.]),
 array([ 2.,  6.,  6.,  4.]),
 array([ 3.,  6.,  6.,  4.]),
 array([ 3.,  6.,  6.,  4.]),
 array([ 3.,  6.,  6.,  4.]),
 array([ 4.,  6.,  6.,  4.]),
 array([ 4.,  6.,  6.,  4.]),
 array([ 4.,  6.,  6.,  5.]),
 array([ 4.,  6.,  7.,  5.]),
 array([ 4.,  7.,  7.,  5.]),
 array([ 4.,  7.,  8.,  5.]),
 array([ 4.,  8.,  8.,  5.]),
 array([ 5.,  8.,  8.,  5.]),
 array([ 5.,  8.,  8.,  5.]),
 array([ 5