Auxiliary notebook for computing state-space sizes for use in my note 2 on state-space sizes

In [1]:
import numpy as np
from fractions import gcd
from scipy import pi, e
from bigfloat import *

In [2]:
import sys
simulations_dir = '../../coalescent-simulations'
sys.path.append(simulations_dir)
from recursionEquation import partitions,partitionToMultiset

In [3]:
# Show the 2-part partitions of 8 returned by the imported `partitions`
# helper, reversed -- the same reversal IntegerPartitions applies to its buffer.
partitions(8,2)[::-1]

[(4, 4), (3, 5), (2, 6), (1, 7)]

In [4]:
class IntegerPartitions(object):
    '''Iterator over all partitions of the integer N.

    Partitions are produced grouped by increasing number of parts: first the
    single 1-part partition (N,), then all 2-part partitions, and so on up to
    the N-part partition (1, ..., 1).  Within a group the order is the order
    of the list returned by the imported helper `partitions(N, n_parts)`.
    Every tuple from that helper is reversed before it is handed out
    (the helper presumably returns ascending tuples, so the results are in
    non-ascending order, e.g. (3, 1) rather than (1, 3)).

    NOTE: for N == 0 the seed tuple (0,) is the only value produced.

    The object is its own iterator and can only be traversed once.
    '''

    def __init__(self,N):
        assert N >= 0
        assert isinstance(N,int)

        self.N = N
        # number of parts of the partitions currently held in self.parts
        self.n_parts = 1
        # buffer of partitions still to be handed out; consumed from the end
        self.parts = [(N,)]
        # bound once so next() does not depend on the global name
        self.partitions_local = partitions

    def __iter__(self):
        return self

    def next(self):
        '''Return the next partition; raise StopIteration when exhausted.'''
        # Refill the buffer with the partitions having one more part.  The
        # loop (rather than a single `if`) also skips an empty group, and
        # `>=` guarantees termination for N == 0.
        while not self.parts:
            if self.n_parts >= self.N:
                raise StopIteration
            self.n_parts += 1
            self.parts = self.partitions_local(self.N,self.n_parts)[::-1]

        return self.parts.pop()[::-1]

    # Python 3 spelling of the iterator protocol; `next` is kept for Python 2.
    __next__ = next
            
def s_subpartitions(s,part):
    '''Return all distinct sub-partitions of part whose entries sum to s.

    part must be sorted in non-ascending order (so equal entries are
    adjacent).  A sub-partition is a tuple made of some of the entries of
    part, kept in the same order; each distinct tuple is listed once.
    By convention '()' is a valid sub-partition summing to 0.

    Returns a list of tuples (empty if no sub-partition sums to s).
    '''
    if s == 0:
        return [()]
    if not 0 < s <= sum(part):
        return []

    largest = part[0]

    if largest == part[-1]:
        # All entries are equal: the only candidate is s/largest copies.
        # Floor division keeps the repeat count an integer even when `/`
        # means true division.
        return [(largest,)*(s//largest)] if s%largest == 0 else []

    # Case 1: the sub-partition uses (at least) one copy of the largest entry.
    with_largest = [(largest,) + rest
                    for rest in s_subpartitions(s-largest,part[1:])]

    # Case 2: it uses no copy of the largest entry, so skip all of them.
    # The scan stops in range because part[-1] differs from part[0] here.
    i_next = 1
    while part[i_next] == largest:
        i_next += 1
    without_largest = s_subpartitions(s,part[i_next:])

    return with_largest + without_largest
    
            

In [5]:
# For every partition p of 4, print p together with its sub-partitions that
# sum to 2, each converted with partitionToMultiset.
for p in IntegerPartitions(4):
    print p,map(partitionToMultiset ,s_subpartitions(2,p) )

(4,) []
(3, 1) []
(2, 2) [(0, 0, 1)]
(2, 1, 1) [(0, 0, 1), (0, 2, 0)]
(1, 1, 1, 1) [(0, 2, 0)]


In [6]:
# Print every partition of 8 in the order IntegerPartitions produces them.
for p in IntegerPartitions(8):
    print p

(8,)
(7, 1)
(6, 2)
(5, 3)
(4, 4)
(6, 1, 1)
(5, 2, 1)
(4, 3, 1)
(4, 2, 2)
(3, 3, 2)
(5, 1, 1, 1)
(4, 2, 1, 1)
(3, 3, 1, 1)
(3, 2, 2, 1)
(2, 2, 2, 2)
(4, 1, 1, 1, 1)
(3, 2, 1, 1, 1)
(2, 2, 2, 1, 1)
(3, 1, 1, 1, 1, 1)
(2, 2, 1, 1, 1, 1)
(2, 1, 1, 1, 1, 1, 1)
(1, 1, 1, 1, 1, 1, 1, 1)


In [7]:
def factorial(k,start = 1):
    '''Return start * (start+1) * ... * k, or 1 if that range is empty.

    With the default start this is k!.  k must be a non-negative int.
    '''
    assert isinstance(k,int)
    assert k >= 0
    product = 1
    for factor in range(start,k+1):
        product = product*factor
    return product

def prod(x):
    'Multiply the elements of x together, left to right (1 for an empty x).'
    result = 1
    for factor in x:
        result = result*factor
    return result

def fProd(x):
    'Multiply the factorials of the elements of x together (1 for an empty x).'
    result = 1
    for entry in x:
        result = result*factorial(entry)
    return result

def binomial(n,k,base = None,alwaysInt = False):
    '''Return the binomial coefficient "n choose k".

    n, k      -- the usual arguments of the binomial coefficient.
    alwaysInt -- if true, always use the exact integer computation.
    base      -- kept for backward compatibility only.  The value of a
                 binomial coefficient does not depend on a logarithm base,
                 so this argument no longer influences the result.

    For n < 100 (or alwaysInt) the result is the exact integer
    n*(n-1)*...*(n-k+1) / k!.  Otherwise it is the approximation
    exp(log_binomial(n,k)) built on Stirling's formula.
    '''
    if n<100 or alwaysInt: # can be handled without overflow-errors
        falling = factorial(n,n-k+1)   # n * (n-1) * ... * (n-k+1)
        k_fact = factorial(k)
        # The quotient is an integer; floor division keeps it an exact
        # integer even where `/` means true division.
        return falling//k_fact
    # The natural logarithm must be used here: exp() only inverts ln, so
    # forwarding another base would give exp(log_base(C)) instead of C.
    return exp(log_binomial(n,k))
    
def log_binomial(n,k,base = None):
    '''Approximate the logarithm of "n choose k" via stirling_log_factorial.

    With base None the natural logarithm is returned, otherwise the
    logarithm to the given base (which must be greater than 1).
    '''
    assert (base is None) or base > 1.0
    log_n_fact = stirling_log_factorial(n)
    log_k_fact = stirling_log_factorial(k)
    log_rest_fact = stirling_log_factorial(n-k)
    natural = (log_n_fact - log_k_fact) - log_rest_fact
    if base is None:
        return natural
    # change of base: log_b(x) = ln(x)/ln(b)
    return natural/log(base)

def stirling_approximation(n):
    '''Stirling's approximation of n!, i.e. sqrt(2*pi*n) * (n/e)**n.'''
    n = float(n)
    root_term = sqrt(2*pi*n)
    power_term = (n/e)**n
    return root_term*power_term
    
def stirling_log_factorial(n):
    '''Return Stirling's approximation of ln(n!).

    Computed as n*ln(n) - n + ln(sqrt(2*pi*n)).  For n == 0 the formula
    would evaluate log(0), so the exact value ln(0!) = 0 is returned
    instead; this keeps log_binomial(n,0) and log_binomial(n,n) well defined.
    '''
    if n == 0:
        return 0.0
    return n*log(n) - n + log(sqrt(2*pi*n))

In [12]:
# Sum of binomial(2+i, i) for i = 0..4.
# NOTE(review): with binomial as defined in this notebook the terms are
# 1 + 3 + 6 + 10 + 15 = 35; the recorded output (210) presumably comes from
# an earlier state of this cell or of binomial -- re-run to confirm.
sum([binomial(2+i,i) for i in range(5)])

210

In [8]:
def print_array_as_latex(data,rownames = None, colnames = None, element2str = None):
    '''Print a 2-dimensional numpy array as a LaTeX table.

    data        -- 2-d np.ndarray holding the table entries.
    rownames    -- optional sequence with one label per row; if given, an
                   extra leading column with the labels is emitted.
    colnames    -- optional sequence with one label per column; if given, a
                   header row with the labels is emitted.
    element2str -- optional function turning one array element into a
                   string (default: '%.4g' formatting).

    The table (a tabular environment with right-aligned columns, wrapped in
    a table environment) is written to standard output; nothing is returned.
    Raises AssertionError if data is not a 2-d ndarray or if the number of
    names does not match the shape of data.
    '''
    sep = '    & '# the string used to separate row-entries
    if element2str is None:
        element2str = lambda x: '%.4g'%x # map applied to turn any element into a string

    assert isinstance(data,np.ndarray)
    assert len(data.shape) == 2

    rows,cols = data.shape

    if colnames is not None:
        assert cols == len(colnames)
    if rownames is not None:
        assert rows == len(rownames)

    # an extra (leading) column is needed when rows are labelled
    extra_col = (rownames is not None)

    #Build header
    lines = [
        '\\begin{table}',
        '    \\begin{tabular}{%s}'%('r'*(cols+int(extra_col)))
    ]

    if colnames is not None:
        # sep*int(extra_col) leaves the corner cell above the row labels empty
        header_cells = [str(name) for name in colnames]
        lines.append('       ' + sep*int(extra_col) + sep.join(header_cells) + r' \\')

    #Build the data-part of the table
    for i,row in enumerate(data):
        # real lists are built here so that prepending the row label does not
        # depend on map() returning a list
        cells = [element2str(entry) for entry in row]
        if rownames is not None:
            cells.insert(0,str(rownames[i]))
        lines.append('      ' + sep.join(cells) + r' \\')

    #Build footer
    lines.append('    \\end{tabular}')
    lines.append('\\end{table}')

    # a single parenthesised argument prints identically whether print is a
    # statement or a function
    print('\n'.join(lines))

In [9]:
# 4x5 array of zeros, used as sample input for print_array_as_latex.
A = np.zeros((4,5))

In [10]:
# Try the LaTeX printer with both row labels and column labels.
print_array_as_latex(A,rownames=['a','b','c','d'],colnames=range(5))

\begin{table}
    \begin{tabular}{rrrrrr}
           & 0    & 1    & 2    & 3    & 4 \\
      a    & 0    & 0    & 0    & 0    & 0 \\
      b    & 0    & 0    & 0    & 0    & 0 \\
      c    & 0    & 0    & 0    & 0    & 0 \\
      d    & 0    & 0    & 0    & 0    & 0 \\
    \end{tabular}
\end{table}


In [57]:
def state_space_size_s_seg(n,L,k,s):
    '''State-space size when rows and columns are non-exchangeable.

    Evaluates binomial(L,s) * (k**n - k)**s * k**(L-s); the result is 0
    whenever s exceeds L.
    '''
    if s <= L:
        return binomial(L,s)*((k**n - k)**s)*(k**(L-s))
    return 0

In [74]:
def state_space_size_s_seg_exch_rows(n,L,k,s):
    '''Variant of the s-segregating state-space size that averages over the
    integer partitions of n (presumably: rows exchangeable, judging by the
    name -- confirm against the note).

    For each partition l_sigma of n the term
        n! * L! * (k**len(l_sigma) - k)**s
        / (prod(l_sigma) * fProd(partitionToMultiset(l_sigma)))
    is accumulated; the sum is multiplied by k**(L-s) * binomial(L,s) and
    divided by n! * L!.
    '''
    free_factor = k**(L-s)
    site_choices = binomial(L,s)
    # common scale that makes every summand an integer
    scale = factorial(n)*factorial(L)

    total = 0
    for l_sigma in IntegerPartitions(n):
        numerator = scale*(k**len(l_sigma) - k)**s
        denominator = prod(l_sigma) * fProd(partitionToMultiset(l_sigma))
        assert numerator%denominator == 0
        total += numerator/denominator

    return (free_factor*site_choices*total)/scale

In [72]:
# Single evaluation for n=4, L=2, k=2, s=0.
# NOTE(review): the per-partition lines in the recorded output match the
# format of the print that is commented out inside the function, so they
# presumably come from a run made while that print was still active.
state_space_size_s_seg_exch_rows(4,2,2,0)

(4,) 1 4 12
(3, 1) 1 3 16
(2, 2) 1 8 6
(2, 1, 1) 1 4 12
(1, 1, 1, 1) 1 24 2


4

In [58]:
# Tabulate state_space_size_s_seg for L = 2, k = 2:
# one row per n in ns, one column per s in ss, printed as a LaTeX table.
ns = [1,2,3,4,5,10,20]
#ss = [0,1,2,3,4,5]
ss = [0,1,2]
L = 2
k = 2
A = np.zeros((len(ns),len(ss)))
for i,n in enumerate(ns):
    for j,s in enumerate(ss):
        A[i,j] = state_space_size_s_seg(n,L,k,s)
# entries are stored as floats in A, hence the int() conversion when printing
print_array_as_latex(A,rownames = map(lambda n: '%i'%n, ns),colnames = map(lambda s: '%i'%s, ss), element2str = lambda x: str(int(x)))

\begin{table}
    \begin{tabular}{rrrr}
           & 0    & 1    & 2 \\
      1    & 4    & 0    & 0 \\
      2    & 4    & 8    & 4 \\
      3    & 4    & 24    & 36 \\
      4    & 4    & 56    & 196 \\
      5    & 4    & 120    & 900 \\
      10    & 4    & 4088    & 1044484 \\
      20    & 4    & 4194296    & 1099507433476 \\
    \end{tabular}
\end{table}


In [79]:
# Tabulate state_space_size_s_seg_exch_rows for L = 5, k = 2:
# one row per n in ns, one column per s in ss, printed as a LaTeX table.
ns = [1,2,3,4,5,10,20]
#ss = [0,1,2,3,4,5]
ss = [0,1,2,3,4,5]
L = 5
k = 2
B = np.zeros((len(ns),len(ss)))
for i,n in enumerate(ns):
    for j,s in enumerate(ss):
        B[i,j] = state_space_size_s_seg_exch_rows(n,L,k,s)
# entries are stored as floats in B, hence the int() conversion when printing
print_array_as_latex(B,rownames = map(lambda n: '%i'%n, ns),colnames = map(lambda s: '%i'%s, ss), element2str = lambda x: str(int(x)))

\begin{table}
    \begin{tabular}{rrrrrrr}
           & 0    & 1    & 2    & 3    & 4    & 5 \\
      1    & 32    & 0    & 0    & 0    & 0    & 0 \\
      2    & 32    & 80    & 160    & 160    & 80    & 16 \\
      3    & 32    & 160    & 640    & 1600    & 2240    & 1312 \\
      4    & 32    & 240    & 1520    & 6880    & 19320    & 24368 \\
      5    & 32    & 320    & 2880    & 20800    & 103360    & 249600 \\
      10    & 32    & 720    & 19680    & 714240    & 31197040    & 1089167696 \\
      20    & 32    & 1520    & 135280    & 35105920    & 32408807680    & 77502711576728 \\
    \end{tabular}
\end{table}


In [18]:
def summand_E_n_exch_rows_cols(l_sigma, l_mu, k):
    '''One summand of the double sum over partition pairs (l_sigma, l_mu).

    With n = sum(l_sigma) and L = sum(l_mu), returns
        n! * L! * k**(sum of gcd(i,j) over i in l_sigma, j in l_mu)
    divided by
        prod(l_sigma)*fProd(partitionToMultiset(l_sigma))
        * prod(l_mu)*fProd(partitionToMultiset(l_mu)).
    The division is asserted to be exact.
    '''
    n = sum(l_sigma)
    L = sum(l_mu)

    exponent = 0
    for i in l_sigma:
        for j in l_mu:
            exponent += gcd(i,j)

    sigma_weight = prod(l_sigma)*fProd(partitionToMultiset(l_sigma))
    mu_weight = prod(l_mu)*fProd(partitionToMultiset(l_mu))

    numerator = factorial(n)*factorial(L)*k**exponent
    denominator = sigma_weight*mu_weight
    assert numerator%denominator == 0
    return numerator/denominator

# def product_term_prop_3(l_sigma,l_seg):
#     return prod([k**sum([gcd(a,b) for a in l_sigma]) - k for b in l_seg])

def summand_E_n_seg_s_exch_rows_cols(l_sigma,l_mu,k,s):
    '''One summand of the s-restricted double sum over pairs (l_sigma, l_mu).

    For every sub-partition l_sub of l_mu summing to s, three factors are
    multiplied and accumulated:
      * the product of binomial(alpha_mu[i], alpha_sub[i]) over the entries
        of alpha_sub = partitionToMultiset(l_sub),
      * k**(len(l_mu) - len(l_sub)),
      * the product over b in l_sub of
        k**(sum of gcd(a,b) over a in l_sigma) - k.
    The accumulated value is scaled by n! * L! and divided by
    prod(l_sigma)*fProd(alpha_sigma) * prod(l_mu)*fProd(alpha_mu);
    the division is asserted to be exact.
    '''
    n = sum(l_sigma)
    L = sum(l_mu)
    alpha_sigma = partitionToMultiset(l_sigma)
    alpha_mu = partitionToMultiset(l_mu)

    total = 0
    for l_sub in s_subpartitions(s,l_mu):
        alpha_sub = partitionToMultiset(l_sub)
        choices = prod([binomial(alpha_mu[i],count) for i,count in enumerate(alpha_sub)])
        unrestricted = k**(len(l_mu) - len(l_sub))
        restricted = prod([k**sum([gcd(a,b) for a in l_sigma]) - k for b in l_sub])
        total += choices*unrestricted*restricted

    sigma_weight = prod(l_sigma)*fProd(alpha_sigma)
    mu_weight = prod(l_mu)*fProd(alpha_mu)

    numerator = factorial(n)*factorial(L)*total
    denominator = sigma_weight*mu_weight
    assert numerator%denominator == 0

    return numerator/denominator
    
    

In [15]:
def state_space_size_unrestricted_exch_exch(n,L,k):
    '''Sum summand_E_n_exch_rows_cols over all pairs of a partition of n and
    a partition of L, then divide by n! * L!.'''
    total = sum(summand_E_n_exch_rows_cols(l_sigma, l_mu, k)
                for l_sigma in IntegerPartitions(n)
                for l_mu in IntegerPartitions(L))
    return total/(factorial(n) * factorial(L))

def state_space_size_s_seg_exch_exch(n,L,k,s):
    '''Sum summand_E_n_seg_s_exch_rows_cols over all pairs of a partition of
    n and a partition of L, then divide by n! * L!.'''
    total = sum(summand_E_n_seg_s_exch_rows_cols(l_sigma,l_mu,k,s)
                for l_sigma in IntegerPartitions(n)
                for l_mu in IntegerPartitions(L))
    return total/(factorial(n) * factorial(L))

In [None]:
# Hand-rolled version of state_space_size_unrestricted_exch_exch for
# n = L = 2: the divisor 4 equals factorial(2)*factorial(2).
k = 2
n = 2
L = 2
sum([summand_E_n_exch_rows_cols(l_sigma, l_mu, k) for l_sigma in IntegerPartitions(n) for l_mu in IntegerPartitions(L)])/4

In [16]:
# Unrestricted size for n = 5, L = 5, k = 2.
state_space_size_unrestricted_exch_exch(5,5,2)

5624

In [27]:
# s-restricted size for n = 5, L = 5, k = 2, s = 3.
state_space_size_s_seg_exch_exch(5,5,2,3)

384

In [78]:
# Tabulate state_space_size_s_seg_exch_exch for L = 5, k = 2:
# one row per n in ns, one column per s in ss, printed as a LaTeX table.
# Note that this rebinds the module-level name A.
ns = [1,2,3,4,5,10,20]
#ss = [0,1,2,3,4,5]
ss = [0,1,2,3,4,5]
L = 5
k = 2
A = np.zeros((len(ns),len(ss)))
for i,n in enumerate(ns):
    for j,s in enumerate(ss):
        A[i,j] = state_space_size_s_seg_exch_exch(n,L,k,s)
# entries are stored as floats in A, hence the int() conversion when printing
print_array_as_latex(A,rownames = map(lambda n: '%i'%n, ns),colnames = map(lambda s: '%i'%s, ss), element2str = lambda x: str(int(x)))

\begin{table}
    \begin{tabular}{rrrrrrr}
           & 0    & 1    & 2    & 3    & 4    & 5 \\
      1    & 6    & 0    & 0    & 0    & 0    & 0 \\
      2    & 6    & 5    & 8    & 6    & 6    & 3 \\
      3    & 6    & 10    & 24    & 42    & 56    & 52 \\
      4    & 6    & 15    & 52    & 144    & 330    & 506 \\
      5    & 6    & 20    & 92    & 384    & 1414    & 3708 \\
      10    & 6    & 45    & 560    & 10437    & 295170    & 9902525 \\
      20    & 6    & 95    & 3620    & 463107    & 275786496    & 649321895060 \\
    \end{tabular}
\end{table}
