In [1]:
from scipy.stats import nbinom, rv_discrete
from functools import cache
import numpy as np
import pandas as pd

# Base per-attempt success probability shared by every calculation in this
# notebook: cached_pmf scales it by the number of successes still outstanding
# per leaf, while mean and calculate0 use it directly.
# NOTE(review): the value 1/31 presumably reflects 31 equally likely outcomes
# per transformation attempt -- confirm against the modelled process.
probability_of_successful_transformation = 1/31

@cache
def cached_pmf(n, successes_per_leaf, leaves):
    """Probability mass at exactly ``n`` total attempts.

    The value is built recursively, one "stage" at a time.  In the first
    stage the per-attempt success probability is
    ``successes_per_leaf * probability_of_successful_transformation`` and
    ``leaves`` successes are required, so the number of attempts ``i`` it
    consumes follows a negative binomial law shifted by ``leaves``.  The
    remaining ``n - i`` attempts are then distributed according to the same
    function with ``successes_per_leaf - 1``.

    Parameters
    ----------
    n : number
        Total number of attempts; truncated with ``int``.
    successes_per_leaf : number
        Successes required per leaf; truncated with ``int``.
    leaves : number
        Number of leaves; truncated with ``int``.

    Returns
    -------
    float
        ``P(total attempts == n)``.  ``0.0`` below the minimum possible
        total ``successes_per_leaf * leaves``.

    Raises
    ------
    ValueError
        If ``successes_per_leaf`` or ``leaves`` is negative.
    """
    n = int(n)
    successes_per_leaf = int(successes_per_leaf)
    leaves = int(leaves)

    # Validate before any shortcut: otherwise two negative arguments (whose
    # product is positive) would silently fall into the "below support"
    # branch and return 0.0 instead of being rejected.
    if successes_per_leaf < 0 or leaves < 0:
        raise ValueError('invalid')

    required = successes_per_leaf * leaves
    if n < required:
        return 0.0

    # Nothing to achieve: all probability mass sits at zero attempts.
    if required == 0:
        return 1.0 if n == 0 else 0.0

    # The first stage uses between ``leaves`` attempts (every attempt
    # succeeds) and whatever is left once each later stage has taken its
    # minimum of ``leaves`` attempts.
    last_first_stage = n - (successes_per_leaf - 1) * leaves
    attempts = np.arange(leaves, last_first_stage + 1)

    # Evaluate the negative binomial once for the whole range instead of one
    # scalar scipy call per loop iteration.
    stage_probability = successes_per_leaf * probability_of_successful_transformation
    stage_weights = nbinom.pmf(attempts - leaves, leaves, stage_probability)

    result = 0.0
    # ``tolist`` yields plain Python ints so the recursive cache keys are
    # ordinary ints.
    for i, weight in zip(attempts.tolist(), stage_weights):
        result += weight * cached_pmf(n - i, successes_per_leaf - 1, leaves)

    return result

@cache
def cached_cdf(n, successes_per_leaf, leaves):
    """Cumulative probability up to and including ``int(n)`` attempts.

    Adds up ``cached_pmf(k, successes_per_leaf, leaves)`` for
    ``k = 0, 1, ..., int(n)``; the result is ``0.0`` when ``int(n)`` is
    negative.  The shape arguments are truncated with ``int`` before being
    handed to ``cached_pmf``.
    """
    upper = int(n)
    if upper < 0:
        # Empty summation range.
        return 0.0

    per_leaf = int(successes_per_leaf)
    leaf_count = int(leaves)

    accumulated = 0.0
    k = 0
    while k <= upper:
        accumulated += cached_pmf(k, per_leaf, leaf_count)
        k += 1

    return accumulated

def mean(successes_per_leaf, leaves):
    """Expected total number of attempts.

    Computes ``sum(leaves / p / i for i in 1..successes_per_leaf)`` with
    ``p = probability_of_successful_transformation``.

    Parameters
    ----------
    successes_per_leaf : number
        Successes required per leaf; truncated with ``int`` so that
        float-valued shape parameters (e.g. ``2.0``) are accepted, matching
        ``cached_pmf`` and ``cached_cdf``.
    leaves : number
        Number of leaves; truncated with ``int`` for the same reason.

    Returns
    -------
    float
        The expected value; ``0.0`` when ``successes_per_leaf`` is not
        positive.
    """
    successes_per_leaf = int(successes_per_leaf)
    leaves = int(leaves)

    # Accumulate term by term (i = 1 first) to keep the summation order stable.
    expected_attempts = 0.0
    for i in range(1, successes_per_leaf + 1):
        expected_attempts += leaves / probability_of_successful_transformation / i

    return expected_attempts
    

# Element-wise wrappers around the scalar helpers so they can be applied to
# numpy arrays; every wrapper is declared to produce float output.
vec_pmf = np.vectorize(cached_pmf, otypes=[float])
vec_cdf = np.vectorize(cached_cdf, otypes=[float])
vec_mean = np.vectorize(mean, otypes=[float])

class TransformationDist(rv_discrete):
    """Number of transformations distribution"""

    # Discrete distribution with two shape parameters, ``successes_per_leaf``
    # and ``leaves``.  Each hook below forwards to the matching vectorised
    # module-level helper.  The parameter names of the hooks are left exactly
    # as they are because scipy derives the shape names from them.

    def _pmf(self, n, successes_per_leaf, leaves):
        """Probability mass at ``n``, evaluated element-wise via ``vec_pmf``."""
        probabilities = vec_pmf(n, successes_per_leaf, leaves)
        return probabilities

    def _cdf(self, n, successes_per_leaf, leaves):
        """Cumulative probability at ``n``, evaluated element-wise via ``vec_cdf``."""
        cumulative = vec_cdf(n, successes_per_leaf, leaves)
        return cumulative

    def _stats(self, successes_per_leaf, leaves):
        """Return ``[mean, None, None, None]``; only the mean is supplied here."""
        expected = vec_mean(successes_per_leaf, leaves)
        return [expected, None, None, None]

    def _get_support(self, successes_per_leaf, leaves):
        """Support runs from ``successes_per_leaf * leaves`` up to infinity."""
        lower_bound = successes_per_leaf * leaves
        return lower_bound, np.inf
    
# Shared distribution instance; calculate() queries its mean and interval.
dist = TransformationDist()
        

def calculate0(number_transformations, confidence):
    """Mean and central interval from a shifted negative binomial.

    Uses ``nbinom`` with ``number_transformations`` required successes and
    success probability ``probability_of_successful_transformation``.  Each
    returned value has ``number_transformations`` added to it.

    Returns
    -------
    tuple
        ``(expected, low, high)`` where ``low`` and ``high`` are the
        quantiles at ``(1 - confidence) / 2`` and ``(1 + confidence) / 2``.
    """
    p = probability_of_successful_transformation
    r = number_transformations

    def shifted_quantile(q):
        # nbinom quantile plus the same shift of r that is applied to the mean.
        return nbinom.ppf(q, r, p) + r

    expected = nbinom.mean(r, p) + r
    low = shifted_quantile((1 - confidence) / 2)
    high = shifted_quantile((1 + confidence) / 2)

    return expected, low, high


def calculate(number_transformations_per_leaf, leaves, confidence):
    """Mean and confidence interval taken from the module-level ``dist``.

    Returns
    -------
    tuple
        ``(expected, low, high)``: ``dist.mean`` for the two shape
        parameters, followed by the two endpoints that ``dist.interval``
        returns for ``confidence``.
    """
    shape_args = (number_transformations_per_leaf, leaves)

    expected = dist.mean(*shape_args)
    interval_low, interval_high = dist.interval(confidence, *shape_args)

    return expected, interval_low, interval_high
    

In [None]:

# Parameter grid: every combination of properties per leaf (1-7),
# leaves (1-8) and confidence level gets one row in the result table.
confidences = [0.68, 0.95, 0.997]
properties_per_leaf = range(1, 8)
leaves = range(1, 9)
index = pd.MultiIndex.from_product(
    [properties_per_leaf, leaves, confidences],
    names=['properties per leaf', 'leaves', 'confidence'],
)
data = pd.DataFrame(index=index, columns=['expected', 'low', 'high'])

# Fill the table row by row, printing the current combination as progress.
for per_leaf in properties_per_leaf:
    for leaf_count in leaves:
        for level in confidences:
            print(per_leaf, leaf_count, level)
            row_key = (per_leaf, leaf_count, level)
            data.loc[row_key] = list(calculate(per_leaf, leaf_count, level))

1 1 0.68
1 1 0.95
1 1 0.997
1 2 0.68
1 2 0.95
1 2 0.997
1 3 0.68
1 3 0.95
1 3 0.997
1 4 0.68
1 4 0.95
1 4 0.997
1 5 0.68
1 5 0.95
1 5 0.997
1 6 0.68
1 6 0.95
1 6 0.997
1 7 0.68
1 7 0.95
1 7 0.997
1 8 0.68
1 8 0.95
1 8 0.997
2 1 0.68
2 1 0.95
2 1 0.997
2 2 0.68
2 2 0.95
2 2 0.997
2 3 0.68
2 3 0.95
2 3 0.997
2 4 0.68
2 4 0.95
2 4 0.997
2 5 0.68
2 5 0.95
2 5 0.997
2 6 0.68
2 6 0.95
2 6 0.997
2 7 0.68
2 7 0.95
2 7 0.997
2 8 0.68
2 8 0.95
2 8 0.997
3 1 0.68
3 1 0.95
3 1 0.997
3 2 0.68
3 2 0.95
3 2 0.997
3 3 0.68
3 3 0.95
3 3 0.997
3 4 0.68
3 4 0.95
3 4 0.997
3 5 0.68
3 5 0.95
3 5 0.997
3 6 0.68
3 6 0.95
3 6 0.997
3 7 0.68
3 7 0.95
3 7 0.997
3 8 0.68
3 8 0.95
3 8 0.997
4 1 0.68
4 1 0.95
4 1 0.997
4 2 0.68
4 2 0.95
4 2 0.997
4 3 0.68
4 3 0.95
4 3 0.997
4 4 0.68
4 4 0.95
4 4 0.997
4 5 0.68
4 5 0.95
4 5 0.997
4 6 0.68
4 6 0.95
4 6 0.997
4 7 0.68
4 7 0.95
4 7 0.997
4 8 0.68
4 8 0.95
4 8 0.997
5 1 0.68
5 1 0.95
5 1 0.997
5 2 0.68
5 2 0.95
5 2 0.997
5 3 0.68
5 3 0.95
5 3 0.997
5 4 0.68
5 4 0.95
5 

In [None]:
# Combine the three index levels into a single numeric lookup key
# (properties per leaf * 10 + leaves + confidence), then write the table to CSV.
data['key'] = (
    data.index.get_level_values('properties per leaf') * 10
    + data.index.get_level_values('leaves')
    + data.index.get_level_values('confidence')
)
data.loc[:, ['key', 'expected', 'low', 'high']].to_csv('transformation_table.csv')