In [1]:
# Lower precision bound; the generation loop derives the register count of
# each table as 2**precision.
minimum_precision = 4
# THIS IS AN INCLUSIVE RANGE!!
# NOTE(review): the table-generation loop iterates
# range(minimum_precision, maximum_precision - 3), so this bound is not used
# inclusively there -- confirm which precisions are meant to get a table.
maximum_precision = 16

In [2]:
import numpy as np

def compute_correction(
    number_of_registers: float,
    number_of_zero_registers: float
) -> float:
    """Return the small-range correction ``m * ln(m / v)``.

    Args:
        number_of_registers: Total number of registers, ``m``.
        number_of_zero_registers: Number of registers equal to zero, ``v``.

    Returns:
        ``m`` multiplied by the natural logarithm (``np.log``) of ``m / v``.
    """
    registers_per_zero_register = number_of_registers / number_of_zero_registers
    return number_of_registers * np.log(registers_per_zero_register)

# Build one Rust `match` arm per register count. Each arm is a literal array
# indexed by `number_of_zero_registers - 1`, so entry 0 corresponds to v = 1.
# NOTE(review): the range stops before `maximum_precision - 3`; larger register
# counts rely on the `_ =>` fallback arm of the generated function. Presumably
# this caps the size of the generated file -- confirm.
match_arms = []
for precision in range(minimum_precision, maximum_precision - 3):
    number_of_registers = 2**precision
    # Go through compute_correction so the table entries and the doc-test
    # value below are guaranteed to come from the same formula.
    values = ", ".join(
        f"{compute_correction(number_of_registers, zero_registers):.3f}"
        for zero_registers in range(1, number_of_registers + 1)
    )
    match_arms.append(
        f"\t\t{number_of_registers} => [{values}][number_of_zero_registers - 1],"
    )
cases = "\n".join(match_arms)
# Expected value embedded in the generated documentation example
# (16 registers, 2 of them zero).
correct_value = compute_correction(16, 2)

In [3]:
# Rust source template for the generated lookup function. This is an f-string:
# `{cases}` and `{correct_value:.3f}` are substituted by Python, while doubled
# braces emit the literal braces of the Rust code.
correction_lookup = f"""
/// Returns a lookup table value for small range correction given a number of zero registers
/// and a number of total registers. The small range correction is used to adjust the
/// cardinality estimate for cases where the estimate is smaller than half of the total number
/// of registers, which can result in undercounting due to the limited number of registers.
///
/// The correction factor is calculated as m * ln(m/v), where m is the total number of registers
/// and v is the number of zero registers. The function returns a lookup table value for the
/// correction factor based on the input parameters. The lookup table contains precomputed values
/// for the most common combinations of m (total number of registers) and v (number of zero registers).
///
/// # Arguments
/// * `number_of_zero_registers`: The number of registers with value 0
///
/// # Panics
///
/// For register counts covered by the lookup table, panics if
/// `number_of_zero_registers` is 0 or larger than `NUMBER_OF_REGISTERS`.
///
/// # Examples
///
/// ```
/// # use hyperloglog_rs::prelude::*;
/// let result = get_small_correction_lookup_table::<16>(2);
/// assert_eq!(result, {correct_value:.3f}_f32);
/// ```
pub fn get_small_correction_lookup_table<const NUMBER_OF_REGISTERS: usize>(
    number_of_zero_registers: usize,
) -> f32 {{
    match NUMBER_OF_REGISTERS {{
{cases}
        _ => NUMBER_OF_REGISTERS as f32 * (NUMBER_OF_REGISTERS as f32 / number_of_zero_registers as f32).ln(),
    }}
}}
"""

In [4]:
# Write the generated Rust module. The path is relative, so it is resolved
# against the current working directory. Rust source files must be UTF-8, so
# the encoding is pinned instead of relying on the platform default.
with open("src/small_corrections_lookup_table.rs", "w", encoding="utf-8") as f:
    f.write(correction_lookup)