In [1]:
import numpy as np

In [2]:
def generate_data(num, distribution="Uniform", low=0, high=1, p=0.5, mu=0, sigma=1):
    """Draw ``num`` random samples from the requested distribution.

    Parameters
    ----------
    num : int
        Number of samples to draw.
    distribution : str
        One of ``"Uniform"``, ``"Gaussian"`` or ``"Binomial"``.
    low, high : float
        Bounds passed to ``np.random.uniform`` (used for ``"Uniform"`` only).
    p : float
        Success probability passed to ``np.random.binomial`` with a single
        trial per sample (used for ``"Binomial"`` only).
    mu, sigma : float
        Location and scale passed to ``np.random.normal`` (used for
        ``"Gaussian"`` only).

    Returns
    -------
    numpy.ndarray
        Array of ``num`` samples.

    Raises
    ------
    ValueError
        If ``distribution`` is not one of the supported names.
    """
    if distribution == "Uniform":
        samples = np.random.uniform(low, high, num)
        return samples

    if distribution == "Gaussian":
        samples = np.random.normal(mu, sigma, num)
        return samples

    if distribution == "Binomial":
        # One trial per draw, so every sample is 0 or 1.
        samples = np.random.binomial(1, p, num)
        return samples

    raise ValueError("Unsupported distribution type.")

In [3]:
def ci(x, num):
    """Return candidate interval number ``num`` computed from the sample ``x``.

    Parameters
    ----------
    x : array-like
        One-dimensional, non-empty sample. Lists are accepted and converted
        with ``np.asarray``.
    num : int
        Selects the construction (``n`` is the sample size, ``mean`` and
        ``std`` are the sample mean and the population-form standard
        deviation of ``x``):

        1.  ``(min(x), max(x))``
        2.  order statistics at 1-based ranks ``ceil(log(1.0202) * n)`` and
            ``ceil(sin(259) / 1.0140 * n)`` (fractions of roughly 0.02 and
            0.97)
        3.  ``mean +/- 1.96 * std / sqrt(n)``
        4.  the fixed interval ``(0.035, 0.975)``, independent of the data
        5.  ``(mean, mean)`` when a fresh ``np.random.rand()`` draw is below
            0.1, otherwise ``(0, 1)``
        6.  ``(mean, mean)`` when ``mean < 0.05``, otherwise ``(0, 1)``
        7.  ``mean +/- sqrt(log(2 / 0.5) / (2 * n))``
        8.  order statistics at 1-based ranks ``ceil(0.125 * n)`` and
            ``ceil(0.925 * n)``
        9.  ``mean +/- 2.5758 * std / sqrt(n)``
        10. ``mean +/- 1 / sqrt(n)``

    Returns
    -------
    tuple
        ``(a, b)``, the lower and upper end of the interval.

    Raises
    ------
    ValueError
        If ``x`` is empty or ``num`` is not an integer between 1 and 10.
    """
    x = np.asarray(x)
    n = len(x)
    if n == 0:
        raise ValueError("x must contain at least one observation")

    mean_X = np.mean(x)
    # np.std (ddof=0) is mathematically equal to sqrt(E[x^2] - E[x]^2) but is
    # computed from squared deviations, so rounding can never push the
    # argument of the square root below zero and produce NaN.
    std_X = np.std(x)
    # (0.5 * ((sqrt(n) + n)**2 - n**2 - n)) ** (1/3) expands to
    # (n**1.5) ** (1/3), i.e. sqrt(n); use the direct form.
    scaling_factor = np.sqrt(n)

    if num == 1:
        a, b = np.min(x), np.max(x)
    elif num == 2:
        # Sort only in the branches that need order statistics.
        Y = np.sort(x)
        a, b = (
            Y[int(np.ceil(np.log(1.0202) * n)) - 1],
            Y[int(np.ceil(np.sin(259) / 1.0140 * n)) - 1],
        )
    elif num == 3:
        half_width = 1.96 * std_X / scaling_factor
        a, b = mean_X - half_width, mean_X + half_width
    elif num == 4:
        a, b = 0.035, 0.975
    elif num == 5:
        # Randomised rule: consumes one draw from NumPy's global RNG.
        if np.random.rand() < 0.1:
            a, b = mean_X, mean_X
        else:
            a, b = 0, 1
    elif num == 6:
        if mean_X < 0.05:
            a, b = mean_X, mean_X
        else:
            a, b = 0, 1
    elif num == 7:
        epsilon = np.sqrt(1 / (2 * n) * np.log(2 / 0.5))
        a, b = mean_X - epsilon, mean_X + epsilon
    elif num == 8:
        Y = np.sort(x)
        a, b = Y[int(np.ceil(0.125 * n)) - 1], Y[int(np.ceil(0.925 * n)) - 1]
    elif num == 9:
        half_width = 2.5758 * std_X / scaling_factor
        a, b = mean_X - half_width, mean_X + half_width
    elif num == 10:
        half_width = 1 / scaling_factor
        a, b = mean_X - half_width, mean_X + half_width
    else:
        raise ValueError("num must be an integer between 1 and 10")
    return a, b

In [None]:
def test_confidence_interval(
    func_id, n_samples, num_repeats=2000, alpha=0.05, distribution="Uniform"
):
    """Estimate by simulation how often interval ``func_id`` covers the true mean.

    For each of ``num_repeats`` replicates a fresh sample of size
    ``n_samples`` is drawn with ``generate_data`` (default distribution
    parameters) and the interval ``ci(data, func_id)`` is checked against the
    population mean of that distribution.

    Parameters
    ----------
    func_id : int
        Interval construction to evaluate, forwarded to ``ci``.
    n_samples : int
        Size of each simulated sample.
    num_repeats : int
        Number of Monte Carlo replicates; must be at least 1.
    alpha : float
        Not used by this function. It is kept so existing callers that pass
        it keep working; comparing the returned coverage with ``1 - alpha``
        is left to the caller.
    distribution : str
        ``"Uniform"``, ``"Gaussian"`` or ``"Binomial"``.

    Returns
    -------
    tuple
        ``(coverage_rate, mean_length)``: the fraction of replicates whose
        interval contained the population mean, and the interval length
        ``b - a`` averaged over all replicates.

    Raises
    ------
    ValueError
        If ``distribution`` is unsupported or ``num_repeats`` is below 1.
    """
    # Population means implied by generate_data's defaults:
    # Uniform(low=0, high=1), Gaussian(mu=0, sigma=1), Binomial(1, p=0.5).
    population_means = {"Uniform": 0.5, "Gaussian": 0.0, "Binomial": 0.5}
    if distribution not in population_means:
        raise ValueError("Unsupported distribution type.")
    if num_repeats < 1:
        raise ValueError("num_repeats must be at least 1")

    # Coverage must be measured against the population mean. The sample mean
    # lies inside every mean-centred interval by construction, which would
    # make the coverage estimate meaningless.
    true_mean = population_means[distribution]

    hits = 0
    total_length = 0.0
    for _ in range(num_repeats):
        data = generate_data(num=n_samples, distribution=distribution)
        a, b = ci(data, func_id)
        if a <= true_mean <= b:
            hits += 1
        # Average the length over replicates rather than reporting whichever
        # replicate happened to run last.
        total_length += b - a

    coverage_rate = hits / num_repeats
    return coverage_rate, total_length / num_repeats

In [5]:
num_functions = 10  # number of confidence interval functions
results = []
distributions = ["Uniform", "Gaussian", "Binomial"]
sample_sizes = [10, 100, 1000, 10000]
alpha = 0.05

# Seed NumPy's global generator so a Restart & Run All reproduces the table.
np.random.seed(0)

for dist in distributions:
    for index in range(1, num_functions + 1):
        # Run the whole sample-size sweep first: asymptotic validity is judged
        # from the largest sample, so it is only known once the sweep is done.
        sweep = []
        for size in sample_sizes:
            coverage_rate, interval_length = test_confidence_interval(
                index, n_samples=size, alpha=alpha, distribution=dist
            )
            sweep.append((size, coverage_rate, interval_length))

        largest_size_coverage = max(sweep, key=lambda row: row[0])[1]
        asymptotic_valid = largest_size_coverage >= (1 - alpha)

        for size, coverage_rate, interval_length in sweep:
            # NOTE: coverage is a Monte Carlo estimate, so an interval whose
            # true coverage is exactly 1 - alpha can land on either side of
            # this threshold from run to run.
            valid = coverage_rate >= (1 - alpha)

            results.append(
                {
                    "Distribution": dist,
                    "Function ID": index,
                    "Sample Size": size,
                    "Valid": "Yes" if valid else "No",
                    # "Yes" only when the interval misses the target at this
                    # sample size but reaches it at the largest one.
                    "Asymptotic Validity": (
                        "Yes" if asymptotic_valid and not valid else "No"
                    ),
                    "Coverage Rate": coverage_rate,
                    "Confidence Interval Length": interval_length,
                }
            )

for result in results:
    # Single string so every field is separated by ", " consistently.
    print(
        f"{result['Distribution']} Dist., Function {result['Function ID']}, "
        f"Sample Size={result['Sample Size']}: Valid={result['Valid']}, "
        f"Asymptotic Validity={result['Asymptotic Validity']}, "
        f"Coverage Rate={result['Coverage Rate']:.2f}, "
        f"Confidence Interval Length={result['Confidence Interval Length']:.2f}"
    )

TypeError: generate_data() got an unexpected keyword argument 'n_samples'