In [None]:
import numpy as np
import matplotlib.pyplot as plt
import random


In [None]:
# Plot the average number of tests against batch size for every p value and report each optimum
def find_optimal_batch_size_for_p_values(N, p_values, k_range, iterations=1):
    """Plot average test count versus batch size for each p and report the best batch size.

    For every probability in ``p_values`` the function calls
    ``simulate_batch_testing`` once per batch size in ``k_range``, draws the
    resulting curve on a shared figure, and prints the batch size with the
    lowest average together with the workload reduction returned by
    ``calculate_workload_reduction`` for that batch size.

    Args:
        N: Total number of samples passed through to the simulation.
        p_values: Iterable of per-sample positive probabilities to compare.
        k_range: Iterable of candidate batch sizes (any iterable, including
            one-shot iterators; it is materialized exactly once).
        iterations: Number of simulation runs averaged per (p, k) pair.

    Returns:
        None. The results are printed and the figure is shown.

    Raises:
        ValueError: If ``k_range`` contains no batch sizes.
    """
    # Materialize the candidates once, before the loop: the tick computation
    # below needs them even when p_values is empty, and a one-shot iterator
    # would otherwise be exhausted after the first p.
    k_values = list(k_range)
    if not k_values:
        raise ValueError("k_range must contain at least one batch size")

    fig, ax = plt.subplots(figsize=(12, 7))

    for p in p_values:
        avg_tests_per_k = [
            simulate_batch_testing(N, p, k, iterations) for k in k_values
        ]

        # One curve per p on the shared axes
        ax.plot(k_values, avg_tests_per_k, label=f'p = {p:.4f}')

        # Batch size with the lowest simulated average for this p
        optimal_k = k_values[int(np.argmin(avg_tests_per_k))]
        reduction = calculate_workload_reduction(N, p, optimal_k, iterations)
        print(f"Optimal batch size (k) for p = {p:.4f}: {optimal_k}, with expected reduction in workload: {reduction:.2f}%")

    # Finalizing the plot
    ax.set_title('Average Number of Tests vs. Batch Size for Different p Values')
    ax.set_xlabel('Batch Size (k)')
    ax.set_ylabel('Average Number of Tests')
    # Show at most roughly 20 ticks so the x-axis stays readable
    ax.set_xticks(k_values[::max(1, len(k_values) // 20)])
    if ax.lines:
        # Only build a legend when at least one curve was drawn
        ax.legend()
    ax.grid(True)
    plt.show()

In [None]:
def simulate_batch_testing(N, p, k, iterations=1):
    """Estimate the average number of tests used by two-stage batch (pooled) testing.

    Each iteration draws ``N`` independent samples that are positive with
    probability ``p`` and splits them into consecutive batches of at most
    ``k`` samples. Every batch costs one pooled test; a batch containing at
    least one positive additionally costs one individual test per sample in
    that batch (this applies to single-sample batches as well).

    Args:
        N: Total number of samples.
        p: Probability that a single sample is positive.
        k: Batch size. When ``N`` is not a multiple of ``k`` the last batch is
            smaller and is charged for its actual size.
        iterations: Number of independent simulation runs to average over.

    Returns:
        The mean number of tests per run, as a float.

    Raises:
        ValueError: If ``k`` or ``iterations`` is less than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    if iterations < 1:
        raise ValueError("iterations must be at least 1")

    # Batch layout is identical in every iteration, so compute it once.
    batch_starts = np.arange(0, N, k)
    # The trailing batch holds only the remaining N % k samples (if any).
    batch_sizes = np.minimum(k, N - batch_starts)

    total_tests = 0
    for _ in range(iterations):
        samples = np.random.rand(N) < p
        if batch_starts.size == 0:
            # No samples means no batches and therefore no tests.
            continue
        # True for each batch that contains at least one positive sample.
        batch_positive = np.logical_or.reduceat(samples, batch_starts)
        # One pooled test per batch, plus a retest of every member of each
        # positive batch.
        total_tests += batch_starts.size + int(batch_sizes[batch_positive].sum())

    return total_tests / iterations

def calculate_workload_reduction(N, p, optimal_k, iterations=1):
    """Return the percentage of tests saved by batching compared with testing all N samples.

    Runs ``simulate_batch_testing`` with batch size ``optimal_k`` and compares
    its average test count against the ``N`` tests of one-per-sample testing.

    Args:
        N: Total number of samples (also the individual-testing baseline).
        p: Probability that a single sample is positive.
        optimal_k: Batch size to evaluate.
        iterations: Number of simulation runs to average over.

    Returns:
        The relative saving as a percentage; negative when batching needs
        more tests than individual testing.
    """
    batched_tests = simulate_batch_testing(N, p, optimal_k, iterations)
    saved_fraction = (N - batched_tests) / N
    return saved_fraction * 100


In [None]:
# Experiment configuration
SEED = 42  # Fixed seed so Restart & Run All reproduces the same numbers
N = 1_000_000  # Total number of samples
p_values = [10**-1, 10**-2, 10**-3, 10**-4]  # Per-sample positive probabilities to compare
iterations = 100  # Number of iterations for simulation
max_k = 100  # Maximum batch size to consider
k_range = range(1, max_k + 1)  # Candidate batch sizes 1..max_k inclusive

# The simulation draws from numpy's global random state, so seed it before running
np.random.seed(SEED)

find_optimal_batch_size_for_p_values(N, p_values, k_range, iterations)