# Batch Testing Tutorial

This tutorial has two goals:

1. Get familiar with our code.
2. Reproduce the results.



In [1]:
import numpy as np
import pandas as pd
from numba import jit
import time
import numba
import fast_btk as fbtk
import collections
from sklearn.metrics import precision_score, recall_score
%load_ext autoreload
%autoreload 2

# Data generation

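The function `data_gen` generates a population of a given size and infection rate. It returns an array with one row per subject: the first column is the subject index and the second column is the infection status (1 = infected).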

In [2]:
# Seed NumPy's global RNG so the example population is reproducible.
np.random.seed(0)
# Generate 10 subjects with an infection rate of 0.1; the returned array is displayed as the cell output.
fbtk.data_gen(size = 10, p = 0.1)

Compilation is falling back to object mode WITH looplifting enabled because Function "data_gen" failed type inference due to: [1m[1m[1mNo implementation of function Function(<built-in function zeros>) found for signature:
 
 >>> zeros(Tuple(int64, Literal[int](2)), dtype=Function(<class 'int'>))
 
There are 2 candidate implementations:
[1m  - Of which 2 did not match due to:
  Overload of function 'zeros': File: numba\core\typing\npydecl.py: Line 511.
    With argument(s): '(UniTuple(int64 x 2), dtype=Function(<class 'int'>))':[0m
[1m   No match.[0m
[0m
[0m[1mDuring: resolving callee type: Function(<built-in function zeros>)[0m
[0m[1mDuring: typing of call at C:\Users\nehem\Documents\programming\batch-testing\batch_testing\fast_btk.py (396)
[0m
[1m
File "fast_btk.py", line 396:[0m
[1mdef data_gen(size, p):
    <source elided>
    random_table = np.random.binomial(size = size, p = p, n = 1)
[1m    test_array = np.zeros((size, 2), dtype = np.int)
[0m    [1m^[0m[0m


array([[0, 0],
       [1, 0],
       [2, 0],
       [3, 0],
       [4, 0],
       [5, 0],
       [6, 0],
       [7, 0],
       [8, 1],
       [9, 0]])

# Conventional Test

`conventional_test` returns the test results for a subject array, together with the number of tests consumed, given the probability of a type II error, the probability of a type I error, the number of repetitions, and whether sequential testing is used.


In [3]:
# Simulate 10 subjects at an infection rate of 0.1 and run the conventional test on them.
subject_array = fbtk.data_gen(10, 0.1)
test_result, consum = fbtk.conventional_test(
    subject_array, typeII_error=0.15, typeI_error=0.01, repeat=1
)
# Column 1 is compared between truth and prediction
# (presumably the infection status; column 0 is the subject index).
accuracy = np.mean(subject_array[:, 1] == test_result[:, 1])
print(f'accuracy: {accuracy}')
print(f'test consumption {consum}')

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  subject_array = fbtk.data_gen(10, 0.1)
Compilation is falling back to object mode WITH looplifting enabled because Function "conventional_test" failed type inference due to: [1m[1m[1mNo implementation of function Function(<built-in function zeros>) found for signature:
 
 >>> zeros(UniTuple(int64 x 2), dtype=Function(<class 'int'>))
 
There are 2 candidate implementations:
[1m      - Of which 2 did not match due to:
      Overload of function 'zeros': File: numba\core\typing\npydecl.py: Line 511.
        With argument(s): '(UniTuple(int64 x 2), dtype=Function(<class 'int'>))':[0m
[1m       No match.[0m
[0m
[0m[1mDuring: resolving callee type: Function(<built-in function zeros>)[0m
[0m[1mDuring: typing of call at C:\Users\nehem\Documents\programming\batch-testing\batch_testing\fast_btk.py (40)
[0m
[1m
File "fast_btk.py", line 40:[0m
[1mdef conventi

accuracy: 1.0
test consumption 10


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  test_result, consum = fbtk.conventional_test(subject_array, typeII_error = 0.15,


## Multi-step Batch Testing

`seq_test` returns the test results for a subject array, the total
test-kit consumption, and the number of individual tests, given the subject array,
stopping rule, batch size, probability of a type II error, probability of a type I error, number of repetitions, probability threshold, and
whether sequential testing is used.

The following code generates a population of size 100000 with an infection rate of 0.01. The multi-step batch test is configured with up to 3 sequential individual tests for 3 batch positives.

In [12]:
# Population of 100000 subjects with an infection rate of 0.01.
subject_array = fbtk.data_gen(100000, 0.01)

# Solve for the batch size once and reuse the value; the solver was previously
# called (and its result printed) twice with identical arguments.
# NOTE(review): the recorded output of this cell shows an AttributeError for
# `one_batch_test_int_solver`, which this cell does not call directly. The
# output is either stale or raised inside fast_btk; re-run on a fresh kernel.
batch_size = fbtk.one_batch_test_solver(0.01, 0.15, 0.01)
print(batch_size)

# seq_test returns the per-subject results, the total test consumption and the
# individual-test consumption (the last one is not reported here).
test_result, consum, ind_consum = fbtk.seq_test(
    subject_array, batch_size=batch_size, stop_rule=3, p=0.01,
    typeII_error=0.15, typeI_error=0.01, repeat=3, seq=True,
)
accuracy = np.mean(subject_array[:, 1] == test_result[:, 1])
print(f'accuracy: {accuracy}')
print(f'test consumption {consum}')

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  subject_array = fbtk.data_gen(100000, 0.01)


AttributeError: module 'fast_btk' has no attribute 'one_batch_test_int_solver'

# Reproduce Results

The following code reproduces the results in Table 7 and Table 8. We walk through Table 7 (a) and show its output.

In [None]:
# table 7 (a)
# Conventional (individual) testing: for every infection rate, simulate 100
# populations of 100000 subjects and summarise each metric as mean and SD.
time_start = time.time()
np.random.seed(0)

metrics = ['acc', 'sens', 'spec', 'PPV', 'NPV', 'test_consum']
columns = ['Infection_rate', 'Acc', 'Acc_SD', 'Sens', 'Sens_SD', 'Spec', 'Spec_SD',
           'PPV', 'PPV_SD', 'NPV', 'NPV_SD', 'Test_consum', 'Test_consum_SD']

rows = []  # one summary row per infection rate
for prob in [0.001, 0.01, 0.03, 0.05, 0.10]:
    temp_data = [fbtk.data_gen(100000, prob) for _ in range(100)]
    records = []  # one record of metrics per simulated population
    for subjects in temp_data:
        pred, consum = fbtk.conventional_test(subjects, typeII_error=0.15, typeI_error=0.01)
        truth = subjects[:, 1]
        guess = pred[:, 1]
        records.append({
            'acc': np.mean(guess == truth),
            'sens': recall_score(truth, guess),
            'spec': fbtk.specificity_score(truth, guess),
            'PPV': precision_score(truth, guess),
            'NPV': fbtk.npv_score(truth, guess),
            'test_consum': consum,
        })
    result = pd.DataFrame(records)
    result_mean = result.mean()
    result_std = result.std()  # pandas default: sample SD (ddof=1)
    row = [prob]
    for metric in metrics:
        row += [result_mean[metric], result_std[metric]]
    rows.append(row)

# Build the table once. Growing it with pd.concat from an empty frame is
# quadratic, warns on recent pandas, and gave every row the index 0.
df = pd.DataFrame(rows, columns=columns)

time_end = time.time()
print('running time:', time_end - time_start, 's')

In [None]:
# Show the result: the summary table built by the preceding cell (one row per infection rate).
df

Table 7 (b)

In [None]:
# table 7 (b)
# seq_test with a fixed batch size of 10, stop_rule = 1 and repeat = 1.
time_start = time.time()
np.random.seed(0)

# Metric columns read from the frame returned by fbtk.test_result; each one
# contributes a mean column and an SD column to the summary.
metrics = ['acc', 'sens', 'spec', 'PPV', 'NPV', 'test_consum', 'ind_consum', 'batch_consum']
columns = ['Infection_rate', 'Sequential_test', 'Stop_rule', 'Repeat', 'Prob_threshold',
           'Acc', 'Acc_SD', 'Sens', 'Sens_SD', 'Spec', 'Spec_SD', 'PPV', 'PPV_SD',
           'NPV', 'NPV_SD', 'Test_consum', 'Test_consum_SD', 'Ind_consum', 'Ind_consum_SD',
           'Batch_consum', 'Batch_consum_SD']

rows = []  # one summary row per parameter combination
for prob in [0.001, 0.01, 0.03, 0.05, 0.10]:
    # 100 simulated populations of 100000 subjects at this infection rate
    temp_data = [fbtk.data_gen(100000, prob) for _ in range(100)]
    # The single-element lists are kept so the grid can be widened easily.
    for seq in [True]:
        for stop_rule in [1]:
            for repeat in [1]:
                kwargs = {'stop_rule': stop_rule, 'p': prob, 'batch_size': 10,
                          'typeII_error': 0.15, 'typeI_error': 0.01, 'repeat': repeat,
                          'prob_threshold': 0.3, 'seq': seq}
                test_1 = fbtk.test_result(temp_data, fbtk.seq_test, **kwargs)
                temp_mean = test_1.mean()
                temp_std = test_1.std()
                row = [prob, seq, stop_rule, repeat, kwargs['prob_threshold']]
                for metric in metrics:
                    row += [temp_mean[metric], temp_std[metric]]
                rows.append(row)

# Build the table once. Growing it with pd.concat from an empty frame is
# quadratic, warns on recent pandas, and gave every row the index 0.
df = pd.DataFrame(rows, columns=columns)

time_end = time.time()
print('running time:', time_end - time_start, 's')

In [None]:
# Write the current summary table to 'table7_b.csv' in the working directory (index included as first column).
df.to_csv('table7_b.csv')

Table 7 (c)

In [None]:
# seq_test with the solver-chosen batch size (capped at 32), stop_rule = 1 and repeat = 3.
time_start = time.time()
np.random.seed(0)

# Metric columns read from the frame returned by fbtk.test_result; each one
# contributes a mean column and an SD column to the summary.
metrics = ['acc', 'sens', 'spec', 'PPV', 'NPV', 'test_consum', 'ind_consum', 'batch_consum']
columns = ['Infection_rate', 'Sequential_test', 'Stop_rule', 'Repeat', 'Prob_threshold',
           'Acc', 'Acc_SD', 'Sens', 'Sens_SD', 'Spec', 'Spec_SD', 'PPV', 'PPV_SD',
           'NPV', 'NPV_SD', 'Test_consum', 'Test_consum_SD', 'Ind_consum', 'Ind_consum_SD',
           'Batch_consum', 'Batch_consum_SD']

rows = []  # one summary row per parameter combination
for prob in [0.001, 0.01, 0.03, 0.05, 0.10]:
    # NOTE(review): this cell simulates 10 populations of 10000 subjects, while
    # the other cells use 100 populations of 100000. Confirm this is intended
    # and not a leftover from a quick test run.
    temp_data = [fbtk.data_gen(10000, prob) for _ in range(10)]
    # The batch size depends only on the infection rate, so compute it once per rate.
    batch_size = fbtk.one_batch_test_int_solver(prob, 0.15, 0.01, batch_limit=32)
    # The single-element lists are kept so the grid can be widened easily.
    for seq in [True]:
        for stop_rule in [1]:
            for repeat in [3]:
                kwargs = {'stop_rule': stop_rule, 'p': prob, 'batch_size': batch_size,
                          'typeII_error': 0.15, 'typeI_error': 0.01, 'repeat': repeat,
                          'prob_threshold': 0.3, 'seq': seq, 'batch_limit': 32}
                test_1 = fbtk.test_result(temp_data, fbtk.seq_test, **kwargs)
                temp_mean = test_1.mean()
                temp_std = test_1.std()
                row = [prob, seq, stop_rule, repeat, kwargs['prob_threshold']]
                for metric in metrics:
                    row += [temp_mean[metric], temp_std[metric]]
                rows.append(row)

# Build the table once. Growing it with pd.concat from an empty frame is
# quadratic, warns on recent pandas, and gave every row the index 0.
df = pd.DataFrame(rows, columns=columns)

time_end = time.time()
print('running time:', time_end - time_start, 's')

In [None]:
# table 7 d
# matrix_test with side_length = 12, sq_repeat = 3 and ind_repeat = 3.
time_start = time.time()
np.random.seed(0)

# Metric columns read from the frame returned by fbtk.test_result; each one
# contributes a mean column and an SD column to the summary.
metrics = ['acc', 'sens', 'spec', 'PPV', 'NPV', 'test_consum', 'ind_consum', 'batch_consum']
columns = ['Infection_rate', 'Sq_Repeat', 'Ind_Repeat',
           'Acc', 'Acc_SD', 'Sens', 'Sens_SD', 'Spec', 'Spec_SD', 'PPV', 'PPV_SD',
           'NPV', 'NPV_SD', 'Test_consum', 'Test_consum_SD', 'Ind_consum', 'Ind_consum_SD',
           'Batch_consum', 'Batch_consum_SD']

rows = []  # one summary row per parameter combination
for prob in [0.001, 0.01, 0.03, 0.05, 0.10]:
    # 100 simulated populations of 100000 subjects at this infection rate
    temp_data = [fbtk.data_gen(100000, prob) for _ in range(100)]
    # The single-element lists are kept so the grid can be widened easily.
    for sq_repeat in [3]:
        for ind_repeat in [3]:
            kwargs = {
                'side_length': 12,
                'typeII_error': 0.15,
                'typeI_error': 0.01,
                'sq_repeat': sq_repeat,
                'ind_repeat': ind_repeat,
            }
            test_1 = fbtk.test_result(temp_data, fbtk.matrix_test, **kwargs)
            temp_mean = test_1.mean()
            temp_std = test_1.std()
            row = [prob, sq_repeat, ind_repeat]
            for metric in metrics:
                row += [temp_mean[metric], temp_std[metric]]
            rows.append(row)

# Build the table once. Growing it with pd.concat from an empty frame is
# quadratic, warns on recent pandas, and gave every row the index 0.
df = pd.DataFrame(rows, columns=columns)

time_end = time.time()
print('running time:', time_end - time_start, 's')

In [None]:
# Display the summary table built by the preceding cell.
df

Table 7 (e)

In [None]:
# seq_test with the solver-chosen batch size, stop_rule = 3 and repeat = 1.
time_start = time.time()
np.random.seed(0)

# Metric columns read from the frame returned by fbtk.test_result; each one
# contributes a mean column and an SD column to the summary.
metrics = ['acc', 'sens', 'spec', 'PPV', 'NPV', 'test_consum', 'ind_consum', 'batch_consum']
columns = ['Infection_rate', 'Sequential_test', 'Stop_rule', 'Repeat', 'Prob_threshold',
           'Acc', 'Acc_SD', 'Sens', 'Sens_SD', 'Spec', 'Spec_SD', 'PPV', 'PPV_SD',
           'NPV', 'NPV_SD', 'Test_consum', 'Test_consum_SD', 'Ind_consum', 'Ind_consum_SD',
           'Batch_consum', 'Batch_consum_SD']

rows = []  # one summary row per parameter combination
for prob in [0.001, 0.01, 0.03, 0.05, 0.10]:
    # 100 simulated populations of 100000 subjects at this infection rate
    temp_data = [fbtk.data_gen(100000, prob) for _ in range(100)]
    # The batch size depends only on the infection rate, so compute it once per rate.
    batch_size = fbtk.one_batch_test_int_solver(prob, 0.15, 0.01)
    # The single-element lists are kept so the grid can be widened easily.
    for seq in [True]:
        for stop_rule in [3]:
            for repeat in [1]:
                kwargs = {'stop_rule': stop_rule, 'p': prob, 'batch_size': batch_size,
                          'typeII_error': 0.15, 'typeI_error': 0.01, 'repeat': repeat,
                          'prob_threshold': 0.3, 'seq': seq}
                test_1 = fbtk.test_result(temp_data, fbtk.seq_test, **kwargs)
                temp_mean = test_1.mean()
                temp_std = test_1.std()
                row = [prob, seq, stop_rule, repeat, kwargs['prob_threshold']]
                for metric in metrics:
                    row += [temp_mean[metric], temp_std[metric]]
                rows.append(row)

# Build the table once. Growing it with pd.concat from an empty frame is
# quadratic, warns on recent pandas, and gave every row the index 0.
df = pd.DataFrame(rows, columns=columns)

time_end = time.time()
print('running time:', time_end - time_start, 's')

In [None]:
# Write the current summary table to 'table7_e.csv' in the working directory (index included as first column).
df.to_csv('table7_e.csv')

Table 7 (f)

In [None]:
# seq_test with the solver-chosen batch size (capped at 32), stop_rule = 3 and repeat = 3.
time_start = time.time()
np.random.seed(0)

# Metric columns read from the frame returned by fbtk.test_result; each one
# contributes a mean column and an SD column to the summary.
metrics = ['acc', 'sens', 'spec', 'PPV', 'NPV', 'test_consum', 'ind_consum', 'batch_consum']
columns = ['Infection_rate', 'Sequential_test', 'Stop_rule', 'Repeat', 'Prob_threshold',
           'Acc', 'Acc_SD', 'Sens', 'Sens_SD', 'Spec', 'Spec_SD', 'PPV', 'PPV_SD',
           'NPV', 'NPV_SD', 'Test_consum', 'Test_consum_SD', 'Ind_consum', 'Ind_consum_SD',
           'Batch_consum', 'Batch_consum_SD']

rows = []  # one summary row per parameter combination
for prob in [0.001, 0.01, 0.03, 0.05, 0.10]:
    # 100 simulated populations of 100000 subjects at this infection rate
    temp_data = [fbtk.data_gen(100000, prob) for _ in range(100)]
    # The batch size depends only on the infection rate, so compute it once per rate.
    batch_size = fbtk.one_batch_test_int_solver(prob, 0.15, 0.01, batch_limit=32)
    # The single-element lists are kept so the grid can be widened easily.
    for seq in [True]:
        for stop_rule in [3]:
            for repeat in [3]:
                kwargs = {'stop_rule': stop_rule, 'p': prob, 'batch_size': batch_size,
                          'typeII_error': 0.15, 'typeI_error': 0.01, 'repeat': repeat,
                          'prob_threshold': 0.3, 'seq': seq, 'batch_limit': 32}
                test_1 = fbtk.test_result(temp_data, fbtk.seq_test, **kwargs)
                temp_mean = test_1.mean()
                temp_std = test_1.std()
                row = [prob, seq, stop_rule, repeat, kwargs['prob_threshold']]
                for metric in metrics:
                    row += [temp_mean[metric], temp_std[metric]]
                rows.append(row)

# Build the table once. Growing it with pd.concat from an empty frame is
# quadratic, warns on recent pandas, and gave every row the index 0.
df = pd.DataFrame(rows, columns=columns)

time_end = time.time()
print('running time:', time_end - time_start, 's')

In [None]:
# Display the summary table built by the preceding cell.
df

In [None]:
# Write the current summary table to 'table7_f_limit_32.csv' in the working directory (index included as first column).
df.to_csv('table7_f_limit_32.csv')

In [None]:
# seq_test with the solver-chosen batch size (capped at 64), stop_rule = 3 and repeat = 3.
time_start = time.time()
np.random.seed(0)

# Metric columns read from the frame returned by fbtk.test_result; each one
# contributes a mean column and an SD column to the summary.
metrics = ['acc', 'sens', 'spec', 'PPV', 'NPV', 'test_consum', 'ind_consum', 'batch_consum']
columns = ['Infection_rate', 'Sequential_test', 'Stop_rule', 'Repeat', 'Prob_threshold',
           'Acc', 'Acc_SD', 'Sens', 'Sens_SD', 'Spec', 'Spec_SD', 'PPV', 'PPV_SD',
           'NPV', 'NPV_SD', 'Test_consum', 'Test_consum_SD', 'Ind_consum', 'Ind_consum_SD',
           'Batch_consum', 'Batch_consum_SD']

rows = []  # one summary row per parameter combination
for prob in [0.001, 0.01, 0.03, 0.05, 0.10]:
    # 100 simulated populations of 100000 subjects at this infection rate
    temp_data = [fbtk.data_gen(100000, prob) for _ in range(100)]
    # The batch size depends only on the infection rate, so compute it once per rate.
    batch_size = fbtk.one_batch_test_int_solver(prob, 0.15, 0.01, batch_limit=64)
    # The single-element lists are kept so the grid can be widened easily.
    for seq in [True]:
        for stop_rule in [3]:
            for repeat in [3]:
                kwargs = {'stop_rule': stop_rule, 'p': prob, 'batch_size': batch_size,
                          'typeII_error': 0.15, 'typeI_error': 0.01, 'repeat': repeat,
                          'prob_threshold': 0.3, 'seq': seq, 'batch_limit': 64}
                test_1 = fbtk.test_result(temp_data, fbtk.seq_test, **kwargs)
                temp_mean = test_1.mean()
                temp_std = test_1.std()
                row = [prob, seq, stop_rule, repeat, kwargs['prob_threshold']]
                for metric in metrics:
                    row += [temp_mean[metric], temp_std[metric]]
                rows.append(row)

# Build the table once. Growing it with pd.concat from an empty frame is
# quadratic, warns on recent pandas, and gave every row the index 0.
df = pd.DataFrame(rows, columns=columns)

time_end = time.time()
print('running time:', time_end - time_start, 's')

In [None]:
# Write the current summary table to 'table7_f_limit_64.csv' in the working directory (index included as first column).
df.to_csv('table7_f_limit_64.csv')

In [None]:
# seq_test with the solver-chosen batch size (capped at 32), stop_rule = 2 and repeat = 2.
time_start = time.time()
np.random.seed(0)

# Metric columns read from the frame returned by fbtk.test_result; each one
# contributes a mean column and an SD column to the summary.
metrics = ['acc', 'sens', 'spec', 'PPV', 'NPV', 'test_consum', 'ind_consum', 'batch_consum']
columns = ['Infection_rate', 'Sequential_test', 'Stop_rule', 'Repeat', 'Prob_threshold',
           'Acc', 'Acc_SD', 'Sens', 'Sens_SD', 'Spec', 'Spec_SD', 'PPV', 'PPV_SD',
           'NPV', 'NPV_SD', 'Test_consum', 'Test_consum_SD', 'Ind_consum', 'Ind_consum_SD',
           'Batch_consum', 'Batch_consum_SD']

rows = []  # one summary row per parameter combination
for prob in [0.001, 0.01, 0.03, 0.05, 0.10]:
    # 100 simulated populations of 100000 subjects at this infection rate
    temp_data = [fbtk.data_gen(100000, prob) for _ in range(100)]
    # The batch size depends only on the infection rate, so compute it once per rate.
    batch_size = fbtk.one_batch_test_int_solver(prob, 0.15, 0.01, batch_limit=32)
    # The single-element lists are kept so the grid can be widened easily.
    for seq in [True]:
        for stop_rule in [2]:
            for repeat in [2]:
                kwargs = {'stop_rule': stop_rule, 'p': prob, 'batch_size': batch_size,
                          'typeII_error': 0.15, 'typeI_error': 0.01, 'repeat': repeat,
                          'prob_threshold': 0.3, 'seq': seq, 'batch_limit': 32}
                test_1 = fbtk.test_result(temp_data, fbtk.seq_test, **kwargs)
                temp_mean = test_1.mean()
                temp_std = test_1.std()
                row = [prob, seq, stop_rule, repeat, kwargs['prob_threshold']]
                for metric in metrics:
                    row += [temp_mean[metric], temp_std[metric]]
                rows.append(row)

# Build the table once. Growing it with pd.concat from an empty frame is
# quadratic, warns on recent pandas, and gave every row the index 0.
df = pd.DataFrame(rows, columns=columns)

time_end = time.time()
print('running time:', time_end - time_start, 's')

In [None]:
# Write the current summary table to 'stop_rule_2_limit_32.csv' in the working directory (index included as first column).
df.to_csv('stop_rule_2_limit_32.csv')

In [None]:
# seq_test with the solver-chosen batch size (capped at 64), stop_rule = 2 and repeat = 2.
time_start = time.time()
np.random.seed(0)

# Metric columns read from the frame returned by fbtk.test_result; each one
# contributes a mean column and an SD column to the summary.
metrics = ['acc', 'sens', 'spec', 'PPV', 'NPV', 'test_consum', 'ind_consum', 'batch_consum']
columns = ['Infection_rate', 'Sequential_test', 'Stop_rule', 'Repeat', 'Prob_threshold',
           'Acc', 'Acc_SD', 'Sens', 'Sens_SD', 'Spec', 'Spec_SD', 'PPV', 'PPV_SD',
           'NPV', 'NPV_SD', 'Test_consum', 'Test_consum_SD', 'Ind_consum', 'Ind_consum_SD',
           'Batch_consum', 'Batch_consum_SD']

rows = []  # one summary row per parameter combination
for prob in [0.001, 0.01, 0.03, 0.05, 0.10]:
    # 100 simulated populations of 100000 subjects at this infection rate
    temp_data = [fbtk.data_gen(100000, prob) for _ in range(100)]
    # The batch size depends only on the infection rate, so compute it once per rate.
    batch_size = fbtk.one_batch_test_int_solver(prob, 0.15, 0.01, batch_limit=64)
    # The single-element lists are kept so the grid can be widened easily.
    for seq in [True]:
        for stop_rule in [2]:
            for repeat in [2]:
                # batch_limit must match the value given to the solver above
                kwargs = {'stop_rule': stop_rule, 'p': prob, 'batch_size': batch_size,
                          'typeII_error': 0.15, 'typeI_error': 0.01, 'repeat': repeat,
                          'prob_threshold': 0.3, 'seq': seq, 'batch_limit': 64}
                test_1 = fbtk.test_result(temp_data, fbtk.seq_test, **kwargs)
                temp_mean = test_1.mean()
                temp_std = test_1.std()
                row = [prob, seq, stop_rule, repeat, kwargs['prob_threshold']]
                for metric in metrics:
                    row += [temp_mean[metric], temp_std[metric]]
                rows.append(row)

# Build the table once. Growing it with pd.concat from an empty frame is
# quadratic, warns on recent pandas, and gave every row the index 0.
df = pd.DataFrame(rows, columns=columns)

time_end = time.time()
print('running time:', time_end - time_start, 's')
df.to_csv('stop_rule_2_limit_64.csv')

In [None]:
# appendix A
# table 7 (a)
# Same conventional-testing experiment as Table 7 (a), but with
# typeII_error = 0.25 and typeI_error = 0.03.
time_start = time.time()
np.random.seed(0)

metrics = ['acc', 'sens', 'spec', 'PPV', 'NPV', 'test_consum']
columns = ['Infection_rate', 'Acc', 'Acc_SD', 'Sens', 'Sens_SD', 'Spec', 'Spec_SD',
           'PPV', 'PPV_SD', 'NPV', 'NPV_SD', 'Test_consum', 'Test_consum_SD']

rows = []  # one summary row per infection rate
for prob in [0.001, 0.01, 0.03, 0.05, 0.10]:
    temp_data = [fbtk.data_gen(100000, prob) for _ in range(100)]
    records = []  # one record of metrics per simulated population
    for subjects in temp_data:
        pred, consum = fbtk.conventional_test(subjects, typeII_error=0.25, typeI_error=0.03)
        truth = subjects[:, 1]
        guess = pred[:, 1]
        records.append({
            'acc': np.mean(guess == truth),
            'sens': recall_score(truth, guess),
            'spec': fbtk.specificity_score(truth, guess),
            'PPV': precision_score(truth, guess),
            'NPV': fbtk.npv_score(truth, guess),
            'test_consum': consum,
        })
    result = pd.DataFrame(records)
    result_mean = result.mean()
    result_std = result.std()  # pandas default: sample SD (ddof=1)
    row = [prob]
    for metric in metrics:
        row += [result_mean[metric], result_std[metric]]
    rows.append(row)

# Build the table once. Growing it with pd.concat from an empty frame is
# quadratic, warns on recent pandas, and gave every row the index 0.
df = pd.DataFrame(rows, columns=columns)

time_end = time.time()
print('running time:', time_end - time_start, 's')

In [None]:
# Write the current summary table to 'appendix_a.csv' in the working directory (index included as first column).
df.to_csv('appendix_a.csv')

In [None]:
# Appendix (b)
# seq_test with a fixed batch size of 10, stop_rule = 1 and repeat = 1,
# using typeII_error = 0.25 and typeI_error = 0.03.
time_start = time.time()
np.random.seed(0)

# Metric columns read from the frame returned by fbtk.test_result; each one
# contributes a mean column and an SD column to the summary.
metrics = ['acc', 'sens', 'spec', 'PPV', 'NPV', 'test_consum', 'ind_consum', 'batch_consum']
columns = ['Infection_rate', 'Sequential_test', 'Stop_rule', 'Repeat', 'Prob_threshold',
           'Acc', 'Acc_SD', 'Sens', 'Sens_SD', 'Spec', 'Spec_SD', 'PPV', 'PPV_SD',
           'NPV', 'NPV_SD', 'Test_consum', 'Test_consum_SD', 'Ind_consum', 'Ind_consum_SD',
           'Batch_consum', 'Batch_consum_SD']

rows = []  # one summary row per parameter combination
for prob in [0.001, 0.01, 0.03, 0.05, 0.10]:
    # 100 simulated populations of 100000 subjects at this infection rate
    temp_data = [fbtk.data_gen(100000, prob) for _ in range(100)]
    # The single-element lists are kept so the grid can be widened easily.
    for seq in [True]:
        for stop_rule in [1]:
            for repeat in [1]:
                kwargs = {'stop_rule': stop_rule, 'p': prob, 'batch_size': 10,
                          'typeII_error': 0.25, 'typeI_error': 0.03, 'repeat': repeat,
                          'prob_threshold': 0.3, 'seq': seq}
                test_1 = fbtk.test_result(temp_data, fbtk.seq_test, **kwargs)
                temp_mean = test_1.mean()
                temp_std = test_1.std()
                row = [prob, seq, stop_rule, repeat, kwargs['prob_threshold']]
                for metric in metrics:
                    row += [temp_mean[metric], temp_std[metric]]
                rows.append(row)

# Build the table once. Growing it with pd.concat from an empty frame is
# quadratic, warns on recent pandas, and gave every row the index 0.
df = pd.DataFrame(rows, columns=columns)

time_end = time.time()
print('running time:', time_end - time_start, 's')

In [None]:
# Write the current summary table to 'appendix_b.csv' in the working directory (index included as first column).
df.to_csv('appendix_b.csv')

In [None]:
# Appendix (c)
# seq_test with the solver-chosen batch size, stop_rule = 1 and repeat = 3,
# using typeII_error = 0.25 and typeI_error = 0.03.
time_start = time.time()
np.random.seed(0)

# Metric columns read from the frame returned by fbtk.test_result; each one
# contributes a mean column and an SD column to the summary.
metrics = ['acc', 'sens', 'spec', 'PPV', 'NPV', 'test_consum', 'ind_consum', 'batch_consum']
columns = ['Infection_rate', 'Sequential_test', 'Stop_rule', 'Repeat', 'Prob_threshold',
           'Acc', 'Acc_SD', 'Sens', 'Sens_SD', 'Spec', 'Spec_SD', 'PPV', 'PPV_SD',
           'NPV', 'NPV_SD', 'Test_consum', 'Test_consum_SD', 'Ind_consum', 'Ind_consum_SD',
           'Batch_consum', 'Batch_consum_SD']

rows = []  # one summary row per parameter combination
for prob in [0.001, 0.01, 0.03, 0.05, 0.10]:
    # 100 simulated populations of 100000 subjects at this infection rate
    temp_data = [fbtk.data_gen(100000, prob) for _ in range(100)]
    # The batch size depends only on the infection rate, so compute it once per rate.
    batch_size = fbtk.one_batch_test_int_solver(prob, 0.25, 0.03)
    # The single-element lists are kept so the grid can be widened easily.
    for seq in [True]:
        for stop_rule in [1]:
            for repeat in [3]:
                kwargs = {'stop_rule': stop_rule, 'p': prob, 'batch_size': batch_size,
                          'typeII_error': 0.25, 'typeI_error': 0.03, 'repeat': repeat,
                          'prob_threshold': 0.3, 'seq': seq}
                test_1 = fbtk.test_result(temp_data, fbtk.seq_test, **kwargs)
                temp_mean = test_1.mean()
                temp_std = test_1.std()
                row = [prob, seq, stop_rule, repeat, kwargs['prob_threshold']]
                for metric in metrics:
                    row += [temp_mean[metric], temp_std[metric]]
                rows.append(row)

# Build the table once. Growing it with pd.concat from an empty frame is
# quadratic, warns on recent pandas, and gave every row the index 0.
df = pd.DataFrame(rows, columns=columns)

time_end = time.time()
print('running time:', time_end - time_start, 's')

In [None]:
# Write the current summary table to 'appendix_c.csv' in the working directory (index included as first column).
df.to_csv('appendix_c.csv')

In [None]:
# Appendix (d)
# matrix_test with side_length = 12, sq_repeat = 3 and ind_repeat = 3,
# using typeII_error = 0.25 and typeI_error = 0.03.
time_start = time.time()
np.random.seed(0)

# Metric columns read from the frame returned by fbtk.test_result; each one
# contributes a mean column and an SD column to the summary.
metrics = ['acc', 'sens', 'spec', 'PPV', 'NPV', 'test_consum', 'ind_consum', 'batch_consum']
columns = ['Infection_rate', 'Sq_Repeat', 'Ind_Repeat',
           'Acc', 'Acc_SD', 'Sens', 'Sens_SD', 'Spec', 'Spec_SD', 'PPV', 'PPV_SD',
           'NPV', 'NPV_SD', 'Test_consum', 'Test_consum_SD', 'Ind_consum', 'Ind_consum_SD',
           'Batch_consum', 'Batch_consum_SD']

rows = []  # one summary row per parameter combination
for prob in [0.001, 0.01, 0.03, 0.05, 0.10]:
    # 100 simulated populations of 100000 subjects at this infection rate
    temp_data = [fbtk.data_gen(100000, prob) for _ in range(100)]
    # The single-element lists are kept so the grid can be widened easily.
    for sq_repeat in [3]:
        for ind_repeat in [3]:
            kwargs = {
                'side_length': 12,
                'typeII_error': 0.25,
                'typeI_error': 0.03,
                'sq_repeat': sq_repeat,
                'ind_repeat': ind_repeat,
            }
            test_1 = fbtk.test_result(temp_data, fbtk.matrix_test, **kwargs)
            temp_mean = test_1.mean()
            temp_std = test_1.std()
            row = [prob, sq_repeat, ind_repeat]
            for metric in metrics:
                row += [temp_mean[metric], temp_std[metric]]
            rows.append(row)

# Build the table once. Growing it with pd.concat from an empty frame is
# quadratic, warns on recent pandas, and gave every row the index 0.
df = pd.DataFrame(rows, columns=columns)

time_end = time.time()
print('running time:', time_end - time_start, 's')

In [None]:
# Write the current summary table to 'appendix_d.csv' in the working directory (index included as first column).
df.to_csv('appendix_d.csv')

In [None]:
# Appendix e
# seq_test with the solver-chosen batch size, stop_rule = 3 and repeat = 1,
# using typeII_error = 0.25 and typeI_error = 0.03.
time_start = time.time()
np.random.seed(0)

# Metric columns read from the frame returned by fbtk.test_result; each one
# contributes a mean column and an SD column to the summary.
metrics = ['acc', 'sens', 'spec', 'PPV', 'NPV', 'test_consum', 'ind_consum', 'batch_consum']
columns = ['Infection_rate', 'Sequential_test', 'Stop_rule', 'Repeat', 'Prob_threshold',
           'Acc', 'Acc_SD', 'Sens', 'Sens_SD', 'Spec', 'Spec_SD', 'PPV', 'PPV_SD',
           'NPV', 'NPV_SD', 'Test_consum', 'Test_consum_SD', 'Ind_consum', 'Ind_consum_SD',
           'Batch_consum', 'Batch_consum_SD']

rows = []  # one summary row per parameter combination
for prob in [0.001, 0.01, 0.03, 0.05, 0.10]:
    # 100 simulated populations of 100000 subjects at this infection rate
    temp_data = [fbtk.data_gen(100000, prob) for _ in range(100)]
    # The batch size depends only on the infection rate, so compute it once per rate.
    batch_size = fbtk.one_batch_test_int_solver(prob, 0.25, 0.03)
    # The single-element lists are kept so the grid can be widened easily.
    for seq in [True]:
        for stop_rule in [3]:
            for repeat in [1]:
                kwargs = {'stop_rule': stop_rule, 'p': prob, 'batch_size': batch_size,
                          'typeII_error': 0.25, 'typeI_error': 0.03, 'repeat': repeat,
                          'prob_threshold': 0.3, 'seq': seq}
                test_1 = fbtk.test_result(temp_data, fbtk.seq_test, **kwargs)
                temp_mean = test_1.mean()
                temp_std = test_1.std()
                row = [prob, seq, stop_rule, repeat, kwargs['prob_threshold']]
                for metric in metrics:
                    row += [temp_mean[metric], temp_std[metric]]
                rows.append(row)

# Build the table once. Growing it with pd.concat from an empty frame is
# quadratic, warns on recent pandas, and gave every row the index 0.
df = pd.DataFrame(rows, columns=columns)

time_end = time.time()
print('running time:', time_end - time_start, 's')

In [None]:
# Appendix f
# seq_test with the solver-chosen batch size, stop_rule = 3 and repeat = 3,
# using typeII_error = 0.25 and typeI_error = 0.03.
time_start = time.time()
np.random.seed(0)

# Metric columns read from the frame returned by fbtk.test_result; each one
# contributes a mean column and an SD column to the summary.
metrics = ['acc', 'sens', 'spec', 'PPV', 'NPV', 'test_consum', 'ind_consum', 'batch_consum']
columns = ['Infection_rate', 'Sequential_test', 'Stop_rule', 'Repeat', 'Prob_threshold',
           'Acc', 'Acc_SD', 'Sens', 'Sens_SD', 'Spec', 'Spec_SD', 'PPV', 'PPV_SD',
           'NPV', 'NPV_SD', 'Test_consum', 'Test_consum_SD', 'Ind_consum', 'Ind_consum_SD',
           'Batch_consum', 'Batch_consum_SD']

rows = []  # one summary row per parameter combination
for prob in [0.001, 0.01, 0.03, 0.05, 0.10]:
    # 100 simulated populations of 100000 subjects at this infection rate
    temp_data = [fbtk.data_gen(100000, prob) for _ in range(100)]
    # The batch size depends only on the infection rate, so compute it once per rate.
    batch_size = fbtk.one_batch_test_int_solver(prob, 0.25, 0.03)
    # The single-element lists are kept so the grid can be widened easily.
    for seq in [True]:
        for stop_rule in [3]:
            for repeat in [3]:
                kwargs = {'stop_rule': stop_rule, 'p': prob, 'batch_size': batch_size,
                          'typeII_error': 0.25, 'typeI_error': 0.03, 'repeat': repeat,
                          'prob_threshold': 0.3, 'seq': seq}
                test_1 = fbtk.test_result(temp_data, fbtk.seq_test, **kwargs)
                temp_mean = test_1.mean()
                temp_std = test_1.std()
                row = [prob, seq, stop_rule, repeat, kwargs['prob_threshold']]
                for metric in metrics:
                    row += [temp_mean[metric], temp_std[metric]]
                rows.append(row)

# Build the table once. Growing it with pd.concat from an empty frame is
# quadratic, warns on recent pandas, and gave every row the index 0.
df = pd.DataFrame(rows, columns=columns)

time_end = time.time()
print('running time:', time_end - time_start, 's')

In [None]:
# Scratch check: the number of '+' characters in an empty string (displays 0).
string = ''
string.count('+')

In [None]:
def helper_fun(n, string = ''):
    """
    Append '+' to `string` until it holds at least n '+' characters.

    input: n, the required number of '+' (or '-') characters;
           string, the starting sequence of '+'/'-' symbols
    output: `string` unchanged if it already contains n or more '+' or n or
            more '-', otherwise `string` followed by the missing '+' symbols

    Written as a loop rather than recursion so that a large n does not hit
    Python's recursion limit.
    """
    # Appending '+' never changes the '-' count, so it is checked only once.
    if string.count('-') >= n:
        return string
    plus_count = string.count('+')
    while plus_count < n:
        string += '+'
        plus_count += 1
    return string

In [None]:
def name_fun(n):
    """
    Enumerate the finish nodes of a +/- sequence under stopping rule n.

    A sequence is extended one symbol at a time and finishes as soon as it
    contains n '+' symbols or n '-' symbols.

    input: stopping rule
    output: (finish nodes in the order they are reached,
             finish nodes with exactly n '-',
             finish nodes with exactly n '+')
            For n <= 0 no sequence is generated and three empty lists are returned.
    """
    output = []
    frontier = ['']  # unfinished sequences of the current length
    # Any sequence of length 2n-1 holds n of one symbol, so 2n-1 rounds suffice.
    for _ in range(2*n-1):
        next_frontier = []
        for prefix in frontier:
            for symbol in ('+', '-'):
                candidate = prefix + symbol
                # Only the symbol just appended can have reached the threshold.
                if candidate.count(symbol) >= n:
                    output.append(candidate)
                else:
                    next_frontier.append(candidate)
        frontier = next_frontier

    # Classified once after the enumeration (not on every round), which also
    # keeps both names bound when the loop body never runs.
    neg_symbol = [x for x in output if x.count('-') == n]
    pos_symbol = [x for x in output if x.count('+') == n]

    return output, neg_symbol, pos_symbol





In [None]:
# Finish nodes for stopping rule 3: a = all nodes, b = negative nodes, c = positive nodes.
a, b, c = name_fun(3)

In [None]:
# Scratch arithmetic: (1 - 0.15) squared.
(1-0.15) ** 2

In [None]:
from scipy.special import binom

In [None]:
# Scratch arithmetic: sum over i = 0, 1, 2 of (1-0.15)^3 * (1 - 0.15^3) * C(2+i, i) * 0.15^i.
sum([(1-0.15) ** 3 * (1-0.15 ** 3) * binom(2+i, i) * 0.15 ** i for i in range(0, 3)])

In [None]:
# Scratch arithmetic: (1-0.15)^2 * (1 - 0.15^3).
(1-0.15) ** 2 * (1 - 0.15 ** 3)

In [None]:
def sensitivity(n, beta):
    """
    Sum a per-path weight over the positive finish nodes of stopping rule n.

    input: n, the stopping rule passed to name_fun;
           beta, the rate used in the weights (presumably the type II error
           rate -- confirm against the caller)
    output: sum over every positive finish node of
            (1-beta) * (1-beta)**(number of '+') * beta**(number of '-')
    """
    hit = 1 - beta
    positive_paths = name_fun(n)[2]
    return sum(hit * hit ** path.count('+') * beta ** path.count('-') for path in positive_paths)



In [None]:
# Scratch enumeration of the finish nodes for stopping rule n: a sequence is
# moved to `output` once its last symbol has been seen n times, otherwise it
# stays in `temp` and is extended on the next round.
n = 3
output = []
temp = ['']
for step in range(2 * n - 1):
    extended = [prefix + symbol for prefix in temp for symbol in '+-']
    output += [seq for seq in extended if seq.count(seq[-1]) >= n]
    temp = [seq for seq in extended if seq.count(seq[-1]) < n]

In [None]:
# Generate one population of size 100000 with infection rate 0.1.
temp = fbtk.data_gen(100000, 0.1)

In [None]:
# Run the node-tracking sequential test on `temp` and unpack its five return values.
a, b, c, d, e = fbtk.seq_test_with_node(temp,stop_rule = 3,p = 0.1, batch_size = 32, typeII_error = 0.15, typeI_error = 0.01, repeat = 1, 
prob_threshold = 0.3, seq = True, batch_limit = 32)

In [None]:
# Scratch check: compare a finite number against np.inf.
10 > np.inf

In [None]:
# Display `a` as currently bound in the kernel.
a

In [None]:
def node_summary(data, seq_test, stopping_rule, **kwargs):
    """
    Count finish nodes and collect per-stage batch figures for one or many populations.

    input: data, a single population or a list of populations;
           seq_test, a callable returning five values whose fourth is an
           iterable of node labels and whose fifth is the per-stage batch list
           (e.g. fbtk.seq_test_with_node);
           stopping_rule, the stopping rule passed to fbtk.name_fun;
           **kwargs, forwarded unchanged to seq_test
    output: (df, df_b) with one row per population.
            df has one column per finish node from fbtk.name_fun(stopping_rule)
            plus 'other', which accumulates every label that is not a finish node.
            df_b has columns stage_1 .. stage_{2*stopping_rule-1}.
            Both frames share the row labels 0..n-1.
    """
    finish_nodes, _, _ = fbtk.name_fun(stopping_rule)
    # Copy instead of extending in place so the list owned by fbtk is not mutated.
    node_names = list(finish_nodes) + ['other']
    stage_names = ['stage_' + str(i) for i in range(1, 2*stopping_rule)]

    df = pd.DataFrame([], columns = node_names)
    df_b = pd.DataFrame([], columns = stage_names)

    # A single population is handled as a one-element list so both cases share one code path.
    populations = data if isinstance(data, list) else [data]

    for row, population in enumerate(populations):
        _, _, _, node, batch_list = seq_test(population, **kwargs)
        counts = np.zeros(len(node_names))
        for label, count in collections.Counter(node).items():
            if label in node_names:
                counts[node_names.index(label)] = count
            else:
                counts[-1] += count
        # Use the same row label for both frames; labelling df_b with len(df)
        # after df had already grown shifted df_b's index by one.
        df.loc[row] = counts
        df_b.loc[row] = batch_list

    return df, df_b




In [None]:
# Scratch comparison: run seq_test_with_node and seq_test on the same simulated population.
s = 0.1
k = 10000000
time_start = time.time()  # NOTE(review): not read anywhere in this cell
np.random.seed(0)
data = fbtk.data_gen(100000, s)
# NOTE(review): the arguments below are positional; presumably they follow the order
# (stop_rule, p, batch_size, typeII_error, typeI_error, repeat, prob_threshold) used by
# the keyword calls in this notebook -- confirm against fast_btk.
a,b, c, d, e, = fbtk.seq_test_with_node(data, 3, 0.1,32, 0.15, 0.01, 3, 0.3, batch_limit=k)
a1, b1, c1 = fbtk.seq_test(data, 3, 0.1, 32, 0.15, 0.01, 3, 0.3, seq = True, batch_limit = k)

In [None]:
# Display `e`, the fifth value returned by fbtk.seq_test_with_node.
e

In [None]:
# Fraction of rows in which column 1 of `a` equals column 1 of `data`.
np.mean(a[:,1] == data[:,1])
#print(b, b1, c, c1)

In [None]:
# Node counts and per-stage batch figures for every (infection rate, batch upper
# limit) pair, 100 simulated populations each, with prob_threshold = 0.3.
time_start = time.time()
np.random.seed(0)

# The result columns come from the frames returned by node_summary. The former
# `columns=node_name.extend(...)` passed None, because list.extend returns None.
node_frames = []
batch_frames = []
for s in [0.001, 0.01, 0.03, 0.05, 0.1]:
    temp_data = [fbtk.data_gen(100000, s) for _ in range(100)]
    for i in [True]:
        for j in [3]: # stop_rule
            for k in [32, 64, 1000000]: # batch_upperlimit
                batch_size = fbtk.one_batch_test_int_solver(s, 0.15, 0.01, batch_limit= k)
                kwargs = {'stop_rule': j, 'p': s, 'batch_size': batch_size,
                'typeII_error': 0.15, 'typeI_error': 0.01, 'repeat': 3,
                'prob_threshold': 0.3, 'seq': i, 'batch_limit': k}
                test, test_b = node_summary(temp_data, fbtk.seq_test_with_node,j,**kwargs)
                # Tag both frames with the configuration that produced them.
                for frame in (test, test_b):
                    frame['p'] = s
                    frame['stop_rule'] = j
                    frame['upper limit'] = k
                node_frames.append(test)
                batch_frames.append(test_b)

# Concatenate once instead of growing the result inside the loop; the row
# labels of the individual frames are kept as they are.
result = pd.concat(node_frames)
result_b = pd.concat(batch_frames)

time_end = time.time()
print(time_end - time_start)

In [None]:
# Node counts and per-stage batch figures for the three lowest infection rates,
# 1000 simulated populations each, batch upper limit 1000000, prob_threshold = 1.
time_start = time.time()
np.random.seed(0)

# The result columns come from the frames returned by node_summary. The former
# `columns=node_name.extend(...)` passed None, because list.extend returns None.
node_frames = []
batch_frames = []
for s in [0.001, 0.01, 0.03]:
    temp_data = [fbtk.data_gen(100000, s) for _ in range(1000)]
    for i in [True]:
        for j in [3]: # stop_rule
            for k in [1000000]: # batch_upperlimit
                batch_size = fbtk.one_batch_test_int_solver(s, 0.15, 0.01, batch_limit= k)
                kwargs = {'stop_rule': j, 'p': s, 'batch_size': batch_size,
                'typeII_error': 0.15, 'typeI_error': 0.01, 'repeat': 3,
                'prob_threshold': 1, 'seq': i, 'batch_limit': k}
                test, test_b = node_summary(temp_data, fbtk.seq_test_with_node,j,**kwargs)
                # Tag both frames with the configuration that produced them.
                for frame in (test, test_b):
                    frame['p'] = s
                    frame['stop_rule'] = j
                    frame['upper limit'] = k
                node_frames.append(test)
                batch_frames.append(test_b)

# Concatenate once instead of growing the result inside the loop; the row
# labels of the individual frames are kept as they are.
result = pd.concat(node_frames)
result_b = pd.concat(batch_frames)

time_end = time.time()
print(time_end - time_start)

In [None]:
# Write the current `result` frame to more_sim.csv in the working directory.
result.to_csv('more_sim.csv')

In [None]:
# Display the per-stage batch frame.
result_b

In [None]:
# Write the current `result` and `result_b` frames to CSV files in the working directory.
# NOTE(review): `result` is whatever the most recently executed simulation cell produced.
result.to_csv('node_test.csv')
result_b.to_csv('batch_cum.csv')

In [None]:
# Node counts for every (infection rate, batch upper limit) pair, 100 simulated
# populations each, with prob_threshold = 1.
time_start = time.time()
np.random.seed(0)

# The result columns come from the frames returned by node_summary. The former
# `columns=node_name.extend(...)` passed None, because list.extend returns None.
node_frames = []
for s in [0.001, 0.01, 0.03, 0.05, 0.1]:
    temp_data = [fbtk.data_gen(100000, s) for _ in range(100)]
    for i in [True]:
        for j in [3]: # stop_rule
            for k in [32, 64, 1000000]: # batch_upperlimit
                batch_size = fbtk.one_batch_test_int_solver(s, 0.15, 0.01, batch_limit= k)
                kwargs = {'stop_rule': j, 'p': s, 'batch_size': batch_size,
                'typeII_error': 0.15, 'typeI_error': 0.01, 'repeat': 3,
                'prob_threshold': 1, 'seq': i, 'batch_limit': k}
                # node_summary returns (node frame, batch frame); only the node
                # frame is used here. Assigning columns on the tuple raised TypeError.
                test = node_summary(temp_data, fbtk.seq_test_with_node,j,**kwargs)[0]
                test['p'] = s
                test['stop_rule'] = j
                test['upper limit'] = k
                node_frames.append(test)

# Concatenate once instead of growing the result inside the loop; the row
# labels of the individual frames are kept as they are.
result = pd.concat(node_frames)

time_end = time.time()
print(time_end - time_start)

In [None]:
# Write the current `result` frame to node_test_without_threshold.csv in the working directory.
result.to_csv('node_test_without_threshold.csv')

In [None]:
# Scratch: turn the mapping `test` into one row of counts and append it to `df`.
# NOTE(review): relies on `test`, `a` and `df` left in the kernel by other cells;
# presumably `test` is a label -> count mapping and `a` a list of 21 column labels -- confirm.
temp = np.zeros(21)

for item in test:
    if item in a:
        idx = a.index(item)
        temp[idx] = test[item]
    else:
        # Labels not found in `a` are accumulated in the last slot.
        temp[-1] += test[item]
# Appends a new row on every execution, so re-running this cell is not idempotent.
df.loc[len(df)] = temp

In [None]:
# Display `df` as currently bound in the kernel.
df

In [None]:
# Display `df` again (same expression as the previous cell).
df

In [None]:
# Count the occurrences of each value in `node` and show them as a Series.
# NOTE(review): `node` is not defined in any visible cell -- confirm where it comes from.
pd.Series(collections.Counter(node))

In [None]:
import collections

In [None]:
# Count the occurrences of each value in `node`.
# NOTE(review): `node` is not defined in any visible cell -- confirm where it comes from.
collections.Counter(node)

In [None]:
# Node counts for every (infection rate, batch upper limit) pair, 100 simulated
# populations each, with prob_threshold = 1.
time_start = time.time()
np.random.seed(0)

# The result columns come from the frames returned by node_summary. The former
# `columns=node_name.extend(...)` passed None, because list.extend returns None.
node_frames = []
for s in [0.001, 0.01, 0.03, 0.05, 0.1]:
    temp_data = [fbtk.data_gen(100000, s) for _ in range(100)]
    for i in [True]:
        for j in [3]: # stop_rule
            for k in [32, 64, 1000000]: # batch_upperlimit
                batch_size = fbtk.one_batch_test_int_solver(s, 0.15, 0.01, batch_limit= k)
                kwargs = {'stop_rule': j, 'p': s, 'batch_size': batch_size,
                'typeII_error': 0.15, 'typeI_error': 0.01, 'repeat': 3,
                'prob_threshold': 1, 'seq': i, 'batch_limit': k}
                # node_summary returns (node frame, batch frame); only the node
                # frame is used here. Assigning columns on the tuple raised TypeError.
                test = node_summary(temp_data, fbtk.seq_test_with_node,j,**kwargs)[0]
                test['p'] = s
                test['stop_rule'] = j
                test['upper limit'] = k
                node_frames.append(test)

# Concatenate once instead of growing the result inside the loop; the row
# labels of the individual frames are kept as they are.
result = pd.concat(node_frames)

time_end = time.time()
print(time_end - time_start)

In [None]:
# Appendix f
# Effect of the batch upper limit: for each infection rate and batch limit, run
# fbtk.seq_test on 100 simulated populations of 100000 subjects and record the
# mean and standard deviation of every metric returned by fbtk.test_result.
time_start = time.time()
np.random.seed(0)

typeII_error, typeI_error = 0.15, 0.01
setting_columns = ['Infection_rate', 'Batch_upper_limit', 'Stop_rule', 'Repeat', 'Prob_threshold']
# (key in the fbtk.test_result output, column label in the summary table)
metric_columns = [('acc', 'Acc'), ('sens', 'Sens'), ('spec', 'Spec'), ('PPV', 'PPV'), ('NPV', 'NPV'),
                  ('test_consum', 'Test_consum'), ('ind_consum', 'Ind_consum'), ('batch_consum', 'Batch_consum')]
summary_columns = setting_columns + [name for _, label in metric_columns for name in (label, label + '_SD')]

summary_rows = []
for prob in [0.001, 0.01, 0.03, 0.05, 0.10]:
    temp_data = [fbtk.data_gen(100000, prob) for _ in range(100)]
    for batch_limit in [32, 64, 100000]:
        # The batch size depends only on the infection rate, the error rates and the limit.
        batch_size = fbtk.one_batch_test_int_solver(prob, typeII_error, typeI_error, batch_limit = batch_limit)
        for stop_rule in [3]:
            for repeat in [3]:
                kwargs = {'stop_rule': stop_rule, 'p': prob, 'batch_size': batch_size,
                          'typeII_error': typeII_error, 'typeI_error': typeI_error, 'repeat': repeat,
                          'prob_threshold': 0.3, 'seq': True, 'batch_limit': batch_limit}
                test_1 = fbtk.test_result(temp_data, fbtk.seq_test, **kwargs)
                temp_mean = test_1.mean()
                temp_std = test_1.std()
                row = {'Infection_rate': kwargs['p'], 'Batch_upper_limit': kwargs['batch_limit'],
                       'Stop_rule': kwargs['stop_rule'], 'Repeat': kwargs['repeat'],
                       'Prob_threshold': kwargs['prob_threshold']}
                for key, label in metric_columns:
                    row[label] = temp_mean[key]
                    row[label + '_SD'] = temp_std[key]
                summary_rows.append(row)

# Build the table once: concatenating onto an empty frame inside the loop is
# quadratic and left every row with index label 0. Rows are now labelled 0..n-1.
df = pd.DataFrame(summary_rows, columns = summary_columns)

time_end = time.time()
print('running time:', time_end - time_start, 's')

In [None]:
# Write the current `df` summary table to result_batch_limit.csv in the working directory.
df.to_csv('result_batch_limit.csv')

In [None]:
# Scratch call: solver result for p = 0.05 with error arguments 0.15 and 0.01 and batch_limit = 100000.
fbtk.one_batch_test_int_solver(0.05, 0.15, 0.01, batch_limit = 100000)

In [None]:
# IPython help lookup for the solver (interactive aid; only valid inside IPython/Jupyter).
?fbtk.one_batch_test_int_solver

In [None]:
# 3x3 integer matrix for the scratch matrix product.
B = np.array([[2,-2,0],[1, 0, -1],[0, 1, -1]])

In [None]:
# 3x4 matrix written as a nested list.
A = [[2, 4, 6, 8],[1, 2, 3, 4], [3, 5, 7, 9]]

In [None]:
# Scratch product (B @ A) @ v with the 4x1 column vector v = [1, -1, 1, 0]^T.
np.matmul(np.matmul(B, A) , np.array([[1],[-1], [1], [0]]))