In [4]:
import numpy as np
from cort_si import SI_CoRT, CoRT_builder

import warnings
warnings.filterwarnings("ignore")

## Testing p_value

In [5]:
# --- Simulation configuration ------------------------------------------------
n_target = 50          # passed to gen_data / SI_parametric as the target sample size
n_source = 20          # passed to gen_data as the per-source sample size
p = 100
K = 5
Ka = 3
h = 30
N = n_target + Ka * n_source   # target samples plus Ka source blocks
NI = n_target + n_source       # target samples plus one source block
# The three lamda_* values share the form 1.2 * sqrt(log(p) / m) and differ only in m.
# NOTE(review): presumably these are regularisation strengths — confirm against cort_si.
lamda_k_source = 1.2 * np.sqrt(np.log(p) / N)
lamda_1_source = 1.2 * np.sqrt(np.log(p) / NI)
lamda_not_source = 1.2 * np.sqrt(np.log(p) / n_target)
alpha = 0.05           # rejection threshold applied to each p-value below
T = 3
s_len = 5
s_vector = [1] * s_len
para_results_storage = []      # every non-None result dict, consumed by the next cell
CoRT_model = CoRT_builder.CoRT(alpha=lamda_not_source)
iteration = 1000

# --- Running tallies -----------------------------------------------------------
cnt1 = 0   # results flagged is_signal
cnt2 = 0   # results not flagged is_signal
cnt3 = 0   # is_signal results with p_value <= alpha
cnt4 = 0   # non-signal results with p_value <= alpha

for i in range(iteration):
    target_data, source_data = CoRT_model.gen_data(n_target, n_source, p, K, Ka, h, s_vector, s_len, "AR")
    result_dict = SI_CoRT.SI_parametric(n_target, p, K, target_data, source_data, lamda_not_source, lamda_1_source, lamda_k_source, T, s_len)

    # Iterations that yield no result are skipped entirely (identity test, not `!= None`).
    if result_dict is None:
        continue

    # Coerce to plain Python bools so the counters stay ordinary ints and the
    # signal test matches the truthiness check used when summarising the results.
    is_signal = bool(result_dict['is_signal'])
    rejected = bool(result_dict['p_value'] <= alpha)

    cnt1 += is_signal
    cnt2 += not is_signal
    cnt3 += is_signal and rejected
    cnt4 += (not is_signal) and rejected

    if i % 100 == 0:
        # A class may still be empty early on; report nan explicitly instead of
        # dividing by zero (which only "worked" before via silenced numpy warnings).
        running_fpr = cnt4 / cnt2 if cnt2 else float('nan')
        running_tpr = cnt3 / cnt1 if cnt1 else float('nan')
        print(f"is_signal : {result_dict['is_signal']}, p_values[{i}]: {result_dict['p_value']}")
        print(f"FPR: {running_fpr}, TPR: {running_tpr}")
        print(f"is_not_signal: {int(cnt2), int(cnt4)}")
        print(f"is_signal: {int(cnt1), int(cnt3)}")
        print("===========================================================================")

    para_results_storage.append(result_dict)

is_signal : True, p_values[0]: 0.003371563431806557
FPR: nan, TPR: 1.0
is_not_signal: (0, 0)
is_signal: (1, 1)
is_signal : False, p_values[100]: 0.02645215361335751
FPR: 0.05555555555555555, TPR: 0.8615384615384616
is_not_signal: (36, 2)
is_signal: (65, 56)
is_signal : True, p_values[200]: 0.1454279093099442
FPR: 0.03333333333333333, TPR: 0.8368794326241135
is_not_signal: (60, 2)
is_signal: (141, 118)
is_signal : True, p_values[300]: 1.0991804751503409e-05
FPR: 0.03409090909090909, TPR: 0.8215962441314554
is_not_signal: (88, 3)
is_signal: (213, 175)
is_signal : False, p_values[400]: 0.20598322203081687
FPR: 0.024793388429752067, TPR: 0.8214285714285714
is_not_signal: (121, 3)
is_signal: (280, 230)
is_signal : True, p_values[500]: 3.896462372754428e-07
FPR: 0.032679738562091505, TPR: 0.8132183908045977
is_not_signal: (153, 5)
is_signal: (348, 283)
is_signal : True, p_values[600]: 0.068120214623971
FPR: 0.027472527472527472, TPR: 0.7923627684964201
is_not_signal: (182, 5)
is_signal: (419

## Testing True Positive Rate and False Positive Rate

In [6]:
# Split the stored results by the 'is_signal' flag of each result dict.
is_signal_cases = [r for r in para_results_storage if r['is_signal']]
not_signal_cases = [r for r in para_results_storage if not r['is_signal']]

# False positive rate: share of non-signal cases whose p-value is at or below alpha.
false_positives = sum(1 for c in not_signal_cases if c['p_value'] <= alpha)
print(f"len not_signal_cases : {len(not_signal_cases)}")
print(f"false_positives: {false_positives}")
# Report nan rather than raising ZeroDivisionError when the class is empty.
fpr = false_positives / len(not_signal_cases) if not_signal_cases else float('nan')
print(f"FPR: {fpr:.4f} (Target: {alpha})")
print("\n")
# True positive rate: share of signal cases whose p-value is at or below alpha.
true_positives = sum(1 for r in is_signal_cases if r['p_value'] <= alpha)
# Label fixed: this line reports the size of is_signal_cases, not not_signal_cases.
print(f"len is_signal_cases : {len(is_signal_cases)}")
print(f"true_positives: {true_positives}")
tpr = true_positives / len(is_signal_cases) if is_signal_cases else float('nan')
print(f"TPR: {tpr:.4f}")

len not_signal_cases : 292
false_positives: 9
FPR: 0.0308 (Target: 0.05)


len not_signal_cases : 708
true_positives: 544
TPR: 0.7684
