In [1]:
import numpy as np
from New_functions import *
from Benchmark_functions import *
import warnings
import matplotlib.pyplot as plt
import time
from tqdm import tqdm
warnings.filterwarnings("ignore")

In [2]:
'''
First, we generate a dataset, we can change the Alpha_s, Alpha_t and effect parameter to change the distribution of
the generated dataset.
'''

import numpy as np

def generate_binary(ns, nt, p, q, s, t, u, Alpha_s=1, Alpha_t=0, effect=1, x_effect=0, z_diff=0.1, threshold_X=0.5, threshold_Y=0.5):
    # Generate normal distributions for Z
    Zs_null = np.random.normal(0, 1, (ns, q))
    Zt_null = np.random.normal(0, 1, (nt, q))
    
    # Generate Z variables with and without shift
    Z_source = np.hstack((np.random.normal(0, 1, (ns, p)), Zs_null))
    Z_target = np.hstack((np.random.normal(z_diff, 1, (nt, p)), Zt_null))
    
    # Generate X variables
    X_source = Z_source[:, :p] @ u + np.random.normal(0, 1, ns)
    X_target = Z_target[:, :p] @ u + np.random.normal(0, 1, nt)
    
    # Convert X to binary
    X_source = (np.random.rand(ns) < 1 / (1 + np.exp(-X_source))).astype(int)
    X_target = (np.random.rand(nt) < 1 / (1 + np.exp(-X_target))).astype(int)
    
    # Generate V variables
    V_source = Z_source[:, :p] @ s + Alpha_s * X_source + np.random.normal(0, 5, ns)
    V_target = Z_target[:, :p] @ t + Alpha_t * X_target + np.random.normal(0, 5, nt)
    
    # Generate Y variables
    Y_source = (Z_source[:, :p].sum(axis=1))**2 + effect * V_source + np.random.normal(0, 1, ns) + x_effect * X_source
    Y_target = (Z_target[:, :p].sum(axis=1))**2 + effect * V_target + np.random.normal(0, 1, nt) + x_effect * X_target
    
    # Convert Y to binary
    Y_source = (np.random.rand(ns) < 1 / (1 + np.exp(-Y_source))).astype(int)
    Y_target = (np.random.rand(nt) < 1 / (1 + np.exp(-Y_target))).astype(int)
    
    return Y_source.reshape(-1, 1), X_source.reshape(-1, 1), V_source.reshape(-1, 1), Z_source,\
           Y_target.reshape(-1, 1), X_target.reshape(-1, 1), V_target.reshape(-1, 1), Z_target



def generate(ns, nt, p,q, s, t, u, Alpha_s=0, Alpha_t = 2,effect=1,x_effect = 0, non_lin = 0, z_diff = 0.1):
    Zs_null = np.random.normal(0,0.1, (ns, q))
    Zt_null = np.random.normal(0,0.1, (nt, q))
    
    Z_source = np.hstack((np.random.normal(0, 1, (ns, p)) , Zs_null))
    Z_target = np.hstack((np.random.normal(z_diff, 1, (nt, p)) , Zt_null))
    
    X_source = Z_source[:, :p] @ u + np.random.normal(0, 1, ns)
    X_target = Z_target[:, :p] @ u + np.random.normal(0, 1, nt)

    V_source = Z_source[:, :p] @ s + (1-non_lin)*Alpha_s * X_source + non_lin * Alpha_s * np.sin(X_source) + np.random.normal(0, 5, ns)
    V_target = Z_target[:, :p] @ t + (1-non_lin)*Alpha_t * X_target + non_lin * Alpha_t * np.sin(X_target) + np.random.normal(0, 5, nt)
    
    # V_source = Z_source[:, :p] @ s + 2*X_source 
    # V_target = Z_target[:, :p] @ t - 2*X_target
    
    Y_source = (Z_source[:, :p].sum(axis=1))**2 + effect*V_source + x_effect*X_source + np.random.normal(0, 1, ns) 
    Y_target = (Z_target[:, :p].sum(axis=1))**2 + effect*V_target + x_effect*X_target +np.random.normal(0, 1, nt) 
    
    
    return Y_source.reshape(-1, 1), X_source.reshape(-1, 1), V_source.reshape(-1, 1), Z_source,\
           Y_target.reshape(-1, 1), X_target.reshape(-1, 1), V_target.reshape(-1, 1), Z_target

In [3]:
#Set parameter for the generation of data
ns, p,q = 1000, 5, 50
nt = 2000

s = np.array([-1, -0.5, 0, 1, 1.5])
t = np.array([ 1, -1, 0.5 , -0.5, -1])
u = np.array([ 0, -1, 0.5, -0.5, 1])

Y_source, X_source, V_source, Z_source,Y_target, X_target, V_target, Z_target = \
generate_binary(ns,nt, p,q, s, t, u, Alpha_s=0, Alpha_t = 1,effect=2, z_diff = 0.1)
            

In [4]:
# Data Separation:
# # The data arrays X_e, Z_e, V_e are designated for density ratio estimation.
# # The arrays Z_source, X_source, V_source, Y_source are used for testing.
# # Here, we split the source data based on a specified proportion.

# proportion = 0.5
# num = int(proportion * X_source.shape[0])
# Z_e = Z_source[:num]
# X_e = X_source[:num]
# V_e = V_source[:num]
# Z_source = Z_source[num+1:]
# X_source = X_source[num+1:]
# V_source = V_source[num+1:]
# Y_source = Y_source[num+1:]
# Y_shuffle = Y_source


# Real data experiments to run

In [5]:
# Data Separation:
# The data arrays X_e, Z_e, V_e are designated for density ratio estimation.
# The arrays Z_source, X_source, V_source, Y_source are used for testing.
# Here, we split the source data based on a specified proportion.

proportion = 0.5
num = int(proportion * X_source.shape[0])
Z_e = Z_source[:num]
X_e = X_source[:num]
V_e = V_source[:num]
Z_source = Z_source[num+1:]
X_source = X_source[num+1:]
V_source = V_source[num+1:]
Y_source = Y_source[num+1:]
Y_shuffle = Y_source

## Check the Type-I error control with random shuffle Y

In [6]:
count = 0
for _ in tqdm(range(100), desc='Shuffling Y'):
    np.random.shuffle(Y_shuffle)
    p_value = Test(X_e, Z_e, V_e, X_source, Z_source, V_source, Y_shuffle, \
     X_target, Z_target, V_target, L=3, K=20, datatype='binary')
    print(p_value)
    if p_value < 0.05:
        count += 1
print(f'Simulated Type-I error rate for csPCR is {count/100}')
    
    

Shuffling Y:   1%|          | 1/100 [00:03<05:13,  3.17s/it]

0.1316244092041311


Shuffling Y:   2%|▏         | 2/100 [00:06<05:12,  3.19s/it]

0.19622971429265224


Shuffling Y:   3%|▎         | 3/100 [00:09<05:08,  3.18s/it]

0.23034358340321526


Shuffling Y:   4%|▍         | 4/100 [00:12<05:04,  3.17s/it]

0.21141531953416282


Shuffling Y:   5%|▌         | 5/100 [00:15<05:00,  3.17s/it]

0.06790872673989001


Shuffling Y:   6%|▌         | 6/100 [00:19<04:57,  3.17s/it]

0.17456094838338754


Shuffling Y:   7%|▋         | 7/100 [00:22<04:56,  3.19s/it]

0.11256711624928473


Shuffling Y:   8%|▊         | 8/100 [00:25<04:52,  3.18s/it]

0.10940476387662001


Shuffling Y:   9%|▉         | 9/100 [00:28<04:49,  3.18s/it]

0.21923207206874418


Shuffling Y:  10%|█         | 10/100 [00:31<04:46,  3.19s/it]

0.14238376651665707


Shuffling Y:  11%|█         | 11/100 [00:35<04:45,  3.20s/it]

0.19101901839891688


Shuffling Y:  12%|█▏        | 12/100 [00:38<04:41,  3.20s/it]

0.19161170544546835


Shuffling Y:  12%|█▏        | 12/100 [00:40<04:57,  3.38s/it]


KeyboardInterrupt: 

In [8]:
count = 0
for _ in tqdm(range(100), desc='Shuffling Y'):
    np.random.shuffle(Y_shuffle)
    p_value = Test_pe(X_e, Z_e, V_e, X_source, Z_source, V_source, Y_shuffle, \
     X_target, Z_target, V_target, L=3, K=20, datatype='binary')
    print(p_value)
    if p_value < 0.05:
        count += 1
print(f'Simulated Type-I error rate for csPCR(pe) is {count/100}')
    

Shuffling Y:   0%|          | 0/100 [00:00<?, ?it/s]

[0.1470994  0.22118357 0.16352529]


Shuffling Y:   1%|          | 1/100 [00:09<15:19,  9.29s/it]

0.6166357013102293
[0.04906191 0.12899593 0.05459335]


Shuffling Y:   2%|▏         | 2/100 [00:18<15:11,  9.30s/it]

0.252169568120723
[0.12948721 0.14835543 0.08889146]


Shuffling Y:   3%|▎         | 3/100 [00:27<15:01,  9.29s/it]

0.7278822224133394
[0.08700511 0.20639202 0.08064044]


Shuffling Y:   4%|▍         | 4/100 [00:37<14:56,  9.34s/it]

0.6022853570374143
[0.00502327 0.15479621 0.10967168]


Shuffling Y:   5%|▌         | 5/100 [00:46<14:50,  9.38s/it]

0.15434054639702932
[0.2208907  0.18506196 0.08948592]


Shuffling Y:   6%|▌         | 6/100 [00:56<14:40,  9.37s/it]

0.20094823404188278
[0.17382799 0.18290783 0.0901834 ]


Shuffling Y:   7%|▋         | 7/100 [01:05<14:31,  9.37s/it]

0.4173208762356868
[0.10537095 0.19669362 0.05271453]


Shuffling Y:   8%|▊         | 8/100 [01:14<14:21,  9.36s/it]

0.9473402793749636
[0.06356616 0.19450524 0.24356806]


Shuffling Y:   9%|▉         | 9/100 [01:24<14:11,  9.36s/it]

0.8013036074904593
[0.17593596 0.22124156 0.17833011]


Shuffling Y:  10%|█         | 10/100 [01:33<14:02,  9.37s/it]

1.0
[0.22366614 0.16895261 0.16395273]


Shuffling Y:  11%|█         | 11/100 [01:42<13:52,  9.36s/it]

0.8063693684002105
[0.28534744 0.18316859 0.18196553]


Shuffling Y:  12%|█▏        | 12/100 [01:52<13:44,  9.37s/it]

0.4351914482698487
[0.18387156 0.31868854 0.29874866]


Shuffling Y:  13%|█▎        | 13/100 [02:01<13:33,  9.35s/it]

0.06556625178678643
[0.18902843 0.12228656 0.15873888]


Shuffling Y:  14%|█▍        | 14/100 [02:10<13:24,  9.36s/it]

0.32195130904816227
[0.15849357 0.13757403 0.13795839]


Shuffling Y:  15%|█▌        | 15/100 [02:20<13:14,  9.35s/it]

0.15754228100829692
[0.15957868 0.18373809 0.12778599]


Shuffling Y:  16%|█▌        | 16/100 [02:29<13:06,  9.36s/it]

0.354340399396492
[0.16869009 0.17187504 0.17842525]


Shuffling Y:  17%|█▋        | 17/100 [02:38<12:55,  9.35s/it]

0.5612809668403604
[0.11509307 0.26923504 0.15104661]


Shuffling Y:  18%|█▊        | 18/100 [02:48<12:47,  9.36s/it]

0.18221797654564886
[0.13295061 0.19317418 0.16846665]


Shuffling Y:  19%|█▉        | 19/100 [02:57<12:38,  9.37s/it]

0.6783686704807059
[0.15442211 0.2391136  0.09984427]


Shuffling Y:  20%|██        | 20/100 [03:07<12:28,  9.35s/it]

0.3794406281118784
[0.2277457  0.25784416 0.25728668]


Shuffling Y:  21%|██        | 21/100 [03:16<12:19,  9.36s/it]

0.8574494912768665
[0.22458586 0.10619777 0.05828369]


Shuffling Y:  22%|██▏       | 22/100 [03:25<12:09,  9.35s/it]

0.973194607582782
[0.11452328 0.26529192 0.17945201]


Shuffling Y:  23%|██▎       | 23/100 [03:35<11:59,  9.34s/it]

0.511156695172407
[0.06185925 0.08947887 0.28495505]


Shuffling Y:  24%|██▍       | 24/100 [03:44<11:50,  9.35s/it]

1.0
[0.05211497 0.2753209  0.08660241]


Shuffling Y:  25%|██▌       | 25/100 [03:53<11:40,  9.34s/it]

0.5550045311594005
[0.11278807 0.18505434 0.19341118]


Shuffling Y:  26%|██▌       | 26/100 [04:03<11:32,  9.36s/it]

0.6874411957304738
[0.16206995 0.24128898 0.08323232]


Shuffling Y:  27%|██▋       | 27/100 [04:12<11:22,  9.35s/it]

0.1957678290440189
[0.05296746 0.05255974 0.11446782]


Shuffling Y:  28%|██▊       | 28/100 [04:21<11:13,  9.35s/it]

1.0
[ 0.19103952  0.17689916 -0.00568575]


Shuffling Y:  29%|██▉       | 29/100 [04:31<11:05,  9.37s/it]

1.0
[0.19855924 0.30231543 0.26166369]


Shuffling Y:  30%|███       | 30/100 [04:40<10:54,  9.36s/it]

0.1880901955908093
[0.027663   0.18277958 0.10781286]


Shuffling Y:  31%|███       | 31/100 [04:49<10:45,  9.35s/it]

0.32547840549685314
[0.09829608 0.0971261  0.08863729]


Shuffling Y:  32%|███▏      | 32/100 [04:59<10:34,  9.34s/it]

0.3239513161176927
[0.02666172 0.13386578 0.0757348 ]


Shuffling Y:  33%|███▎      | 33/100 [05:08<10:26,  9.36s/it]

0.16426972231497938
[0.09990766 0.21137531 0.20567281]


Shuffling Y:  34%|███▍      | 34/100 [05:18<10:18,  9.37s/it]

0.5007668553183311
[0.25434773 0.26949544 0.1303596 ]


Shuffling Y:  35%|███▌      | 35/100 [05:27<10:08,  9.37s/it]

0.7412787868677391
[0.11994284 0.27201547 0.14323936]


Shuffling Y:  36%|███▌      | 36/100 [05:36<10:00,  9.38s/it]

0.9239305186406774
[0.18920278 0.08264786 0.01936203]


Shuffling Y:  37%|███▋      | 37/100 [05:46<09:50,  9.37s/it]

0.40867525419040474
[-0.03010261  0.06048428  0.13454153]


Shuffling Y:  38%|███▊      | 38/100 [05:55<09:41,  9.38s/it]

1.0
[0.01307295 0.15534795 0.16850473]


Shuffling Y:  39%|███▉      | 39/100 [06:04<09:32,  9.38s/it]

0.9532905354895921
[0.14764142 0.2013955  0.06201066]


Shuffling Y:  40%|████      | 40/100 [06:14<09:23,  9.38s/it]

0.8834601289102414
[0.08500292 0.18913136 0.17473487]


Shuffling Y:  41%|████      | 41/100 [06:23<09:14,  9.39s/it]

1.0
[0.06426069 0.19095604 0.13368604]


Shuffling Y:  42%|████▏     | 42/100 [06:33<09:04,  9.38s/it]

1.0
[0.12715965 0.27593476 0.15362281]


Shuffling Y:  43%|████▎     | 43/100 [06:42<08:53,  9.36s/it]

0.5249671714720114
[0.10732199 0.209162   0.13116328]


Shuffling Y:  44%|████▍     | 44/100 [06:51<08:43,  9.36s/it]

0.6621761521891085
[0.11186983 0.19948362 0.1362505 ]


Shuffling Y:  45%|████▌     | 45/100 [07:01<08:35,  9.36s/it]

0.9573505673625111
[0.06055277 0.20184788 0.03377142]


Shuffling Y:  46%|████▌     | 46/100 [07:10<08:25,  9.36s/it]

0.5136510872058693
[0.17966537 0.17006499 0.1064804 ]


Shuffling Y:  47%|████▋     | 47/100 [07:19<08:14,  9.34s/it]

0.2152085101177328
[0.15362367 0.2076238  0.13155463]


Shuffling Y:  48%|████▊     | 48/100 [07:29<08:05,  9.34s/it]

0.8258904294200724
[0.17561313 0.26754947 0.16709064]


Shuffling Y:  49%|████▉     | 49/100 [07:38<07:56,  9.35s/it]

0.6306377638041192
[0.18273942 0.22006062 0.04197615]


Shuffling Y:  50%|█████     | 50/100 [07:47<07:47,  9.36s/it]

0.27085906740279064
[0.16492278 0.28195007 0.16697609]


Shuffling Y:  51%|█████     | 51/100 [07:57<07:39,  9.37s/it]

0.30363000989763744
[0.19622066 0.14867673 0.08111286]


Shuffling Y:  52%|█████▏    | 52/100 [08:06<07:30,  9.38s/it]

0.2867493362977187
[0.08553305 0.13665557 0.03188594]


Shuffling Y:  53%|█████▎    | 53/100 [08:16<07:21,  9.40s/it]

0.3726478801763029
[0.08868148 0.15644607 0.04648562]


Shuffling Y:  54%|█████▍    | 54/100 [08:25<07:11,  9.38s/it]

0.6331584769145567
[0.08498552 0.05764042 0.10424691]


Shuffling Y:  55%|█████▌    | 55/100 [08:34<07:02,  9.38s/it]

0.25262391439555
[0.19810017 0.34689062 0.21614283]


Shuffling Y:  56%|█████▌    | 56/100 [08:44<06:52,  9.37s/it]

0.3454641553251969
[0.1867481  0.11606766 0.10914338]


Shuffling Y:  57%|█████▋    | 57/100 [08:53<06:42,  9.35s/it]

0.7595206005322173
[0.23934317 0.24235929 0.17091361]


Shuffling Y:  58%|█████▊    | 58/100 [09:02<06:33,  9.36s/it]

0.43306376896985976
[0.01466225 0.27858454 0.07933551]


Shuffling Y:  59%|█████▉    | 59/100 [09:12<06:23,  9.35s/it]

0.3310534306566819
[0.09181809 0.05183601 0.01881439]


Shuffling Y:  60%|██████    | 60/100 [09:21<06:14,  9.37s/it]

1.0
[0.05337873 0.21144178 0.21372319]


Shuffling Y:  61%|██████    | 61/100 [09:31<06:05,  9.38s/it]

0.5903336551627438
[0.13701029 0.11908525 0.05004466]


Shuffling Y:  62%|██████▏   | 62/100 [09:40<05:57,  9.42s/it]

0.6780629841426933
[0.04128682 0.18650993 0.03932941]


Shuffling Y:  63%|██████▎   | 63/100 [09:49<05:48,  9.41s/it]

0.07941072639871805
[0.11983359 0.12768714 0.0558174 ]


Shuffling Y:  64%|██████▍   | 64/100 [09:59<05:38,  9.39s/it]

0.7717828628226561
[0.07700695 0.13856183 0.11449413]


Shuffling Y:  65%|██████▌   | 65/100 [10:08<05:28,  9.39s/it]

0.7397750603405093
[0.1649479  0.30686191 0.12649154]


Shuffling Y:  66%|██████▌   | 66/100 [10:17<05:18,  9.36s/it]

0.6012862666000385
[0.09471215 0.23073738 0.16425114]


Shuffling Y:  67%|██████▋   | 67/100 [10:27<05:08,  9.34s/it]

0.3071403045219667
[0.12342123 0.13621075 0.0473534 ]


Shuffling Y:  68%|██████▊   | 68/100 [10:36<04:59,  9.36s/it]

0.39883718132562185
[0.22119813 0.23882009 0.05034369]


Shuffling Y:  69%|██████▉   | 69/100 [10:45<04:49,  9.34s/it]

0.30728695608507106
[0.28670182 0.18408653 0.15367733]


Shuffling Y:  70%|███████   | 70/100 [10:55<04:40,  9.34s/it]

0.6985113768121058
[0.15828327 0.26260842 0.14679679]


Shuffling Y:  71%|███████   | 71/100 [11:04<04:30,  9.34s/it]

0.05886328710448563
[0.26644865 0.31777917 0.20092618]


Shuffling Y:  72%|███████▏  | 72/100 [11:13<04:21,  9.35s/it]

0.5033553586284498
[0.16726894 0.17510878 0.12916196]


Shuffling Y:  73%|███████▎  | 73/100 [11:23<04:12,  9.37s/it]

0.18401142603404252
[0.12373758 0.35752782 0.14277711]


Shuffling Y:  74%|███████▍  | 74/100 [11:32<04:03,  9.35s/it]

0.0945592235541185
[0.12336481 0.27826665 0.12098945]


Shuffling Y:  75%|███████▌  | 75/100 [11:42<03:53,  9.36s/it]

0.2841387520288945
[0.10375873 0.1873088  0.1448101 ]


Shuffling Y:  76%|███████▌  | 76/100 [11:51<03:44,  9.35s/it]

0.8685248962380887
[0.07850009 0.07706094 0.16698733]


Shuffling Y:  77%|███████▋  | 77/100 [12:00<03:35,  9.35s/it]

0.3019536523880839
[0.056218   0.18555556 0.11523638]


Shuffling Y:  78%|███████▊  | 78/100 [12:10<03:25,  9.35s/it]

0.6302230258205606
[0.15213285 0.21817059 0.08861272]


Shuffling Y:  79%|███████▉  | 79/100 [12:19<03:16,  9.37s/it]

0.1711395105145026
[0.06334899 0.22900605 0.05158519]


Shuffling Y:  80%|████████  | 80/100 [12:28<03:07,  9.40s/it]

0.9461058486611102
[0.21061048 0.23469304 0.28983749]


Shuffling Y:  81%|████████  | 81/100 [12:38<02:58,  9.38s/it]

0.9218559876101942
[0.27310399 0.28134376 0.20757699]


Shuffling Y:  82%|████████▏ | 82/100 [12:47<02:48,  9.38s/it]

0.3116703375041834
[0.18539295 0.33507655 0.20982345]


Shuffling Y:  83%|████████▎ | 83/100 [12:57<02:39,  9.38s/it]

0.34746728231915924
[0.19194922 0.12237262 0.05403661]


Shuffling Y:  84%|████████▍ | 84/100 [13:06<02:29,  9.37s/it]

0.21361519711084365
[0.19384941 0.19060434 0.10502801]


Shuffling Y:  85%|████████▌ | 85/100 [13:15<02:21,  9.42s/it]

0.20256629767301937
[0.04184606 0.18073058 0.13780768]


Shuffling Y:  85%|████████▌ | 85/100 [13:19<02:21,  9.41s/it]


KeyboardInterrupt: 

In [10]:
count = 0
for _ in tqdm(range(100), desc='Shuffling Y'):
    np.random.shuffle(Y_shuffle)
    p_value = IS_test(X_e, Z_e, V_e, X_source, Z_source, V_source, Y_shuffle,\
        X_target, Z_target, V_target, L=3, K=20, datatype='binary')
    if p_value < 0.05:
        count += 1
print(f'Simulated Type-I error rate for IS is {count/100}')
    

Shuffling Y: 100%|██████████| 100/100 [01:12<00:00,  1.39it/s]

Simulated Type-I error rate for IS is 0.04





## Our method

In [15]:
#1. Test without power enhancement
p_value = Test(X_e, Z_e, V_e, X_source, Z_source, V_source, Y_source, \
     X_target, Z_target, V_target, L=3, K=20, datatype='binary')
print(f'P-value for csPCR is {p_value}')
    

P-value for csPCR is 0.15300633499865168


In [16]:
#2. Test with power enhancement

p_value = Test_pe(X_e, Z_e, V_e, X_source, Z_source, V_source, Y_source, \
        X_target, Z_target, V_target, L=3, K=20, datatype='binary')
print(f'P-value for csPCR(pe) is {p_value}')

P-value for csPCR(pe) is 0.28085925797367395


### Use a different scoring function for testing V for the Power enhancement

In [17]:
# Originally, we use v*x for scoring, here we use (-v)*x to detect 
# the negative correlation between Y and V
p_value = Test_pe(X_e, Z_e, V_e, X_source, Z_source, V_source, Y_source, \
        X_target, Z_target, V_target, L=3, K=20, datatype='binary', score = 'neg')
print(f'P-value for csPCR(pe) with negative scoring function is {p_value}')

P-value for csPCR(pe) with negative scoring function is 0.7262152709691574


## Benchmark
there are 3 benchmarks:1. Use source only data, 2. Use target only data, 3. Importance sampling method (the benchmark from others)

In [18]:
#1. Use source data only
#2. Test with power enhancement

p_value = PCR_test(X_source,Z_source,V_source,Y_source)
print(f'P-value for PCR with source data is {p_value}')

P-value for PCR with source data is 0.6250775742972228


In [19]:
#2. Use target data only

p_value = PCR_test(X_target,Z_target,V_target,Y_target)
print(f'P-value for PCR with target data is {p_value}')

P-value for PCR with target data is 0.39537210168587544


In [21]:
#3. Use Importance Sampling benchmark method

p_value = IS_test(X_e, Z_e, V_e, X_source, Z_source, V_source, Y_source,\
        X_target, Z_target, V_target, L=3, K=20, datatype='binary')
print(f'P-value for IS is {p_value}')

P-value for IS is 0.7824830494337698


## Tune hyperparameter L

In [9]:
l_lst = [2, 5, 8, 10]
result_lst = []
for l in l_lst:
    # Use any test function above
    pvalue = Test_pe(X_e, Z_e, V_e, X_source, Z_source, V_source, Y_source, X_target, Z_target, V_target, L=3, K=20, datatype='binary')
    result_lst.append(pvalue)
    print(f'L is {l}, pvalue: {pvalue}')

Training accuracy for X|Z: 0.717
[0.23433064 0.19167032 0.28155873]
[167.95443516 165.08707733 174.75729811]
L is 2, pvalue: 0.933563916771966
Training accuracy for X|Z: 0.717
[0.2952632  0.20613484 0.31138347]
[140.64749537 187.91719237 174.08845475]
L is 5, pvalue: 0.06342484042790109
Training accuracy for X|Z: 0.717
[0.2715509  0.20274784 0.29688373]
[148.2607481  172.03479922 184.74858996]
L is 8, pvalue: 0.21636110362760874
Training accuracy for X|Z: 0.717
[0.29121139 0.13614264 0.34778643]
[146.78635223 166.57257311 184.67772408]
L is 10, pvalue: 0.195118444312963
