In this short demo, I will show how to use the csPCR test on a simulated dataset.

In [1]:
'''
First, we generate a dataset. The Alpha_s, Alpha_t and effect parameters can be changed to alter the distribution of
the generated dataset.
'''

import numpy as np

def generate(ns, nt, p, q, s, t, u, Alpha_s=0, Alpha_t=1, effect=2, z_diff=0.1, rng=None):
    """Simulate one source sample and one target sample.

    Data-generating process (identical for both samples unless noted):

    * ``Z`` has ``p`` signal columns drawn from N(mean, 1), where ``mean`` is
      0 for the source and ``z_diff`` for the target, followed by ``q`` null
      columns drawn from N(0, 0.1) (0.1 is the standard deviation).
    * ``X = Z[:, :p] @ u + N(0, 1)``.
    * ``V = Z[:, :p] @ coef + alpha * X + N(0, 5)`` with ``(coef, alpha)``
      equal to ``(s, Alpha_s)`` for the source and ``(t, Alpha_t)`` for the
      target (5 is the standard deviation).
    * ``Y = (sum of the p signal columns of Z) ** 2 + effect * V + N(0, 1)``.

    Parameters
    ----------
    ns, nt : int
        Number of source / target rows.
    p, q : int
        Number of signal / null columns in ``Z``.
    s, t, u : array of length ``p``
        Coefficient vectors for ``V`` (source), ``V`` (target) and ``X``.
    Alpha_s, Alpha_t : float
        Coefficient of ``X`` in the source / target model for ``V``.
    effect : float
        Coefficient of ``V`` in the model for ``Y``.
    z_diff : float
        Mean of the target signal columns of ``Z``.
    rng : optional
        Object exposing ``normal(loc, scale, size)``, e.g. a
        ``numpy.random.Generator`` or ``numpy.random.RandomState``. When
        omitted, NumPy's global random state is used, so results obtained
        after ``np.random.seed(...)`` are unchanged.

    Returns
    -------
    tuple
        ``(Y_source, X_source, V_source, Z_source,
        Y_target, X_target, V_target, Z_target)``; the ``Y``, ``X`` and ``V``
        entries are column vectors of shape ``(n, 1)`` and the ``Z`` entries
        have shape ``(n, p + q)``.
    """
    # The ``np.random`` module itself exposes ``normal`` with the same
    # signature as a Generator, so it serves as the default source.
    rng = np.random if rng is None else rng

    # NOTE: the order of the draws below is deliberately kept fixed so that a
    # given seed keeps producing the same data set.
    Zs_null = rng.normal(0, 0.1, (ns, q))
    Zt_null = rng.normal(0, 0.1, (nt, q))

    Z_source = np.hstack((rng.normal(0, 1, (ns, p)), Zs_null))
    Z_target = np.hstack((rng.normal(z_diff, 1, (nt, p)), Zt_null))

    # Only the first p columns of Z carry signal.
    signal_source = Z_source[:, :p]
    signal_target = Z_target[:, :p]

    X_source = signal_source @ u + rng.normal(0, 1, ns)
    X_target = signal_target @ u + rng.normal(0, 1, nt)

    V_source = signal_source @ s + Alpha_s * X_source + rng.normal(0, 5, ns)
    V_target = signal_target @ t + Alpha_t * X_target + rng.normal(0, 5, nt)

    Y_source = signal_source.sum(axis=1) ** 2 + effect * V_source + rng.normal(0, 1, ns)
    Y_target = signal_target.sum(axis=1) ** 2 + effect * V_target + rng.normal(0, 1, nt)

    return (
        Y_source.reshape(-1, 1), X_source.reshape(-1, 1), V_source.reshape(-1, 1), Z_source,
        Y_target.reshape(-1, 1), X_target.reshape(-1, 1), V_target.reshape(-1, 1), Z_target,
    )


'''
According to the generation process, 
we write 2 functions to calculate the xz_ratio and true density ratio of the data
'''
def xz_ratio(X, Z, V, s, t, p, q, z_diff=0.1):
    """Return the target/source density ratio of Z for each row.

    Only the first ``p`` columns of ``Z`` enter the ratio. They are modelled
    as N(0, I) in the source and N(z_diff * 1, I) in the target, so the ratio
    of the two Gaussian densities has the exact closed form

        exp(z_diff * sum(z[:p]) - p * z_diff**2 / 2).

    Evaluating this expression directly is vectorized over rows and stays
    finite in the far tails, where dividing two underflowed densities would
    give NaN.

    Parameters
    ----------
    X, s, t, q :
        Unused; accepted so the signature is unchanged.
    Z : array-like of shape (n, >= p)
        Covariates.
    V : array with a ``shape`` attribute
        Only its number of rows is used: one ratio is returned per row of V.
    p : int
        Number of leading columns of ``Z`` that differ between populations.
    z_diff : float
        Mean of those columns in the target population.

    Returns
    -------
    numpy.ndarray of shape (n,)
    """
    n_rows = V.shape[0]
    signal = np.asarray(Z, dtype=float)[:n_rows, :p]
    # log N(z; z_diff*1, I) - log N(z; 0, I) = z_diff * sum(z) - p * z_diff^2 / 2
    return np.exp(z_diff * signal.sum(axis=1) - 0.5 * p * z_diff ** 2)

def true_density_ratio(X, Z, V, s, t,p,q, Alpha_s = 0, Alpha_t = 1,z_diff = 0.1):
    """Return the target/source density ratio of (Z, V | X) for each row.

    For every row index of ``V`` the ratio is

        [N(z; z_diff*1, I) * N(v; z @ t + Alpha_t * x, sd=5)]
        / [N(z; 0, I) * N(v; z @ s + Alpha_s * x, sd=5)]

    where ``z`` is the first ``p`` entries of the row of ``Z``. ``q`` is
    accepted but not used. The per-row results are stacked with ``np.array``,
    so the output has one entry per row of ``V``, each shaped like the
    per-row ratio.
    """
    # Quantities shared by every row.
    source_mean = 0*np.ones(p)
    target_mean = z_diff*np.ones(p)
    unit_cov = 1*np.identity(p)

    def _ratio_at(row):
        # Density of one observation under the target model over the source model.
        z_signal = Z[row][:p]
        x_val = X[row]
        v_val = V[row]
        source_z = multivariate_normal.pdf(z_signal, mean=source_mean, cov=unit_cov)
        source_v = norm.pdf(v_val, loc=z_signal@s + Alpha_s*x_val, scale=5)
        target_z = multivariate_normal.pdf(z_signal, mean=target_mean, cov=unit_cov)
        target_v = norm.pdf(v_val, loc=z_signal@t + Alpha_t*x_val, scale=5)
        return (target_z*target_v)/(source_z*source_v)

    return np.array([_ratio_at(row) for row in range(V.shape[0])])

In [14]:
'''
Define some parameters for data generation
'''
# Sample sizes of the source and target data sets.
ns = 2000
nt = 2000
# Z has p signal columns followed by q null columns.
p = 5
q = 50

# Coefficients of the signal columns of Z in the source model for V.
s = np.array(
    [-0.56228753, -1.01283112, 0.31424733, -0.90802408, -1.4123037]
)
# Coefficients of the signal columns of Z in the target model for V.
t = np.array(
    [1.46564877, -0.2257763, 0.0675282, -1.42474819, -0.54438272]
)
# Coefficients of the signal columns of Z in the model for X.
u = np.array(
    [0.11092259, -1.15099358, 0.37569802, -0.60063869, -0.29169375]
)

In [15]:
from csPCR_functions import *

In [16]:
# Simulate one source and one target data set with the default
# Alpha_s, Alpha_t, effect and z_diff settings of generate().
(
    Y_source, X_source, V_source, Z_source,
    Y_target, X_target, V_target, Z_target,
) = generate(ns, nt, p, q, s, t, u)

'''
Since this method follows the model-X framework, we assume that the distributions of X|Z,V and of Z are known. We therefore
need to define three extra functions as inputs to the test.
1. Model_X: Should be a function of the conditional model of X|Z,V
   Input: z, v values(float)
   Return: one X sample follows the X|Z,V distribution

2. E_X: Should be a function of the conditional expectation of X|Z,V
   Input: z,v values(float)
   Return: the calculated conditional expectation E[X|Z,V]

3. xz_ratio: Should be a function for calculating the (X,Z) density ratio (i.e. P_t(X,Z)/P_s(X,Z))
   Input: X, Z (ndarrays)
   Return: density ratio array (ndarray)
'''

def Model_X(z, v):
    """Draw one sample of X from its conditional distribution given Z.

    X is modelled as ``z_signal @ u + N(0, 1)``, where ``z_signal`` is the
    leading ``len(u)`` entries of ``z`` and ``u`` is the module-level
    coefficient vector. ``v`` is accepted to match the expected signature but
    does not enter the model.

    Returns a length-1 array.
    """
    # Take as many leading entries as u has coefficients instead of a
    # hard-coded 5, so the slice always matches u.
    n_signal = len(u)
    return z[:n_signal] @ u + np.random.normal(0, 1, 1)

def E_X(z, v):
    """Return the conditional expectation of X given Z.

    This is ``z_signal @ u``, where ``z_signal`` is the leading ``len(u)``
    entries of ``z`` and ``u`` is the module-level coefficient vector. ``v``
    is accepted to match the expected signature but does not enter the model.
    """
    # Take as many leading entries as u has coefficients instead of a
    # hard-coded 5, so the slice always matches u.
    n_signal = len(u)
    return z[:n_signal] @ u


def xz_ratio(X, Z, z_diff=0.1):
    """Return the target/source density ratio of Z for each row.

    Only the first ``p`` columns of ``Z`` enter the ratio (``p`` is the
    module-level number of signal columns). They are modelled as N(0, I) in
    the source and N(z_diff * 1, I) in the target, so the ratio of the two
    Gaussian densities has the exact closed form

        exp(z_diff * sum(z[:p]) - p * z_diff**2 / 2).

    Evaluating this expression directly is vectorized over rows and stays
    finite in the far tails, where dividing two underflowed densities would
    give NaN.

    Parameters
    ----------
    X : array with a ``shape`` attribute
        Only its number of rows is used: one ratio is returned per row of X.
    Z : array-like of shape (n, >= p)
        Covariates.
    z_diff : float
        Mean of the signal columns in the target population.

    Returns
    -------
    numpy.ndarray of shape (n,)
    """
    n_rows = X.shape[0]
    signal = np.asarray(Z, dtype=float)[:n_rows, :p]
    # log N(z; z_diff*1, I) - log N(z; 0, I) = z_diff * sum(z) - p * z_diff^2 / 2
    return np.exp(z_diff * signal.sum(axis=1) - 0.5 * p * z_diff ** 2)

'''
To use the Test function, pass it the source and target X, Z, V data and the source Y data,
together with the three functions described above.
'''
Test(
    X_source, Z_source, V_source, Y_source,
    X_target, Z_target, V_target,
    model_X=Model_X,
    E_X=E_X,
    xz_ratio=xz_ratio,
)


0.041140270575848126