In [26]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm, chi2, truncnorm
from tqdm import trange
from sklearn import linear_model
from sklearn.linear_model import LassoCV
from sklearn.linear_model import Lasso

import matplotlib.pyplot as plt

## Generate data

In [14]:
# Problem size: n observations, p covariates.
n = 500
p = 20

# Coefficient vectors drawn once for the whole notebook.
# `t` is `s` plus a small (sd 0.1) perturbation; `u` is an independent draw.
s = np.random.normal(size=p)
t = s + np.random.normal(scale=0.1, size=p)
u = np.random.normal(size=p)

In [15]:
def generate(n, p, s, t, u, Alpha = 0):
    """Simulate one data set with a sample domain and a target domain.

    Parameters
    ----------
    n : int
        Number of observations per domain.
    p : int
        Number of covariates in each row of Z.
    s, t, u : array-like of length p
        Coefficient vectors: `u` drives X from Z in both domains, `s` drives
        V in the sample domain and `t` drives V in the target domain.
    Alpha : float, optional
        Accepted for interface compatibility; currently unused.

    Returns
    -------
    tuple
        ``(Y, X_sample, X_target, V_sample, V_target, Z)`` where ``Y`` has
        shape (n,), ``Z`` has shape (n, p) and the remaining arrays have
        shape (n,).

    Notes
    -----
    The previous body could not run: it referenced ``Z``, ``X``,
    ``Y_sample`` and ``Y_target`` without defining them and added the whole
    ``V_sample`` vector to each scalar response.
    NOTE(review): the six-slot return has room for only one response and one
    covariate matrix, so ``Y`` and ``Z`` are the sample-domain arrays here —
    confirm this is the intended pairing. The target-domain covariates are
    used to build ``X_target`` / ``V_target`` but are not returned.
    """
    s, t, u = np.asarray(s), np.asarray(t), np.asarray(u)

    # Covariates: the target domain is mean-shifted by 0.1.
    Z_sample = np.random.normal(0, 1, (n, p))
    Z_target = np.random.normal(0.1, 1, (n, p))

    X_sample = Z_sample @ u + np.random.normal(0, 1, n)
    X_target = Z_target @ u + np.random.normal(0, 1, n)

    # V depends on X with opposite signs in the two domains.
    V_sample = Z_sample @ s + np.random.normal(0, 1, n) + X_sample
    V_target = Z_target @ t + np.random.normal(0, 1, n) - X_target

    # Row-wise z @ z + sum(z) + x + v + noise, vectorised over all n rows.
    Y = (
        (Z_sample ** 2).sum(axis=1)
        + Z_sample.sum(axis=1)
        + X_sample
        + V_sample
        + np.random.normal(0, 1, n)
    )
    return Y, X_sample, X_target, V_sample, V_target, Z_sample

In [16]:
# `generate` is declared as generate(n, p, s, t, u, Alpha=0) and returns six
# values, so pass the problem size and `u` and unpack all six.
Y, X_sample, X_target, V_sample, V_target, Z = generate(n, p, s, t, u, 0)

In [17]:
# Design matrix: the covariates Z with X_sample appended as the last column.
a = np.column_stack((Z, X_sample))

# Cross-validated Lasso of Y on [Z, X_sample].
reg = LassoCV()
reg.fit(a, Y)

def Covariate_Shift_Weight(x, z, v = 0):
    """Return exp(((x - z@s)**2 - (x - z@t)**2) / 2).

    This is the ratio of a unit-variance Gaussian density centred at ``z @ t``
    to one centred at ``z @ s``, both evaluated at ``x``. ``s`` and ``t`` are
    read from module scope; ``v`` is accepted but not used.
    """
    resid_s = x - z @ s
    resid_t = x - z @ t
    log_ratio = (resid_s**2 - resid_t**2) / 2
    return np.exp(log_ratio)

def Model_X(z, v = 0):
    """Draw one value of X given z: ``z @ t`` plus standard-normal noise.

    ``t`` is read from module scope and ``v`` is unused. The result is a
    length-1 array because the noise is drawn with size 1.
    """
    noise = np.random.normal(size=1)
    return z @ t + noise

def T_statistic(y, x, z, v = 0):
    """Squared residual of ``y`` against the fixed model ``1.5*x + z @ z``.

    Parameters
    ----------
    y : float
        Response value.
    x : float or array-like
        Observed or resampled X value.
    z : 1-D array
        Covariate row.
    v : optional
        Accepted for interface compatibility; unused.

    Returns
    -------
    ``(y - 1.5*x - z @ z) ** 2`` with the same shape as ``x`` broadcasts to.

    Notes
    -----
    Earlier code also ran ``reg.predict`` on the first data row on every
    call and discarded the result. That work never affected the return
    value, so it has been removed; this also drops the dependency on the
    module-level ``Z``, ``X_sample`` and ``reg``.
    """
    return (y - 1.5*x - z @ z) ** 2

In [18]:
# Print the covariate-shift weight of the first ten observations.
for i in range(10):
    x, z = X_sample[i], Z[i]
    print(Covariate_Shift_Weight(x, z))

1.6706944384034188
1.7496329547856648
0.9858853290795901
1.0081388816838919
1.092654554498052
0.894500168911984
0.5633644789535892
0.7849766073724715
0.6026440809542113
0.8681296028230419


In [19]:
def Conterfeits(y, x, z, v = 0, L = 5, K = 10):
    """Return the bin label of the observed statistic among resampled ones.

    Draws ``M = L*K - 1`` values of X from ``Model_X(z, v)``, counts how many
    of their statistics are strictly smaller than the observed
    ``T_statistic(y, x, z, v)``, and maps that count to a label by integer
    division by ``K``. The count is at most ``L*K - 1``, so the label lies in
    ``0 .. L-1``.

    Parameters
    ----------
    y, x, z, v :
        One observation; forwarded to ``T_statistic`` and ``Model_X``.
    L : int
        Number of labels.
    K : int
        Number of ranks grouped into each label.
    """
    M = L * K - 1
    # The observed statistic does not change across draws; compute it once.
    observed = T_statistic(y, x, z, v)

    cnt = 0
    for _ in range(M):
        x_ = Model_X(z, v)
        if observed > T_statistic(y, x_, z, v):
            cnt += 1

    return cnt // K

def PCRtest(Y, X, Z, V = 0, L = 5, K = 20, covariate_shift = True):
    """Accumulate per-label totals and a chi-square-style statistic.

    For each observation ``j`` the label ``Conterfeits(Y[j], X[j], Z[j], 0, L, K)``
    is computed, and either ``Covariate_Shift_Weight(X[j], Z[j], 0)`` (when
    ``covariate_shift`` is truthy) or ``1`` is added to that label's total.

    Parameters
    ----------
    Y, X : 1-D arrays
        Responses and X values; ``Y.size`` sets the number of observations.
    Z : 2-D array
        One covariate row per observation.
    V : optional
        Accepted for interface compatibility; unused (``v`` is fixed to 0).
    L, K : int
        Number of labels and ranks per label, forwarded to ``Conterfeits``.
    covariate_shift : bool
        Whether to weight each observation. Any truthy/falsy value is
        honoured; the earlier ``== True`` / ``== False`` checks silently
        added nothing for values such as ``None``.

    Returns
    -------
    (W, stat)
        ``W`` is the length-``L`` array of totals and
        ``stat = L/n * sum((W - n/L)**2)``.
    """
    n = Y.size
    W = np.zeros(L)

    for j in range(n):
        y, x, z, v = Y[j], X[j], Z[j], 0
        label = Conterfeits(y, x, z, v, L, K)
        W[label] += Covariate_Shift_Weight(x, z, v) if covariate_shift else 1

    deviation = W - n/L
    return W, L/n * np.dot(deviation, deviation)

In [22]:
# Weighted test on the sample-domain X.
PCRtest(Y, X_sample, Z, L=5, K=20, covariate_shift=True)

(array([ 88.6912104 , 111.64327617, 114.99401231,  89.91094202,
         89.4021802 ]),
 7.023778827144947)

In [23]:
# Unweighted test on the target-domain X.
PCRtest(Y, X_target, Z, L=5, K=20, covariate_shift=False)

(array([ 99., 109., 105.,  87., 100.]), 2.7600000000000002)

In [35]:
def int_(x):
    """Truncate ``x`` to an int, capping anything at or above 80 to 79."""
    return 79 if x >= 80 else int(x)

def Mutiple_Tests(n, s, t, Alpha, sample_size = 500, u_coef = None):
    """Repeat the weighted PCR test on freshly generated data.

    Parameters
    ----------
    n : int
        Number of repetitions.
    s, t : 1-D arrays
        Coefficient vectors forwarded to ``generate``.
    Alpha : float
        Forwarded to ``generate``.
    sample_size : int, optional
        Observations per generated data set. The default of 500 matches the
        notebook's module-level ``n``, which is shadowed here by the
        repetition count.
    u_coef : 1-D array, optional
        Coefficient vector passed to ``generate`` as ``u``; defaults to the
        module-level ``u``.

    Returns
    -------
    list
        The test statistic from each repetition, in run order.

    Notes
    -----
    ``generate`` is declared as ``generate(n, p, s, t, u, Alpha=0)`` and
    returns six values; the earlier three-argument call with a four-value
    unpack did not match it.
    """
    if u_coef is None:
        # Safe to read the module-level vector: no local is named `u` any more.
        u_coef = u

    stats = []
    for _ in trange(n):
        Y, X_sample, X_target, V_sample, V_target, Z = generate(
            sample_size, np.size(s), s, t, u_coef, Alpha
        )
        _, stat = PCRtest(Y, X_sample, Z, L = 5, K = 20, covariate_shift = True)
        stats.append(stat)
    return stats

In [37]:
# 100 repetitions per setting; the Alpha=1 run is evaluated first, then Alpha=0.
# NOTE(review): H_0 is produced with Alpha=1 and H_1 with Alpha=0 — confirm
# this labelling is intended.
H_0, H_1 = [Mutiple_Tests(100, s, t, alpha) for alpha in (1, 0)]

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [11:26<00:00,  6.86s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [11:27<00:00,  6.87s/it]


In [43]:
# Sort both result lists ascending, in place (same list objects are kept).
H_1[:] = sorted(H_1)
H_0[:] = sorted(H_0)

In [48]:
# Display the H_0 statistics (sorted ascending by an earlier cell).
H_0

[1.948400469401161,
 2.4299937478718165,
 3.6434562977166247,
 4.192055619339859,
 4.361423853852116,
 4.814655410600205,
 5.145560994303507,
 5.222000395658265,
 5.476832275572569,
 5.505930989379524,
 7.0620107426429675,
 7.444367408318267,
 8.27021471609173,
 8.608313958006407,
 8.756883913548155,
 8.944933412748505,
 9.097236905228986,
 9.433770724795512,
 9.617372847626033,
 9.79271628775856,
 9.859879983039056,
 10.044168220093953,
 10.202166567383404,
 10.292358237785825,
 10.32457997616735,
 10.336217151392265,
 10.989299130225348,
 11.087379669699786,
 11.768842089186142,
 11.782698961150928,
 11.847374900962862,
 11.98667639833087,
 12.115183048170502,
 12.230560158327991,
 12.305185474654946,
 12.36829839684255,
 12.373638675943058,
 12.386005507473712,
 12.69996047013001,
 12.763876774775976,
 12.8307545885898,
 13.227255981724301,
 13.26694112800803,
 13.384423006898544,
 13.479112326937997,
 13.73255689907025,
 13.992335774503681,
 14.002229716368825,
 14.804101972605004,

In [70]:
def Density_Variance(n):
    """Monte Carlo averages of the squared covariate-shift weight.

    Over ``n`` draws of ``z ~ N(0, I_p)`` and ``x = z @ s + N(0, 1)`` this
    returns two averages:

    * the mean of ``Covariate_Shift_Weight(x, z) ** 2`` (a length-1 array,
      because ``x`` is drawn with size 1), and
    * the mean of ``exp((z @ t - z @ s) ** 2)``.

    ``p``, ``s`` and ``t`` are read from module scope.
    """
    sq_weight_total = 0
    exp_shift_total = 0
    for _ in range(n):
        z = np.random.normal(size=p)
        x = z @ s + np.random.normal(size=1)
        sq_weight_total = sq_weight_total + Covariate_Shift_Weight(x, z) ** 2
        shift = z @ t - z @ s
        exp_shift_total = exp_shift_total + np.exp(shift ** 2)
    return sq_weight_total / n, exp_shift_total / n

In [71]:
# Estimate both averages from 100,000 draws.
Density_Variance(100_000)

(array([1.14891235]), 1.161448981209541)