# Example 3: Computing p-values with DTransFusion

This notebook demonstrates how to use the `PPL_SI` package to compute selective inference p-values for the DTransFusion algorithm.

In [None]:
import numpy as np
import sys
sys.path.append('..')

from ppl_si import generate_synthetic_data, PPL_SI_DTF, PPL_SI_DTF_randj, source_estimator

## 1. Generate Synthetic Data

In [2]:
# Seed NumPy's legacy global RNG before generating data.
# NOTE(review): this only makes the run reproducible if generate_synthetic_data
# draws from the global NumPy RNG — confirm against the ppl_si implementation.
np.random.seed(42)

# Problem size: feature dimension, number of tasks, and one sample size per task.
p = 300
K = 5
n_list = [100, 100, 100, 100, 100]

# Remaining generator settings, forwarded unchanged by keyword below.
# NOTE(review): their exact meaning is defined by generate_synthetic_data;
# the names suggest shared / task-specific signal counts, the shared
# coefficient magnitude, a perturbation scale and an intercept — TODO confirm.
num_sh = 10
num_inv = 5
true_beta_sh = 0.5
Gamma = 0.1
itc = 0

X_list, Y_list, true_betaK, Sigma_list = generate_synthetic_data(
    p=p, num_sh=num_sh, num_inv=num_inv, K=K,
    n_list=n_list, true_beta_sh=true_beta_sh, Gamma=Gamma, itc=itc,
)

# The last entry of each returned list is used as the target task in later cells.
XK, YK, Sigma_K = X_list[-1], Y_list[-1], Sigma_list[-1]

print(f"Number of tasks: {K}")
print(f"Feature dimension: {p}")
print(f"Target sample size: {n_list[-1]}")

Number of tasks: 5
Feature dimension: 300
Target sample size: 100


## 2. Set Parameters

In [3]:
# Quantities reused by the formulas below.
log_p = np.log(p)
n_total = sum(n_list)
n_target = n_list[-1]

# One value per task index 0..K-1; the source-estimation cell reads indices 0..K-2.
lambda_k_list = [np.sqrt(2 * log_p / n_list[k]) for k in range(K)]

# lambda_0 scales with the pooled sample size, lambda_tilde with the last task's.
lambda_0 = 2 * np.sqrt(log_p / n_total)
lambda_tilde = 2 * np.sqrt(log_p / n_target)

# One weight per task index 0..K-2 (i.e. every task except the last).
qk_weights = [0.2 * np.sqrt(n_list[k] / n_total) for k in range(K - 1)]

# Forwarded to the PPL_SI_DTF* calls in later cells.
# NOTE(review): presumably the bounds of the interval searched along the test
# statistic — confirm against the ppl_si documentation.
z_min, z_max = -20, 20

print(f"lambda_0: {lambda_0:.4f}")
print(f"lambda_tilde: {lambda_tilde:.4f}")
print(f"qk_weights: {[f'{w:.4f}' for w in qk_weights]}")

lambda_0: 0.2136
lambda_tilde: 0.4777
qk_weights: ['0.0894', '0.0894', '0.0894', '0.0894']


## 3. Compute p-values for All Selected Features

In [4]:
# Fit one estimator per task index 0..K-2; the last task is passed to
# PPL_SI_DTF separately as XK / YK.
beta_tilde_list = [
    source_estimator(X_list[k], Y_list[k], lambda_k_list[k])
    for k in range(K - 1)
]

p_values = PPL_SI_DTF(
    XK, YK, beta_tilde_list, n_list,
    lambda_0, lambda_tilde, qk_weights,
    Sigma_K, z_min, z_max,
    num_segments=24,
)

# A None result is reported as "no features selected"; otherwise the result is
# iterated as (feature index, p-value) pairs.
if p_values is None:
    print("No features selected")
else:
    print(f"\nNumber of selected features: {len(p_values)}")
    print("\nFeature index and p-values:")
    for j, p_val in p_values:
        print(f"Feature {j}: true_betaK[{j}] = {true_betaK[j]}, p-value = {p_val:.4f}")


Number of selected features: 12

Feature index and p-values:
Feature 0: true_betaK[0] = 0.5, p-value = 0.0008
Feature 1: true_betaK[1] = 0.5, p-value = 0.0000
Feature 2: true_betaK[2] = 0.5, p-value = 0.0001
Feature 3: true_betaK[3] = 0.5, p-value = 0.0004
Feature 4: true_betaK[4] = 0.5, p-value = 0.0000
Feature 5: true_betaK[5] = 0.5, p-value = 0.0001
Feature 6: true_betaK[6] = 0.5, p-value = 0.0000
Feature 7: true_betaK[7] = 0.5, p-value = 0.0046
Feature 8: true_betaK[8] = 0.5, p-value = 0.0000
Feature 9: true_betaK[9] = 0.5, p-value = 0.0000
Feature 23: true_betaK[23] = 0.0, p-value = 0.8883
Feature 112: true_betaK[112] = 0.0, p-value = 0.3085


## 4. Compute the p-value for a Randomly Selected Feature

In [5]:
# Same inputs as the all-features call, but the result is unpacked as a single
# (feature index, p-value) pair.
# NOTE(review): which selected feature is tested is decided inside
# PPL_SI_DTF_randj; the name suggests it is drawn at random — confirm.
j, p_value_rand = PPL_SI_DTF_randj(
    XK, YK, beta_tilde_list, n_list,
    lambda_0, lambda_tilde, qk_weights,
    Sigma_K, z_min, z_max,
    num_segments=24,
)

# A None p-value is reported as "no features selected".
if p_value_rand is None:
    print("No features selected")
else:
    print(f"Random feature: {j} - true_betaK[{j}] = {true_betaK[j]} p-value  {p_value_rand:.4f}")

Random feature: 6 - true_betaK[6] = 0.5 p-value  0.0000


## 5. Analysis

In [6]:
# Report which selected features have a p-value at or below the significance level.
alpha = 0.05

if p_values is None:
    # Report the empty case explicitly, as the two p-value cells above do,
    # instead of finishing silently.
    print("No features selected")
else:
    # Keep the (feature index, p-value) pairs that pass the threshold. The
    # p-value is bound to `p_val`, not `p`, so it cannot be confused with the
    # notebook-level feature dimension `p`.
    significant_features = [(j, p_val) for j, p_val in p_values if p_val <= alpha]

    print(f"\nSignificance level: {alpha}")
    print(f"Number of significant features: {len(significant_features)}")

    if significant_features:
        print("\nSignificant features:")
        for j, p_val in significant_features:
            print(f"Feature {j}: true_betaK[{j}] = {true_betaK[j]} p-value = {p_val:.4f}")


Significance level: 0.05
Number of significant features: 10

Significant features:
Feature 0: true_betaK[0] = 0.5 p-value = 0.0008
Feature 1: true_betaK[1] = 0.5 p-value = 0.0000
Feature 2: true_betaK[2] = 0.5 p-value = 0.0001
Feature 3: true_betaK[3] = 0.5 p-value = 0.0004
Feature 4: true_betaK[4] = 0.5 p-value = 0.0000
Feature 5: true_betaK[5] = 0.5 p-value = 0.0001
Feature 6: true_betaK[6] = 0.5 p-value = 0.0000
Feature 7: true_betaK[7] = 0.5 p-value = 0.0046
Feature 8: true_betaK[8] = 0.5 p-value = 0.0000
Feature 9: true_betaK[9] = 0.5 p-value = 0.0000
