# Example 2: Computing p-values with Parameter-Only Pretrained Lasso

This notebook demonstrates how to use the `PPL_SI` package to compute selective inference p-values for the Parameter-Only Pretrained Lasso algorithm.

In [None]:
import numpy as np
import sys
sys.path.append('..')

from ppl_si import generate_synthetic_data, Pretrain_Lasso, PPL_SI_param_only, PPL_SI_param_only_randj

## 1. Generate Synthetic Data

In [None]:
# Seed NumPy's legacy global RNG before generating data.
# NOTE(review): presumably generate_synthetic_data draws from this global
# state -- confirm in ppl_si if exact reproducibility matters.
np.random.seed(42)

# Generator settings, kept as notebook-level names because later cells read them.
p = 300                # feature dimension (reported below)
num_sh, num_inv = 10, 5
K = 5
n_list = [100] * 5     # per-group sample sizes; the last entry is reported as the target's
true_beta_sh = 0.3

# itc and Gamma are forwarded as-is; their meaning is defined inside ppl_si.
X_list, Y_list, true_betaK, Sigma_list = generate_synthetic_data(
    p=p, num_sh=num_sh, num_inv=num_inv, K=K, n_list=n_list,
    true_beta_sh=true_beta_sh, itc=0.5, Gamma=0.1,
)

# Pooled data: every group stacked along the first axis.
X, Y = np.concatenate(X_list), np.concatenate(Y_list)
# The last group is the one the later cells run inference on.
XK, YK, Sigma_K = X_list[-1], Y_list[-1], Sigma_list[-1]

print(
    f"Number of groups: {len(X_list)}",
    f"Feature dimension: {p}",
    f"Target sample size: {n_list[-1]}",
    sep="\n",
)

Number of groups: 5
Feature dimension: 300
Target sample size: 100


## 2. Set Parameters

In [8]:
# Tuning constants for the inference cells: lambda_sh goes to Pretrain_Lasso,
# lambda_K and rho go to the PPL_SI_param_only* helpers.
lambda_sh, lambda_K = 90, 10
rho = 0.1
# Passed to the helpers as the z_min / z_max keyword arguments.
z_min, z_max = -20, 20

print(
    f"lambda_sh: {lambda_sh}",
    f"lambda_K: {lambda_K}",
    f"rho: {rho}",
    sep="\n",
)

lambda_sh: 90
lambda_K: 10
rho: 0.1


## 3. Compute p-values for All Selected Features

In [None]:
# Pretrain_Lasso runs on the pooled X, Y (all groups concatenated) with lambda_sh.
beta_sh = Pretrain_Lasso(X, Y, lambda_sh)

# p-values are computed on the target group's data only (XK, YK, Sigma_K).
p_values = PPL_SI_param_only(
    beta_sh, XK, YK, lambda_K, rho, Sigma_K,
    z_min=z_min, z_max=z_max,
)

# A None result is treated as "nothing was selected"; otherwise the result is
# iterated as (feature index, p-value) pairs.
if p_values is None:
    print("No features selected")
else:
    print(f"\nNumber of selected features: {len(p_values)}")
    print("\nFeature index and p-values:")
    for j, p_val in p_values:
        print(f"Feature {j}: true_betaK[{j}] = {true_betaK[j]}, p-value = {p_val:.4f}")


Number of selected features: 9

Feature index and p-values:
Feature 0: true_betaK[0] = 0.5826086510393981, p-value = 0.0000
Feature 2: true_betaK[2] = 0.3, p-value = 0.0043
Feature 3: true_betaK[3] = 0.3, p-value = 0.0176
Feature 4: true_betaK[4] = 0.3, p-value = 0.2821
Feature 5: true_betaK[5] = 0.3, p-value = 0.0006
Feature 6: true_betaK[6] = 0.3, p-value = 0.0002
Feature 7: true_betaK[7] = 0.3, p-value = 0.0492
Feature 9: true_betaK[9] = 0.3, p-value = 0.0243
Feature 10: true_betaK[10] = 0.3, p-value = 0.0050


## 4. Compute p-value for a Randomly Selected Feature

In [None]:
# Request a p-value for a single feature chosen by the package.
# NOTE(review): the helper's "nothing selected" return convention is not
# visible from this notebook. PPL_SI_param_only's result is compared against a
# bare None in the previous section, so the same possibility is guarded here:
# unpacking a bare None would raise TypeError before the "No features
# selected" branch could ever run. A normal (index, p-value) pair behaves
# exactly as before.
rand_result = PPL_SI_param_only_randj(
    beta_sh, XK, YK, lambda_K, rho, Sigma_K, z_min=z_min, z_max=z_max
)
j, p_value_rand = (None, None) if rand_result is None else rand_result

if p_value_rand is not None:
    print(f"Random feature: {j} - true_betaK[{j}] = {true_betaK[j]} p-value  {p_value_rand:.4f}")
else:
    print("No features selected")

Random feature: 7 - true_betaK[7] = 0.3 p-value  0.0492


## 5. Analysis

In [12]:
# Only runs when the all-features cell produced a result.
if p_values is not None:
    alpha = 0.05  # significance threshold; a p-value equal to alpha counts as significant

    # Keep the (feature index, p-value) pairs at or below the threshold.
    significant_features = [
        (feat_idx, feat_pval)
        for feat_idx, feat_pval in p_values
        if feat_pval <= alpha
    ]

    print(f"\nSignificance level: {alpha}")
    print(f"Number of significant features: {len(significant_features)}")

    # Skip the listing (and its heading) entirely when nothing passed.
    if significant_features:
        print("\nSignificant features:")
        for j, p_val in significant_features:
            print(f"Feature {j}: true_betaK[{j}] = {true_betaK[j]} p-value = {p_val:.4f}")


Significance level: 0.05
Number of significant features: 8

Significant features:
Feature 0: true_betaK[0] = 0.5826086510393981 p-value = 0.0000
Feature 2: true_betaK[2] = 0.3 p-value = 0.0043
Feature 3: true_betaK[3] = 0.3 p-value = 0.0176
Feature 5: true_betaK[5] = 0.3 p-value = 0.0006
Feature 6: true_betaK[6] = 0.3 p-value = 0.0002
Feature 7: true_betaK[7] = 0.3 p-value = 0.0492
Feature 9: true_betaK[9] = 0.3 p-value = 0.0243
Feature 10: true_betaK[10] = 0.3 p-value = 0.0050
