# Example 1: Computing p-values with Pretrained Lasso

This notebook demonstrates how to use the `PPL_SI` package to compute selective inference p-values for the Pretrained Lasso algorithm.

In [1]:
import numpy as np
import sys
# Add the parent directory to the module search path before importing ppl_si
# (presumably so the package resolves from a repo checkout without being
# installed — TODO confirm the notebook is always run from a subdirectory).
sys.path.append('..')

# Synthetic-data generator plus the two selective-inference entry points
# that the cells below call.
from ppl_si import generate_synthetic_data, PPL_SI, PPL_SI_randj

## 1. Generate Synthetic Data

In [3]:
# Seed NumPy's legacy global RNG so repeated runs give the same data
# (presumably generate_synthetic_data draws from it — TODO confirm).
np.random.seed(42)

# Problem size. `p` is reported below as the feature dimension.
p, K = 300, 5
num_sh, num_inv = 10, 5
# Presumably one sample size per task; the last entry is reported below as
# the target sample size.
n_list = [100, 100, 100, 100, 50]

true_beta_sh = 0.1
# NOTE(review): true_beta_inv is assigned here but never passed to
# generate_synthetic_data, so the generator uses whatever it does when the
# value is omitted. Confirm against the function's signature whether it
# should be forwarded.
true_beta_inv = 0.1

X_list, Y_list, true_betaK, Sigma_list = generate_synthetic_data(
    p=p, num_sh=num_sh, num_inv=num_inv,
    K=K, n_list=n_list,
    true_beta_sh=true_beta_sh,
)

# Quick sanity summary of what was generated.
print(
    f"Number of tasks: {len(X_list)}",
    f"Feature dimension: {p}",
    f"Target sample size: {n_list[-1]}",
    sep="\n",
)

Number of tasks: 5
Feature dimension: 300
Target sample size: 50


## 2. Set Parameters

In [8]:
# Settings forwarded unchanged to PPL_SI and PPL_SI_randj in later cells.
lambda_sh, lambda_K = 50, 5
rho = 0.5
z_min, z_max = -20, 20

# Echo the tuning values so they are recorded alongside the results.
print(
    f"lambda_sh: {lambda_sh}",
    f"lambda_K: {lambda_K}",
    f"rho: {rho}",
    sep="\n",
)

lambda_sh: 50
lambda_K: 5
rho: 0.5


## 3. Compute p-values for All Selected Features

In [9]:
# Run PPL_SI on the generated data with the settings chosen above.
# The result is treated as either None or an iterable of
# (feature index, p-value) pairs.
p_values = PPL_SI(
    X_list=X_list, Y_list=Y_list,
    lambda_sh=lambda_sh, lambda_K=lambda_K, rho=rho,
    Sigma_list=Sigma_list,
    z_min=z_min, z_max=z_max,
)

if p_values is None:
    # Nothing to report when no result came back.
    print("No features selected")
else:
    print(f"\nNumber of selected features: {len(p_values)}")
    print("\nFeature index and p-values:")
    for j, p_val in p_values:
        print(f"Feature {j}: p-value = {p_val:.4f}")


Number of selected features: 23

Feature index and p-values:
Feature 0: p-value = 0.4647
Feature 1: p-value = 0.4005
Feature 2: p-value = 0.4257
Feature 4: p-value = 0.8520
Feature 5: p-value = 0.9508
Feature 8: p-value = 0.6131
Feature 33: p-value = 0.5948
Feature 34: p-value = 0.3669
Feature 71: p-value = 0.5895
Feature 81: p-value = 0.4767
Feature 91: p-value = 0.6627
Feature 121: p-value = 0.8051
Feature 125: p-value = 0.2926
Feature 128: p-value = 0.8362
Feature 157: p-value = 0.8146
Feature 180: p-value = 0.3783
Feature 200: p-value = 0.9205
Feature 210: p-value = 0.4887
Feature 228: p-value = 0.9826
Feature 246: p-value = 0.1815
Feature 266: p-value = 0.2658
Feature 287: p-value = 0.7781
Feature 297: p-value = 0.4082


## 4. Compute p-value for a Randomly Selected Feature

In [10]:
# Same inputs as the PPL_SI call, but PPL_SI_randj yields a single value
# (or None), which is formatted below as one p-value.
p_value_rand = PPL_SI_randj(
    X_list=X_list, Y_list=Y_list,
    lambda_sh=lambda_sh, lambda_K=lambda_K, rho=rho,
    Sigma_list=Sigma_list,
    z_min=z_min, z_max=z_max,
)

if p_value_rand is None:
    print("No features selected")
else:
    print(f"Random feature p-value: {p_value_rand:.4f}")

Random feature p-value: 0.8051


## 5. Analysis

In [11]:
# Keep the (feature index, p-value) pairs whose p-value is at or below the
# significance level, then report them. Skipped entirely when p_values is None.
if p_values is not None:
    alpha = 0.05
    significant_features = [
        (idx, pv) for idx, pv in p_values if pv <= alpha
    ]

    print(f"\nSignificance level: {alpha}")
    print(f"Number of significant features: {len(significant_features)}")

    # Only print the listing header when there is something to list.
    if significant_features:
        print("\nSignificant features:")
        for j, p_val in significant_features:
            print(f"Feature {j}: p-value = {p_val:.4f}")


Significance level: 0.05
Number of significant features: 0
