In [None]:
# R to Python conversion of simulation setup
import numpy as np
from scipy.stats import multivariate_normal
from sklearn.preprocessing import StandardScaler

# Set parameters (matching R code exactly)
n = 500
p = 200
nonzero_coefs = 30
Amp = 0.25
rho = 0.6

# Generate Toeplitz correlation matrix (equivalent to toeplitz(rho^(0:(p-1))))
Theta_8 = np.array([[rho**abs(i-j) for j in range(p)] for i in range(p)])

# Generate X from multivariate normal (equivalent to mvrnorm)
X = multivariate_normal.rvs(mean=np.zeros(p), cov=Theta_8, size=n, random_state=42)

# Scale X (equivalent to scale(X))
X = StandardScaler().fit_transform(X)

# Generate true coefficients (equivalent to beta setup)
beta = np.zeros(p)
beta[:nonzero_coefs] = np.random.choice([-Amp, Amp], size=nonzero_coefs, replace=True)

# Signal indices (equivalent to Signal_index <- 1:nonzero_coefs)
Signal_index = np.arange(nonzero_coefs)  # 0-indexed in Python vs 1-indexed in R

# True labels (equivalent to true_labels <- beta != 0)
true_labels = beta != 0

# Generate response (equivalent to y <- X %*% beta + rnorm(n))
y = X @ beta + np.random.normal(0, 1, n)

# Center y (equivalent to y <- y - mean(y))
y = y - np.mean(y)

print(f"Data shape: X={X.shape}, y={y.shape}")
print(f"True signal indices: {Signal_index}")
print(f"Number of true signals: {np.sum(true_labels)}")
print(f"y range: [{np.min(y):.3f}, {np.max(y):.3f}]")
print(f"y mean: {np.mean(y):.6f} (should be ~0)")

Data shape: X=(500, 200), y=(500,)
True signal indices: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29]
Number of true signals: 30
y range: [-5.483, 6.692]
y mean: -0.000000 (should be ~0)


In [6]:
from nullstrap.models.lm import NullstrapLM

# After the simulation code above...

# Initialize Nullstrap
model = NullstrapLM(
    fdr=0.1,           # Target FDR
    alpha_=None,       # Auto-select alpha (regularization parameter)
    B_reps=5,          # Bootstrap repetitions
    random_state=42    # For reproducibility
)

# Fit the model
model.fit(X, y)

# Get results
threshold = model.threshold_
selected_features = model.selected_
statistics = model.statistic_
correction_factor = model.correction_factor_
alpha_used = model.alpha_used_

print(f"Threshold: {threshold}")
print(f"Selected features: {selected_features}")
print(f"Number selected: {len(selected_features)}")
print(f"Correction factor: {correction_factor}")
print(f"Alpha used: {alpha_used}")

Threshold: 0.10795775974834527
Selected features: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 21 22 24 25 26
 27 28 29]
Number selected: 27
Correction factor: 0.22865450128243625
Alpha used: 0.02549875099838858


In [7]:
from sklearn.linear_model import LassoCV
import numpy as np

# Fit Lasso with cross-validation to select lambda
lasso = LassoCV(cv=5, random_state=42).fit(X, y)

# Get the coefficients
lasso_coef = lasso.coef_

# Select features with nonzero coefficients
lasso_selected = np.where(lasso_coef != 0)[0]

print(f"Lasso selected features: {lasso_selected}")
print(f"Number selected by Lasso: {len(lasso_selected)}")
print(f"Lasso alpha (lambda) used: {lasso.alpha_}")

Lasso selected features: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  20  21  22  24  25  26  27  28  29  32  45  53  60  62  64  74  79
  84  87  94  97 104 107 112 121 123 124 125 136 142 146 149 155 174 177
 178 195 199]
Number selected by Lasso: 57
Lasso alpha (lambda) used: 0.044355026497695405


In [8]:
from nullstrap.utils.metrics import compute_power, compute_fdr

# Compute true support
true_support = np.where(beta != 0)[0]

# Nullstrap results
nullstrap_power = compute_power(selected_features, true_support)
nullstrap_fdr = compute_fdr(selected_features, true_support, p) 
print(f"Nullstrap Power: {nullstrap_power:.3f}")
print(f"Nullstrap FDR: {nullstrap_fdr:.3f}")

# Lasso results
lasso_power = compute_power(lasso_selected, true_support)
lasso_fdr = compute_fdr(lasso_selected, true_support, p)
print(f"Lasso Power: {lasso_power:.3f}")
print(f"Lasso FDR: {lasso_fdr:.3f}")

Nullstrap Power: 0.900
Nullstrap FDR: 0.000
Lasso Power: 0.933
Lasso FDR: 0.509
