In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import os
import math
import random
import numpy as np
import scipy.io as sio
from tqdm import trange
import sys; sys.path.insert(0, '../')
from selective_inference import forward_stop_rule, sfs_si, stepwise_selective_inference, subset_fdr
from sklearn.linear_model import LassoLars, lars_path
from abess import LinearRegression
from sklearn.preprocessing import StandardScaler
from best_subset import brute_force_all_subsets, brute_force
# from okridge.solvel0 import okridge_solvel0, okridge_solvel0_full
from solvel0 import solvel0
import statsmodels.api as sm
from scipy import stats
import fpsample
from dppy.finite_dpps import FiniteDPP
from kneefinder import KneeFinder
from weakident_python.weakindent_model import weak_ident
from skimage.restoration import estimate_sigma
import bm3d
import pysindy as ps
from utils import *
from UBIC import *

In [2]:
X_pre = np.load("../Cache/X_poly_NS.npy")
y_pre = np.load("../Cache/y_poly_NS.npy")
feature_names = np.load("../Cache/feature_names_poly_NS.npy", allow_pickle=True)

y_pre = y_pre[:, 0:1]

In [3]:
from hidimstat import (model_x_knockoff, 
                    model_x_knockoff_pvalue, 
                    model_x_knockoff_bootstrap_quantile, 
                    model_x_knockoff_bootstrap_e_value)

# selected, test_scores, threshold, X_tildes = model_x_knockoff(X_pre, 
#                                                               y_pre, 
#                                                               centered=True, 
#                                                               n_bootstraps=25, 
#                                                               fdr=0.1, 
#                                                               n_jobs=-1)

from sklearn.utils import resample
from abess import LinearRegression as AbessLinearRegression
from knockpy import KnockoffFilter, knockoff_stats, knockoffs
from knockpy.utilities import estimate_covariance
from scipy import stats
from statsmodels.stats.multitest import multipletests
from c2st.check import c2st # https://github.com/psteinb/c2st

u_pre = y_pre.copy()
X_pre_top = StandardScaler().fit_transform(X_pre)
y_pre = StandardScaler().fit_transform(u_pre)

lr = AbessLinearRegression(path_type='gs', s_max=10, fit_intercept=False, cv=5, screening_size=0)
fstat = knockoff_stats.ShapStatistic(model=lr)
kfilter = KnockoffFilter(ksampler='gaussian', fstat=fstat, knockoff_kwargs={'method':'ci'})

# kfilter = KnockoffFilter(ksampler='gaussian', fstat='lasso', knockoff_kwargs={'method':'ci'})

alibi is not installed in the environment.


In [4]:
fdr = 1/3 # 1/4, 1/3, 1/2
rejections = []
test_scores = []
thresholds = []
for _ in trange(50):
    np.random.seed(_)
    X_resample, y_resample = resample(X_pre_top, y_pre.flatten(), n_samples=10000, replace=True, random_state=_)
    rejection = kfilter.forward(X=X_resample, y=y_resample, fdr=fdr, shrinkage="ledoitwolf", recycle_up_to=0.5, tol=1e-4)
    # rejection = kfilter.forward(X=X_pre_top, y=y_pre.flatten(), fdr=fdr, shrinkage="ledoitwolf", recycle_up_to=0.5, tol=1e-4)
    rejection = sorted(set(np.where(rejection == 1)[0]))
    if len(rejection) > 0:
        rejections.append(rejection)
        test_scores.append(kfilter.W)
        thresholds.append(kfilter.threshold)
del X_resample, y_resample

100%|███████████████████████████████████████████████████████████████████████████| 50/50 [00:21<00:00,  2.38it/s]


In [5]:
aggregated_ko_selection, _, _ = model_x_knockoff_bootstrap_quantile(test_scores, 
                                                                    fdr=fdr, 
                                                                    adaptive_aggregation=False)
print(feature_names[aggregated_ko_selection])

eval_selection, _, _ = model_x_knockoff_bootstrap_e_value(test_scores, thresholds, fdr=fdr)
print(feature_names[eval_selection])

['w' 'v' 'w_{xx}' 'w_{yy}' 'ww_{y}' 'uw_{x}' 'uw_{yy}' 'vw_{y}']
['w' 'v' 'w_{x}' 'w_{xx}' 'w_{yy}' 'ww_{y}' 'ww_{xx}' 'uw_{x}' 'uw_{xx}'
 'uw_{yy}' 'vw_{y}']


In [6]:
rejections = np.array(eval_selection)
X_pre_top = X_pre[:, rejections]
feature_names[rejections]

array(['w', 'v', 'w_{x}', 'w_{xx}', 'w_{yy}', 'ww_{y}', 'ww_{xx}',
       'uw_{x}', 'uw_{xx}', 'uw_{yy}', 'vw_{y}'], dtype='<U7')

In [7]:
X_pre_top_norm  = np.linalg.norm(X_pre_top, 2, axis=0)
X_pre_top = X_pre_top/X_pre_top_norm
y_pre = u_pre.copy()

In [8]:
best_subsets = solvel0(X_pre_top, y_pre.flatten(), max_complexity=4)
feature_names[np.array(best_subsets[-1])]

  0%|                                                                                     | 0/4 [00:00<?, ?it/s]

Set parameter Username
Academic license - for non-commercial use only - expires 2026-04-04


100%|█████████████████████████████████████████████████████████████████████████████| 4/4 [00:09<00:00,  2.38s/it]


array(['w_{x}', 'w_{y}', 'ww_{x}', 'ww_{yy}'], dtype='<U7')

### Slow

In [9]:
# best_subsets = brute_force_all_subsets(X_pre_top, y_pre)[1]
# feature_names[rejections[list(best_subsets[3])]]

# from l0bnb import fit_path

# sols = fit_path(X_pre_top, y_pre.flatten(), lambda_2 = 1e-6, max_nonzeros = 8)

# feature_names[rejections][np.nonzero(sols[4]['B'])[0]]