In [None]:
%load_ext autoreload
%autoreload 2

%matplotlib inline
import matplotlib.pyplot as plt
import scienceplots

import os
import numpy as np
import statsmodels.api as sm

from solvel0 import solvel0
from sklearn.linear_model import LinearRegression

from rdata import read_rds

In [None]:
# Directory containing the precomputed .npy arrays loaded below.
# NOTE(review): this is a machine-specific absolute path; adjust it to your
# own data location before running the notebook.
R_path = "/Users/pongpisitthanasutives/Desktop/research/R/python_data"

# Candidate-feature matrix and regression target for the noisy Burgers data
# (file names suggest a noise level of 50 — TODO confirm the unit).
X_pre = np.load(os.path.join(R_path, 'X_pre_burgers_noise50.npy'))
y_pre = np.load(os.path.join(R_path, 'y_pre_burgers_noise50.npy'))

# Fail early, with a readable message, if the two files are out of sync.
assert X_pre.shape[0] == y_pre.shape[0], (
    f"X_pre has {X_pre.shape[0]} rows but y_pre has {y_pre.shape[0]}"
)

In [None]:
# Run the best-subset (L0) search with at most two terms and keep the last
# entry of its result.
# NOTE(review): presumably the results are ordered by complexity so that [-1]
# is the largest allowed subset — confirm against solvel0's documentation.
bestsubset = solvel0(X_pre, y_pre, max_complexity=2, miosr=True)[-1]

# Boolean column mask: True for the columns picked by the best-subset search.
select = np.zeros(X_pre.shape[-1], dtype=np.bool_)
select[list(bestsubset)] = True

X_sel = X_pre[:, select]
X_not_select = X_pre[:, ~select]

# Least-squares fit (no intercept) of the target on the selected columns only.
lr = LinearRegression(fit_intercept=False)
lr.fit(X_sel, y_pre)
y_est = lr.predict(X_sel)

# Design matrix for the follow-up test: the fitted response as the first
# column, followed by every column that was NOT selected.
# np.hstack needs matching dimensionality, so a 1-D prediction (returned when
# y_pre is stored as shape (n,)) is promoted to a single column first; a 2-D
# prediction is passed through unchanged.
y_est_col = y_est if y_est.ndim == 2 else y_est.reshape(-1, 1)
X_test = np.hstack([y_est_col, X_not_select])

In [None]:
# from econml.sklearn_extensions.linear_model import DebiasedLasso
# lasso = DebiasedLasso(fit_intercept=False).fit(X_test, y_pre)
# print(lasso.coef_)
# print(lasso.coef_stderr_)

In [None]:
# Greedy forward-stepwise selection over the columns of X_test.
# At every step each remaining column is tried together with the columns
# already chosen, the one giving the highest R^2 is added, and the naive OLS
# p-value of that newly added (last) coefficient is recorded.
nfeats = X_test.shape[-1]
active_set = []  # column indices in the order they entered the model
pvalues = []     # p-value of the entering column at each step

for _ in range(nfeats):
    candidates = [j for j in range(nfeats) if j not in active_set]
    trial_fits = [
        sm.OLS(y_pre, X_test[:, active_set + [j]]).fit() for j in candidates
    ]
    # First maximiser of R^2 wins, so ties resolve to the lowest column index.
    best = int(np.argmax([trial.rsquared for trial in trial_fits]))
    active_set.append(candidates[best])
    # The entering column is the last regressor, hence pvalues[-1].
    pvalues.append(trial_fits[best].pvalues[-1])

assert len(active_set) == nfeats

In [None]:
# Load the result object saved from R and pull out its "pv" entry, which the
# plotting cell uses as the selection-adjusted p-values.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
fsInf = read_rds("/Users/pongpisitthanasutives/Desktop/research/R/fsInf_burgers_noise50.rds")
adjusted_pvalues = fsInf.get("pv")
# .get() returns None for a missing key, which would only surface later as an
# obscure plotting error; fail here with a clear message instead.
if adjusted_pvalues is None:
    raise KeyError("'pv' entry not found in fsInf_burgers_noise50.rds")


In [None]:
# Plot naive vs. selection-adjusted p-values against the number of predictors
# entered by the forward-stepwise procedure, and save the figure as a PDF.
fig_path = "Figures/pv_burgers_noise50.pdf"
# savefig does not create missing directories, so make sure the target exists.
os.makedirs(os.path.dirname(fig_path), exist_ok=True)

# x-axis: step number 1..K, where K is the number of predictors entered.
step_numbers = list(range(1, len(active_set) + 1))

with plt.style.context("science"):
    plt.figure(figsize=(6, 3))
    plt.plot(step_numbers, pvalues, '-o', c='blue', markerfacecolor='none', label='Naive (unadjusted)')
    plt.plot(step_numbers, adjusted_pvalues, '-o', c='black', markerfacecolor='none', label='Selection-adjusted')
    plt.hlines(xmin=1, xmax=len(active_set), y=0.01, colors='red', linestyles='dashed', label="Referenced threshold $= 0.01$")
    plt.ylabel("P-Value")
    # Raw string: "\#" is not a valid Python escape sequence and triggers a
    # SyntaxWarning on recent Python versions. The backslash is kept because
    # the "science" style presumably renders text through LaTeX, where a bare
    # '#' is special — confirm if the style is changed.
    plt.xlabel(r"\# of predictors entered")
    # Tick at the first step and then at every multiple of five.
    plt.xticks([1] + [k for k in step_numbers if k % 5 == 0])
    plt.legend()
    plt.tight_layout()
    plt.savefig(fig_path)
    plt.show()