In [12]:
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from scipy.special import gamma
from numba import jit

In [13]:
def compute_sigma(Y, X, beta):
    sigma = np.linalg.norm(Y - X @ beta)
    return sigma / np.sqrt(p)

In [14]:
@jit(nopython=True)
def second(X, Y, t, k_max):
    s = 0.5
    beta_cur = np.zeros(p)
    v_cur    = np.zeros(p)
    q_cur    = np.zeros(p)
    for k in range(1, k_max):
        beta_prev = beta_cur
        p1       =  2*k/(2*k+2*s+1)
        q_half   =  p1*q_cur + p1*(t / 2) * np.dot(X.T, (Y - np.dot(X, beta_cur))) 
        beta_cur = beta_prev + t * q_half
        a        = (2*k-2*s+1)*(k+1)/(2*k+2*s+3)/k
        v_cur    =  beta_cur + 2*t*a*q_half
        q_cur    =  (2*k+1-2*s)/(2*k+2)*q_half +(t / 2) * np.dot(X.T, (Y - np.dot(X, v_cur))) 
        if np.linalg.norm(Y - np.dot(X, beta_cur)) <= delta:
        # if np.linalg.norm(beta_cur - beta) >= np.linalg.norm(beta_prev - beta) and k >= 500:
            break
    return beta_cur, k

@jit(nopython=True)
def second_debias(X, Y, t, k_max):
    s = 0.5
    beta_cur = np.zeros(p)
    v_cur    = np.zeros(p)
    q_cur    = np.zeros(p)

    for k in range(1, k_max+1):
        p1       =  2*k/(2*k+2*s+1)
        q_half   =  p1*q_cur + p1*(t / 2) * np.dot(X.T, (Y - np.dot(X, beta_cur))) 
        beta_cur = beta_cur + t * q_half
        a        = (2*k-2*s+1)*(k+1)/(2*k+2*s+3)/k
        v_cur    =  beta_cur + 2*t*a*q_half
        q_cur    =  (2*k+1-2*s)/(2*k+2)*q_half +(t / 2) * np.dot(X.T, (Y - np.dot(X, v_cur))) 
        if k == k_max:
            break
    return beta_cur

In [15]:

def compute_norminf_second(seed):
    np.random.seed(seed)
    e_second = np.random.normal(0, sigma_second, n)
    Y_second = np.dot(X, beta_second) + e_second
    e_second_debias = np.random.normal(0, sigma_second_debias, n)
    Y_second_debias = np.dot(X, beta_second_debias) + e_second_debias
    beta_Secondstar, k_Secondstar = second(X, Y_second, t5, max_iter,)
    beta_secondstar, k_secondstar = second(X, Y_second_debias, t5, max_iter)
    beta_Secondstar_debias = second_debias(X, Y_second_debias, t5, k_secondstar)
    beta_Secondstar_debias = 2*beta_secondstar-beta_Secondstar_debias
    beta_Secondstar[np.abs(beta_Secondstar) <= bn_second] = 0
    beta_Secondstar_debias[np.abs(beta_Secondstar_debias) <= bn_second_debias]=0
    norminf_second = np.linalg.norm(beta_secondstar - beta_second, ord=np.inf)
    norminf_second_debias = np.linalg.norm(beta_Secondstar_debias - beta_second_debias, ord=np.inf)
    return norminf_second, norminf_second_debias



In [16]:
beta_names = [
    'beta_second', 'beta_second_debias', 
]

variable_names = [
    'second', 'second_debias', 
]


In [17]:
n        = 1000    # 样本量
p        = 1000    # 变量数
I        = np.eye(p)
t5       = 2.5e-4
# 2.5e-4 84 0
max_iter = 10000   # 最大迭代次数
k_max    = 1000  # 最大迭代次数
kappa    = 0.5    # 加速阶数
omega    = 3
eta      = 5
tau      = 1.1

i_second = 0
i_second_debias = 0
Sigma_second = 0
Sigma_second_debias = 0
Inf_norm_second = 0
Inf_norm_second_debias = 0
Norm_second = 0
Norm_second_debias = 0

In [18]:
np.random.seed(24)
cov_matrix = np.full((p, p), 0.5)  # 创建一个所有元素都是 0.5 的矩阵
np.fill_diagonal(cov_matrix, 2.0)  # 将对角线元素设置为 2.0

mean_vector = np.zeros(p)  # 均值向量
X = np.random.multivariate_normal(mean_vector, cov_matrix, n)
# 奇异值分解
U, S, Vt = np.linalg.svd(X)
# 定义β向量
beta = np.zeros(p)  # 初始化β向量为0

# 根据条件设置β向量的值
beta[0:5]   = 2.0   # βi = 2.0, i = 1, 2, 3, 4, 5
beta[5:10]  = -2.0  # βi = -2.0, i = 6, 7, 8, 9, 10
beta[10:15] = 1.0   # βi = 1.0, i = 11, 12, 13, 14, 15
beta[15:20] = -1.0  # βi = -1.0, i = 16, 17, 18, 19, 20

# Generate 20 random indices
indices = np.random.randint(low=0, high=p, size=20)

# Get the indices of non-zero elements in beta
nonzero_indices = np.nonzero(beta)[0]
random_beta = np.zeros(p)
random_beta[indices] = beta[nonzero_indices]
beta = random_beta



In [19]:
for x in range(1000):
    print(x)
    np.random.seed(2*x)
    # 生成均值为0方差为4的n维向量en

    mean    = 0  # 均值
    std_dev = 2  # 标准差，方差为标准差的平方，即4    

    # 生成向量
    e = np.random.normal(mean, std_dev, n)
    norm_e = np.linalg.norm(e)
    Y = np.dot(X, beta) + e
    
    delta = 1.01*norm_e
    # tau=1.1 84
    # tau=0.8 100


    beta_second, k_second = second(X, Y, t5, max_iter)
    beta_second_debias = second_debias(X, X@beta_second, t5, k_second)
    beta_second_debias = 2*beta_second-beta_second_debias
    sigma_second = compute_sigma(Y, X, beta_second)
    sigma_second_debias = compute_sigma(Y, X, beta_second_debias)

    
    result = []  # 初始化results变量

    for i in range(1, 1001):
        bn = 0.001 * i

        norms = [bn]
        for var_name in beta_names:
            var_copy = globals()[var_name].copy()
            var_copy[np.abs(var_copy) < bn] = 0
            norm = np.linalg.norm(var_copy - beta)
            norms.append(norm)
        result.append(norms)
    columns = ['bn'] + [f'norm_{name}' for name in beta_names]
    df = pd.DataFrame(result, columns=columns)
    df.to_csv('output.csv', index=False)
    
    results = []

    
    for column in df.columns[1:]:
        min_val = df[column].min()
        min_indices = np.where(df[column] == min_val)[0]
        # 检查最小值是否相邻，并计算平均索引
        if len(min_indices) > 1 and np.all(np.diff(min_indices) == 1):
            # 如果最小值相邻，计算平均索引
            # avg_index = np.mean(min_indices)
            # avg_index = min_indices[0]
            avg_index = np.percentile(min_indices, 60)
        else:
            # 如果只有一个最小值或最小值不相邻，直接使用找到的索引
            avg_index = min_indices[0]
        # 将结果添加到列表中（乘以0.001以匹配您的需求）
        results.append([min_val, avg_index * 0.001])
        results_array = np.array(results)
    


    for i, name in enumerate(variable_names):
        globals()[f'bn_{name}'] = results_array[i, 1]
    
    for name in variable_names:
        beta_var = globals()[f'beta_{name}']
        bn_var = globals()[f'bn_{name}']
        beta_var[np.abs(beta_var) < bn_var] = 0
        
    for name in variable_names:
        sigma = compute_sigma(Y, X, globals()[f'beta_{name}'])
        globals()[f'sigma_{name}'] = sigma

    Sigma_second += sigma_second
    Sigma_second_debias += sigma_second_debias     
    Norm_second += np.linalg.norm(beta_second-beta)
    Norm_second_debias += np.linalg.norm(beta_second_debias-beta)
        
    norminf_second_list = []
    norminf_second_debias_list = []
    results = Parallel(n_jobs=-1)(delayed(compute_norminf_second)(seed) for seed in range(500))
    for norminf_second, norminf_second_debias in results:
        norminf_second_list.append(norminf_second)
        norminf_second_debias_list.append(norminf_second_debias)
    inf_second = np.linalg.norm(beta - beta_second, ord=np.inf)
    inf_second_debias = np.linalg.norm(beta - beta_second_debias, ord=np.inf)
    Inf_norm_second += inf_second
    Inf_norm_second_debias += inf_second_debias
    if inf_second < np.percentile(norminf_second_list, 95):
        i_second += 1
    if inf_second_debias < np.percentile(norminf_second_debias_list, 95):
        i_second_debias += 1
        
    if x == 999:
        break


0
1


KeyboardInterrupt: 

In [None]:
print(f'Second Coverage: {i_second}')
print(f'Second Debiased Coverage: {i_second_debias}')
print(f'Second Sigma: {Sigma_second/1000}')
print(f'Second Debiased Sigma: {Sigma_second_debias/1000}')
print(f'Second Inf Norm: {Inf_norm_second/1000}')
print(f'Second Debiased Inf Norm: {Inf_norm_second_debias/1000}')
print(f'Second Norm: {Norm_second/1000}')
print(f'Second Debiased Norm: {Norm_second_debias/1000}')


Second Coverage: 70
Second Debiased Coverage: 70
Second Sigma: 0.23329909233980023
Second Debiased Sigma: 0.22358976246642992
Second Inf Norm: 0.056076511714969836
Second Debiased Inf Norm: 0.05166781452936204
Second Norm: 0.1586164002513614
Second Debiased Norm: 0.14730558086939793
