# Import Required Libraries
This cell imports the necessary Python libraries for the analysis. It includes the custom `final_util` library, `numpy` and `numpy.random` for numerical calculations, `matplotlib.pyplot` for data visualization, and sets the plot style to 'ggplot'.

In [2]:
from final_util import *
import numpy as np
import numpy.random as rgt
import matplotlib.pyplot as plt
plt.style.use('ggplot')

# High-Dimensional Data Simulation
In this section, we simulate high-dimensional data based on the model:


$$Y = 2 + X\beta + \varepsilon$$


where:
- $X$ is the $n \times p$ matrix of high-dimensional covariates, with entries drawn independently from the uniform distribution $U[-2, 2]$.
- $\beta$ is a sparse parameter vector: its first 10 entries are non-zero (each either $1$ or $-1$) and all remaining entries are zero.
- $\varepsilon$ is the error term, which can be drawn from a specified distribution (e.g., $N(0, 1)$ or $t_{2.25}$).
- The sparsity of the model is therefore 10.


## Submodel 1: $\varepsilon \sim N(0, 1)$

In this submodel, the error term $\varepsilon$ is drawn from the standard normal distribution $N(0, 1)$.


### Error vs. Iterations for Submodel 1

In this subsection, we compare the errors of the high-dimensional gradient descent model `gd_highdim` and the noisy gradient descent model `noisygd_highdim` over multiple iterations. 


In [3]:
# Submodel 1 (N(0, 1) errors): relative l2 error of the two iterate sequences
# returned by `noisygd_highdim`, recorded at every iteration and for every
# repetition. Each repetition draws a fresh data set, fits a short noiseless
# Huber run on a small subsample to obtain a starting point, and then runs
# `noisygd_highdim` on the remaining rows.

# --- problem size and privacy budget ---
p = 2000  # dimension
n = 20000  # sample size
n_1 = 200  # sample size for initial estimation
T_1 = 4  # number of iterations for initial estimation
n_2 = n - n_1  # remaining sample size for DP Huber
s_star = 10  # true sparsity level
s_huber = 2 * s_star  # sparsity level passed to both Huber solvers
lr_huber = 0.1  # learning rate for noisy Huber
mu, delta = 0.5, 0.01  # privacy levels
repetitions = 300  # number of repetitions

# --- true coefficients ---
rgt.seed(0)  # fixes the random sign pattern of beta
beta = np.zeros(p)
beta[:s_star] = np.ones(s_star) * (2 * rgt.binomial(1, 0.5, size=s_star) - 1)  # entries in {1, -1}
beta_true = np.insert(beta, 0, 2)  # prepend the intercept (= 2)
beta_norm = beta_true.dot(beta_true) ** 0.5  # l2 norm of the true vector, intercept included

# --- tuning parameters ---
lr_noiseless = 0.5  # learning rate for noiseless Huber
robust_noiseless = (n_1 / (s_huber * np.log(p) + np.log(n_1))) ** 0.5  # robustification parameter for initial estimation

B_huber = .4 * (np.log(p) + np.log(n_2)) ** 0.5  # truncation parameter
T = int(np.ceil(np.log(n_2)))  # number of iterations
c_0 = .2  # common constant for the DP robustification parameters
robust = c_0 * (n_2 / (s_huber * np.log(p) + np.log(n_2))) ** 0.5  # robustification parameter for noiseless Huber
robust_noise = c_0 * (n_2 * mu / (s_huber * np.log(p) + np.log(n_2))) ** 0.5  # robustification parameter for noisy Huber
robust_low1 = .5 * (n_2 / (s_huber + 1 + np.log(n_2))) ** 0.5  # robustification parameter for noiseless Huber: intercept
robust_low2 = .5 * (n_2 * mu / (s_huber + 1 + np.log(n_2))) ** 0.5  # robustification parameter for noisy Huber: intercept

# --- result containers: T + 1 rows, one column per repetition ---
# NOTE(review): 'beta_seq1' / 'beta_seq2' are presumably the noiseless and the
# noisy iterate sequences (inferred from the array names) - confirm in final_util.
Iteration_re_G = np.zeros([T + 1, repetitions])
Iteration_re_G_noise = np.zeros([T + 1, repetitions])

for m in range(repetitions):
    rgt.seed(m + 1)  # one seed per repetition; `rgt` is numpy.random, so this seeds np.random too
    X = np.random.uniform(-2, 2, (n, p))  # generate X
    Y = 2 + X.dot(beta) + np.random.normal(0, 1, n)  # generate Y

    # data split: n_1 distinct rows for the initial estimate, the rest for DP Huber
    random_rows = np.random.choice(n, size=n_1, replace=False)
    # Build the hold-out mask once and reuse it for X and Y
    # (the rows keep their original order).
    rest_mask = np.ones(n, dtype=bool)
    rest_mask[random_rows] = False

    ## subsample for initial estimation: noiseless Huber
    X_subsample = X[random_rows]
    Y_subsample = Y[random_rows]
    model_sub = Huber(X_subsample, Y_subsample, intercept=True)
    initial = model_sub.gd_highdim(lr=lr_noiseless, T=T_1, s=s_huber, tau=None,
                                   robust=robust_noiseless, beta0=np.array([]),
                                   standardize=False)

    ## rest for DP Huber
    X_rest = X[rest_mask]
    Y_rest = Y[rest_mask]
    beta0 = initial['beta']
    model = Huber(X_rest, Y_rest, intercept=True)
    out_Huber_noise_new = model.noisygd_highdim(mu=mu, T=T, delta=delta, lr=lr_huber, beta0=beta0,
                                                s=s_huber, robust_low1=robust_low1, robust_low2=robust_low2,
                                                robust_high1=robust, robust_high2=robust_noise,
                                                B_high=B_huber, standardize=False)

    # results: column-wise l2 distance to beta_true, scaled by ||beta_true||
    Iteration_re_G[:, m] = np.sum((out_Huber_noise_new['beta_seq1'] - beta_true[:, None]) ** 2, axis=0) ** 0.5 / beta_norm
    Iteration_re_G_noise[:, m] = np.sum((out_Huber_noise_new['beta_seq2'] - beta_true[:, None]) ** 2, axis=0) ** 0.5 / beta_norm

### Errors vs. Sample Sizes for Submodel 1

In this subsection, we compare the errors of the high-dimensional gradient descent model `gd_highdim`, the noisy gradient descent model `noisygd_highdim_comp`, and the sparse Differential Privacy (DP)  Least Squares `noisygd_ls` model as a function of the sample size. 



#### Errors vs. Sample Sizes for sparse DP Least Squares (sparse DP LS)  

In this subsection, we present the results of applying sparse Differential Privacy (DP) Least Squares to high-dimensional data. 


In [4]:
# Sparse DP least squares under N(0, 1) errors: mean relative l2 error for
# every (truncation coefficient, sample size) pair, averaged over repetitions.
p = 2000
s_star = 10  # true sparsity
s_ls = 2 * s_star  # sparsity level handed to the solver
repetitions = 300
mu, delta = 0.5, 0.01  # privacy levels
sample_sizes = np.array([2500, 5000, 10000])  # sample sizes
R_coef_cand = np.array([0.1, 0.5, 1])  # candidate coefficients for R_ls

# True slope vector: random signs on the first s_star coordinates, zeros elsewhere.
rgt.seed(0)
beta = np.zeros(p)
beta[:s_star] = 2 * rgt.binomial(1, 0.5, size=s_star) - 1
beta_norm = np.dot(beta, beta) ** 0.5

# Outer list: one entry per R_coef; inner list: one mean error per sample size.
HD_ls_noise_G_all = []
for R_coef in R_coef_cand:
    HD_ls_noise_G = []
    for n in sample_sizes:
        print(n)
        T = int(np.ceil(np.log(n)))  # number of iterations

        lr_ls = 0.1  # learning rate for DP LS
        C_ls = 1.01 * beta_norm  # feasibility parameter for DP LS
        R_ls = R_coef * 2 * np.sqrt(2 * np.log(n))  # truncation level for DP LS
        c_x = 2
        B_ls = 4 * (R_ls + C_ls * c_x) * c_x / np.sqrt(s_ls)  # noise scale for DP LS

        HD_ls_noise_G_sub = []
        for m in range(repetitions):
            rgt.seed(m + 1)  # one seed per repetition
            X = rgt.uniform(-2, 2, (n, p))
            noise = rgt.normal(0, 1, n)
            Y = 2 + X.dot(beta) + noise

            # Centre X (column-wise) and Y, then fit without an intercept.
            X_cent = X - X.mean(axis=0)
            Y_cent = Y - Y.mean()
            model_ls = Huber(X_cent, Y_cent, intercept=False)
            out_LS = model_ls.noisygd_ls(
                mu=mu, T=T, delta=delta, lr=lr_ls, s=s_ls,
                R=R_ls, C=C_ls, B=B_ls,
                beta0=np.array([]), standardize=False,
            )

            # Relative l2 error of the returned coefficient vector.
            diff = out_LS['beta'] - beta
            rel_err = np.sum(diff ** 2) ** 0.5 / beta_norm
            HD_ls_noise_G_sub.append(rel_err)
        HD_ls_noise_G.append(np.mean(HD_ls_noise_G_sub))
    HD_ls_noise_G_all.append(HD_ls_noise_G)

2500
5000
10000
2500
5000
10000
2500
5000
10000


#### Errors vs. Sample Sizes for High-Dimensional sparse Huber Regression

In this subsection, we compare the errors of high-dimensional Huber regression (noiseless and noisy) as a function of sample size. The intercept term is iterated without noise to ensure a fair comparison with the sparse Differential Privacy (DP) Least Squares (LS) method.


In [5]:
# Submodel 1 (N(0, 1) errors): mean relative l2 error of the two estimates
# returned by `noisygd_highdim_comp`, for each sample size. Only the slope
# coordinates are compared (the first entry of each estimate is dropped).
p = 2000
s_star = 10  # true sparsity
s_huber = 2 * s_star  # sparsity level passed to both Huber solvers
n_1 = 200  # sample size for initial estimation
T_1 = 4  # number of iterations for initial estimation
mu, delta = 0.5, 0.01  # privacy levels
repetitions = 300
sample_sizes = np.array([2500, 5000, 10000])

# True slope vector: random signs on the first s_star coordinates, zeros elsewhere.
rgt.seed(0)
beta = np.zeros(p)
beta[:s_star] = np.ones(s_star) * (2 * rgt.binomial(1, 0.5, size=s_star) - 1)
beta_norm = beta.dot(beta) ** 0.5  # l2 norm of the slopes (no intercept)

robust_noiseless = (n_1 / (s_huber * np.log(p) + np.log(n_1))) ** 0.5  # robustification parameter for initial estimation
lr_noiseless = 0.5  # learning rate for the initial noiseless fit

# Constants that do not depend on the sample size.
c_0 = .2  # common constant for the DP robustification parameters
lr_huber = 0.1  # learning rate for DP Huber

# NOTE(review): 'beta1' / 'beta2' are presumably the noiseless and the noisy
# estimates (inferred from the result-list names) - confirm in final_util.
HD_Huber_G = []
HD_Huber_noise_G = []
for n in sample_sizes:
    print(n)
    n_2 = n - n_1  # rows left after the initial-estimation subsample

    robust = c_0 * (n_2 / (s_huber * np.log(p) + np.log(n_2))) ** 0.5  # robustification parameter for noiseless Huber
    robust_noise = c_0 * (n_2 * mu / (s_huber * np.log(p) + np.log(n_2))) ** 0.5  # robustification parameter for noisy Huber
    robust_low1 = .5 * (n_2 / (s_huber + 1 + np.log(n_2))) ** 0.5  # robustification parameter for noiseless Huber: intercept

    T = int(np.ceil(np.log(n_2)))  # number of iterations
    B_huber = .4 * (np.log(p) + np.log(n_2)) ** 0.5  # truncation parameter for DP Huber

    HD_Huber_G_sub = []
    HD_Huber_noise_G_sub = []
    for m in range(repetitions):
        rgt.seed(m + 1)  # one seed per repetition; `rgt` is numpy.random, so this seeds np.random too
        X = np.random.uniform(-2, 2, (n, p))
        Y = 2 + X.dot(beta) + np.random.normal(0, 1, n)

        ## DP Huber
        # data split: n_1 distinct rows for the initial estimate, the rest for DP Huber
        random_rows = np.random.choice(n, size=n_1, replace=False)
        # Build the hold-out mask once and reuse it for X and Y
        # (the rows keep their original order).
        rest_mask = np.ones(n, dtype=bool)
        rest_mask[random_rows] = False
        X_subsample = X[random_rows]
        Y_subsample = Y[random_rows]
        X_rest = X[rest_mask]
        Y_rest = Y[rest_mask]

        model_sub = Huber(X_subsample, Y_subsample, intercept=True)
        initial = model_sub.gd_highdim(lr=lr_noiseless, T=T_1, s=s_huber, tau=None,
                                       robust=robust_noiseless, beta0=np.array([]),
                                       standardize=False)
        beta0 = initial['beta']
        model_huber = Huber(X_rest, Y_rest, intercept=True)
        out_Huber_noise_new = model_huber.noisygd_highdim_comp(mu=mu, T=T, delta=delta, lr=lr_huber, beta0=beta0,
                                                               s=s_huber, robust_low=robust_low1,
                                                               robust_high1=robust, robust_high2=robust_noise,
                                                               B_high=B_huber, standardize=False)

        # Drop the first entry so the error is measured on the slopes only.
        HD_Huber_G_sub.append(np.sum((out_Huber_noise_new['beta1'][1:] - beta) ** 2) ** 0.5 / beta_norm)
        HD_Huber_noise_G_sub.append(np.sum((out_Huber_noise_new['beta2'][1:] - beta) ** 2, axis=0) ** 0.5 / beta_norm)
    HD_Huber_G.append(np.mean(HD_Huber_G_sub))
    HD_Huber_noise_G.append(np.mean(HD_Huber_noise_G_sub))

2500
5000
10000


## Submodel 2: $\varepsilon \sim t_{2.25}$

In this submodel, the error term $\varepsilon$ is drawn from $t_{2.25}$, the Student's t-distribution with 2.25 degrees of freedom.


### Error vs. Iterations for Submodel 2

In this subsection, we compare the errors of the high-dimensional gradient descent model `gd_highdim` and the noisy gradient descent model `noisygd_highdim` over multiple iterations.  

In [7]:
# Submodel 2 (t_{2.25} errors): relative l2 error of the two iterate sequences
# returned by `noisygd_highdim`, recorded at every iteration and for every
# repetition. Each repetition draws a fresh data set, fits a short noiseless
# Huber run on a small subsample to obtain a starting point, and then runs
# `noisygd_highdim` on the remaining rows.

# --- problem size and privacy budget ---
p = 2000  # dimension
n = 20000  # sample size
n_1 = 200  # sample size for initial estimation
T_1 = 4  # number of iterations for initial estimation
n_2 = n - n_1  # remaining sample size for DP Huber
s_star = 10  # true sparsity level
s_huber = 2 * s_star  # sparsity level passed to both Huber solvers
lr_huber = 0.1  # learning rate for noisy Huber
mu, delta = 0.5, 0.01  # privacy levels
repetitions = 300  # number of repetitions

# --- true coefficients ---
rgt.seed(0)  # fixes the random sign pattern of beta
beta = np.zeros(p)
beta[:s_star] = np.ones(s_star) * (2 * rgt.binomial(1, 0.5, size=s_star) - 1)  # entries in {1, -1}
beta_true = np.insert(beta, 0, 2)  # prepend the intercept (= 2)
beta_norm = beta_true.dot(beta_true) ** 0.5  # l2 norm of the true vector, intercept included

# --- tuning parameters ---
lr_noiseless = 0.5  # learning rate for noiseless Huber
robust_noiseless = (n_1 / (s_huber * np.log(p) + np.log(n_1))) ** 0.5  # robustification parameter for initial estimation

B_huber = .4 * (np.log(p) + np.log(n_2)) ** 0.5  # truncation parameter
T = int(np.ceil(np.log(n_2)))  # number of iterations
c_0 = .2  # common constant for the DP robustification parameters
robust = c_0 * (n_2 / (s_huber * np.log(p) + np.log(n_2))) ** 0.5  # robustification parameter for noiseless Huber
robust_noise = c_0 * (n_2 * mu / (s_huber * np.log(p) + np.log(n_2))) ** 0.5  # robustification parameter for noisy Huber
robust_low1 = .5 * (n_2 / (s_huber + 1 + np.log(n_2))) ** 0.5  # robustification parameter for noiseless Huber: intercept
robust_low2 = .5 * (n_2 * mu / (s_huber + 1 + np.log(n_2))) ** 0.5  # robustification parameter for noisy Huber: intercept

# --- result containers: T + 1 rows, one column per repetition ---
# NOTE(review): 'beta_seq1' / 'beta_seq2' are presumably the noiseless and the
# noisy iterate sequences (inferred from the array names) - confirm in final_util.
Iteration_re_t = np.zeros([T + 1, repetitions])
Iteration_re_t_noise = np.zeros([T + 1, repetitions])

for m in range(repetitions):
    rgt.seed(m + 1)  # one seed per repetition; `rgt` is numpy.random, so this seeds np.random too
    X = np.random.uniform(-2, 2, (n, p))  # generate X
    Y = 2 + X.dot(beta) + rgt.standard_t(2.25, n)  # generate Y with t-distributed errors (2.25 d.o.f.)

    # data split: n_1 distinct rows for the initial estimate, the rest for DP Huber
    random_rows = np.random.choice(n, size=n_1, replace=False)
    # Build the hold-out mask once and reuse it for X and Y
    # (the rows keep their original order).
    rest_mask = np.ones(n, dtype=bool)
    rest_mask[random_rows] = False

    ## subsample for initial estimation: noiseless Huber
    X_subsample = X[random_rows]
    Y_subsample = Y[random_rows]
    model_sub = Huber(X_subsample, Y_subsample, intercept=True)
    initial = model_sub.gd_highdim(lr=lr_noiseless, T=T_1, s=s_huber, tau=None,
                                   robust=robust_noiseless, beta0=np.array([]),
                                   standardize=False)

    ## rest for DP Huber
    X_rest = X[rest_mask]
    Y_rest = Y[rest_mask]
    beta0 = initial['beta']
    model = Huber(X_rest, Y_rest, intercept=True)
    out_Huber_noise_new = model.noisygd_highdim(mu=mu, T=T, delta=delta, lr=lr_huber, beta0=beta0,
                                                s=s_huber, robust_low1=robust_low1, robust_low2=robust_low2,
                                                robust_high1=robust, robust_high2=robust_noise,
                                                B_high=B_huber, standardize=False)

    # results: column-wise l2 distance to beta_true, scaled by ||beta_true||
    Iteration_re_t[:, m] = np.sum((out_Huber_noise_new['beta_seq1'] - beta_true[:, None]) ** 2, axis=0) ** 0.5 / beta_norm
    Iteration_re_t_noise[:, m] = np.sum((out_Huber_noise_new['beta_seq2'] - beta_true[:, None]) ** 2, axis=0) ** 0.5 / beta_norm

### Errors vs. Sample Sizes for Submodel 2

In this subsection, we compare the errors of the high-dimensional gradient descent model `gd_highdim`, the noisy gradient descent model `noisygd_highdim_comp`, and the sparse Differential Privacy (DP) Least Squares `noisygd_ls` model as a function of the sample size. 



#### Errors vs. Sample Sizes for sparse DP Least Squares (sparse DP LS)  

In this subsection, we present the results of applying sparse Differential Privacy (DP) Least Squares to high-dimensional data. 


In [9]:
# Sparse DP least squares under t_{2.25} errors: mean relative l2 error for
# every (truncation coefficient, sample size) pair, averaged over repetitions.
p = 2000
s_star = 10  # true sparsity
s_ls = 2 * s_star  # sparsity level handed to the solver
repetitions = 300
mu, delta = 0.5, 0.01  # privacy levels
sample_sizes = np.array([2500, 5000, 10000])  # sample sizes
R_coef_cand = np.array([0.1, 0.5, 1])  # candidate coefficients for R_ls

# True slope vector: random signs on the first s_star coordinates, zeros elsewhere.
rgt.seed(0)
beta = np.zeros(p)
beta[:s_star] = 2 * rgt.binomial(1, 0.5, size=s_star) - 1
beta_norm = np.dot(beta, beta) ** 0.5

# Outer list: one entry per R_coef; inner list: one mean error per sample size.
HD_ls_noise_t_all = []
for R_coef in R_coef_cand:
    HD_ls_noise_t = []
    for n in sample_sizes:
        print(n)
        T = int(np.ceil(np.log(n)))  # number of iterations

        lr_ls = 0.1  # learning rate for DP LS
        C_ls = 1.01 * beta_norm  # feasibility parameter for DP LS
        R_ls = R_coef * 2 * np.sqrt(2 * np.log(n))  # truncation level for DP LS
        c_x = 2
        B_ls = 4 * (R_ls + C_ls * c_x) * c_x / np.sqrt(s_ls)  # noise scale for DP LS

        HD_ls_noise_t_sub = []
        for m in range(repetitions):
            rgt.seed(m + 1)  # one seed per repetition
            X = rgt.uniform(-2, 2, (n, p))
            noise = rgt.standard_t(2.25, n)
            Y = 2 + X.dot(beta) + noise

            # Centre X (column-wise) and Y, then fit without an intercept.
            X_cent = X - X.mean(axis=0)
            Y_cent = Y - Y.mean()
            model_ls = Huber(X_cent, Y_cent, intercept=False)
            out_LS = model_ls.noisygd_ls(
                mu=mu, T=T, delta=delta, lr=lr_ls, s=s_ls,
                R=R_ls, C=C_ls, B=B_ls,
                beta0=np.array([]), standardize=False,
            )

            # Relative l2 error of the returned coefficient vector.
            diff = out_LS['beta'] - beta
            rel_err = np.sum(diff ** 2) ** 0.5 / beta_norm
            HD_ls_noise_t_sub.append(rel_err)
        HD_ls_noise_t.append(np.mean(HD_ls_noise_t_sub))
    HD_ls_noise_t_all.append(HD_ls_noise_t)

2500
5000
10000
2500
5000
10000
2500
5000
10000


#### Errors vs. Sample Sizes for High-Dimensional sparse Huber Regression

In this subsection, we compare the errors of high-dimensional Huber regression (noiseless and noisy) as a function of sample size. The intercept term is iterated without noise to ensure a fair comparison with the sparse Differential Privacy (DP) Least Squares (LS) method.


In [10]:
# Submodel 2 (t_{2.25} errors): mean relative l2 error of the two estimates
# returned by `noisygd_highdim_comp`, for each sample size. Only the slope
# coordinates are compared (the first entry of each estimate is dropped).
p = 2000
s_star = 10  # true sparsity
s_huber = 2 * s_star  # sparsity level passed to both Huber solvers
n_1 = 200  # sample size for initial estimation
T_1 = 4  # number of iterations for initial estimation
mu, delta = 0.5, 0.01  # privacy levels
repetitions = 300
sample_sizes = np.array([2500, 5000, 10000])

# True slope vector: random signs on the first s_star coordinates, zeros elsewhere.
rgt.seed(0)
beta = np.zeros(p)
beta[:s_star] = np.ones(s_star) * (2 * rgt.binomial(1, 0.5, size=s_star) - 1)
beta_norm = beta.dot(beta) ** 0.5  # l2 norm of the slopes (no intercept)

robust_noiseless = (n_1 / (s_huber * np.log(p) + np.log(n_1))) ** 0.5  # robustification parameter for initial estimation
lr_noiseless = 0.5  # learning rate for the initial noiseless fit

# Constants that do not depend on the sample size.
c_0 = .2  # common constant for the DP robustification parameters
lr_huber = 0.1  # learning rate for DP Huber

# NOTE(review): 'beta1' / 'beta2' are presumably the noiseless and the noisy
# estimates (inferred from the result-list names) - confirm in final_util.
HD_Huber_t = []
HD_Huber_noise_t = []
for n in sample_sizes:
    print(n)
    n_2 = n - n_1  # rows left after the initial-estimation subsample

    robust = c_0 * (n_2 / (s_huber * np.log(p) + np.log(n_2))) ** 0.5  # robustification parameter for noiseless Huber
    robust_noise = c_0 * (n_2 * mu / (s_huber * np.log(p) + np.log(n_2))) ** 0.5  # robustification parameter for noisy Huber
    robust_low1 = .5 * (n_2 / (s_huber + 1 + np.log(n_2))) ** 0.5  # robustification parameter for noiseless Huber: intercept

    T = int(np.ceil(np.log(n_2)))  # number of iterations
    B_huber = .4 * (np.log(p) + np.log(n_2)) ** 0.5  # truncation parameter for DP Huber

    HD_Huber_t_sub = []
    HD_Huber_noise_t_sub = []
    for m in range(repetitions):
        rgt.seed(m + 1)  # one seed per repetition; `rgt` is numpy.random, so this seeds np.random too
        X = np.random.uniform(-2, 2, (n, p))
        Y = 2 + X.dot(beta) + rgt.standard_t(2.25, n)  # t-distributed errors (2.25 d.o.f.)

        ## DP Huber
        # data split: n_1 distinct rows for the initial estimate, the rest for DP Huber
        random_rows = np.random.choice(n, size=n_1, replace=False)
        # Build the hold-out mask once and reuse it for X and Y
        # (the rows keep their original order).
        rest_mask = np.ones(n, dtype=bool)
        rest_mask[random_rows] = False
        X_subsample = X[random_rows]
        Y_subsample = Y[random_rows]
        X_rest = X[rest_mask]
        Y_rest = Y[rest_mask]

        model_sub = Huber(X_subsample, Y_subsample, intercept=True)
        initial = model_sub.gd_highdim(lr=lr_noiseless, T=T_1, s=s_huber, tau=None,
                                       robust=robust_noiseless, beta0=np.array([]),
                                       standardize=False)
        beta0 = initial['beta']
        model_huber = Huber(X_rest, Y_rest, intercept=True)
        out_Huber_noise_new = model_huber.noisygd_highdim_comp(mu=mu, T=T, delta=delta, lr=lr_huber, beta0=beta0,
                                                               s=s_huber, robust_low=robust_low1,
                                                               robust_high1=robust, robust_high2=robust_noise,
                                                               B_high=B_huber, standardize=False)

        # Drop the first entry so the error is measured on the slopes only.
        HD_Huber_t_sub.append(np.sum((out_Huber_noise_new['beta1'][1:] - beta) ** 2) ** 0.5 / beta_norm)
        HD_Huber_noise_t_sub.append(np.sum((out_Huber_noise_new['beta2'][1:] - beta) ** 2, axis=0) ** 0.5 / beta_norm)
    HD_Huber_t.append(np.mean(HD_Huber_t_sub))
    HD_Huber_noise_t.append(np.mean(HD_Huber_noise_t_sub))

2500
5000
10000
