# Import Required Libraries
This cell imports the necessary Python libraries for the analysis. It includes the custom `final_util` library, `numpy` and `numpy.random` for numerical calculations, `matplotlib.pyplot` for data visualization, and sets the plot style to 'ggplot'.

In [2]:
from final_util import *
import numpy as np
import numpy.random as rgt
import matplotlib.pyplot as plt
plt.style.use('ggplot')

# Low-Dimensional Data Simulation
In this section, we simulate low-dimensional data based on the model:


$$Y = 1 + X\beta + \varepsilon$$


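where the leading 1 is the intercept and every entry of the coefficient vector $\beta \in \{-1, 1\}^p$ is either 1 or -1. In the code below the signs are drawn at random (each with probability 0.5) under a fixed seed, so the same $\beta$ is used in every experiment.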


## Submodel 1: Data Generation
In this submodel, we generate the data as follows:

- The entries of the predictor matrix $X$ are independent standard normal draws: $X_{ij} \sim N(0,1)$.
- The error terms $\varepsilon_i$ are independent draws from a t-distribution with 2.25 degrees of freedom: $\varepsilon_i \sim t_{2.25}$.

This model is used to simulate data based on the assumption of normally distributed predictors and error terms with heavy tails.


### Comparison of Errors for Submodel 1: gd vs. noisygd
In this subsection, we compare the errors of the following models as a function of the number of iterations:

- **gd**: The non-noisy gradient descent model.
- **noisygd**: The gradient descent model with different privacy parameters.

We will examine how the error evolves over iterations for three different privacy parameter settings in the `noisygd` model. The goal is to assess the impact of privacy on the model’s performance and understand how the error changes with respect to iterations.


In [3]:
# ---- Experiment configuration: Gaussian design, t-distributed noise ----
n, p = 4000, 10                # sample size and number of predictors
t_df = 2.25                    # degrees of freedom of the t-distributed noise
lr = .5                        # learning rate shared by gd and noisygd
c0 = .5                        # constant shared by both robustification parameters
mu = np.array([.3, .5 ,.9])    # privacy levels for GDP
repetitions = 300              # number of Monte Carlo repetitions

# ---- True coefficients: random +/-1 signs (fixed by the seed) plus an intercept of 1 ----
rgt.seed(0)
beta = np.ones(p)*(2*rgt.binomial(1, 0.5, size=p)-1)
beta_true = np.insert(beta, 0, 1)
beta_norm = beta_true.dot(beta_true)**0.5   # l2 norm of the true coefficient vector

# ---- Tuning parameters ----
T = int(np.ceil(np.log(n)))                 # number of iterations
B = (p + np.log(n))**0.5                    # truncation parameter for the noisy runs (Gaussian design)
robust = c0 * (n/(p+np.log(n)))**0.5        # robustification parameter for the noiseless run

# One row per entry of 'beta_seq' (T+1 of them), one column per repetition.
mse_G_t, priv_mse1_G_t, priv_mse2_G_t, priv_mse3_G_t = (
    np.zeros([T+1, repetitions]) for _ in range(4)
)

for m in range(repetitions):
    # Re-seed per repetition so each replication is reproducible on its own.
    rgt.seed(m+1)
    X = rgt.normal(0, 1, size=(n,p))
    Y = 1 + X.dot(beta) + rgt.standard_t(t_df, n)

    model = Huber(X, Y)
    out0 = model.gd(robust, lr=lr, max_niter=T)

    # The private robustification parameters are scaled by mad() of the
    # noiseless residuals; one value per privacy level in `mu`.
    priv_robust = c0 * mad(out0['residuals']) * (n*mu/(p+np.log(n)))**0.5

    # Run the noisy fits in the order mu[0], mu[1], mu[2].
    out1, out2, out3 = (
        model.noisygd(tau, lr=lr, B=B, mu=level, T=T)
        for tau, level in zip(priv_robust, mu)
    )

    # Relative l2 error of every iterate, stored in column m of each result array.
    for store, fit in zip(
        (mse_G_t, priv_mse1_G_t, priv_mse2_G_t, priv_mse3_G_t),
        (out0, out1, out2, out3),
    ):
        store[:,m] = np.sum((fit['beta_seq'] - beta_true[:,None])**2, axis=0)**0.5 / beta_norm

### Comparison of Errors for Submodel 1: gd vs. noisygd with Varying Sample Sizes
In this subsection, we compare the errors of the following models as a function of the sample size:

- **gd**: The non-noisy gradient descent model.
- **noisygd**: The gradient descent model with different privacy parameters. 


In [4]:
# ---- Experiment configuration: Gaussian design, t-distributed noise ----
p = 10                                               # number of predictors
t_df = 2.25                                          # degrees of freedom of the t-distributed noise
lr = .5                                              # learning rate shared by gd and noisygd
c0 = .5                                              # constant shared by both robustification parameters
mu = np.array([.3, .5 ,.9])                          # privacy levels for GDP
repetitions = 300                                    # Monte Carlo repetitions per sample size
sample_sizes = np.array(range(2000, 30001, 2000))    # grid of sample sizes

# ---- True coefficients: random +/-1 signs (fixed by the seed) plus an intercept of 1 ----
rgt.seed(0)
beta = np.ones(p)*(2*rgt.binomial(1, 0.5, size=p)-1)
beta_true = np.insert(beta, 0, 1)
beta_norm = beta_true.dot(beta_true)**0.5            # l2 norm of the true coefficient vector

# Average relative l2 error per sample size (one entry appended per n).
mean_mse_G_t, mean_priv_mse1_G_t, mean_priv_mse2_G_t, mean_priv_mse3_G_t = [], [], [], []

for n in sample_sizes:
    # Per-repetition errors for the current sample size.
    errors_G_t, errors_priv1_G_t, errors_priv2_G_t, errors_priv3_G_t = [], [], [], []

    # Tuning parameters depend on n, so they are recomputed for every sample size.
    T = int(np.ceil(np.log(n)))                      # number of iterations
    B = (p + np.log(n))**0.5                         # truncation parameter (Gaussian design)
    robust = c0 * (n/(p+np.log(n)))**0.5             # robustification parameter for the noiseless run

    for m in range(repetitions):
        rgt.seed(m+1)
        X = rgt.normal(0, 1, size=(n,p))
        Y = 1 + X.dot(beta) + rgt.standard_t(t_df, n)

        model = Huber(X, Y)
        out0 = model.gd(robust, lr=lr, max_niter=T)

        # One private robustification parameter per privacy level in `mu`.
        priv_robust = c0 * mad(out0['residuals']) * (n*mu/(p+np.log(n)))**0.5

        # Run the noisy fits in the order mu[0], mu[1], mu[2].
        out1, out2, out3 = (
            model.noisygd(tau, lr=lr, B=B, mu=level, T=T)
            for tau, level in zip(priv_robust, mu)
        )

        # Relative l2 error of the final estimate of each fit.
        for errs, fit in zip(
            (errors_G_t, errors_priv1_G_t, errors_priv2_G_t, errors_priv3_G_t),
            (out0, out1, out2, out3),
        ):
            errs.append(np.sum((fit['beta'] - beta_true)**2)**0.5 / beta_norm)

    # Collapse the repetitions into a single mean per method.
    for means, errs in zip(
        (mean_mse_G_t, mean_priv_mse1_G_t, mean_priv_mse2_G_t, mean_priv_mse3_G_t),
        (errors_G_t, errors_priv1_G_t, errors_priv2_G_t, errors_priv3_G_t),
    ):
        means.append(np.mean(errs))

## Submodel 2: Data Generation
In this submodel, we generate the data as follows:

- The predictor matrix  X  follows a normal distribution:  X ~ N(0,1) .
- The error term  ε  follows a normal distribution:  ε ~ N(0,1).

### Comparison of Errors for Submodel 2: gd vs. noisygd
In this subsection, we compare the errors of the following models as a function of the number of iterations:

- **gd**: The non-noisy gradient descent model.
- **noisygd**: The gradient descent model with different privacy parameters. 

In [5]:
# ---- Experiment configuration: Gaussian design, Gaussian noise ----
n, p = 4000, 10                # sample size and number of predictors
lr = .5                        # learning rate shared by gd and noisygd
# Fix: this cell previously assigned `c_0` but read `c0` below, so it only ran
# when `c0` had been left in the kernel by an earlier cell.
c0 = .5                        # constant shared by both robustification parameters
mu = np.array([.3, .5 ,.9])    # privacy levels for GDP
repetitions = 300              # number of Monte Carlo repetitions

# ---- True coefficients: random +/-1 signs (fixed by the seed) plus an intercept of 1 ----
rgt.seed(0)
beta = np.ones(p)*(2*rgt.binomial(1, 0.5, size=p)-1)
beta_true = np.insert(beta, 0, 1)
beta_norm = beta_true.dot(beta_true)**0.5   # l2 norm of the true coefficient vector

# ---- Tuning parameters ----
T = int(np.ceil(np.log(n)))                 # number of iterations
B = (p + np.log(n))**0.5                    # truncation parameter for the noisy runs (Gaussian design)
robust = c0 * (n/(p+np.log(n)))**0.5        # robustification parameter for the noiseless run

# One row per entry of 'beta_seq' (T+1 of them), one column per repetition.
mse_G_G = np.zeros([T+1, repetitions])
priv_mse1_G_G = np.zeros([T+1, repetitions])
priv_mse2_G_G = np.zeros([T+1, repetitions])
priv_mse3_G_G = np.zeros([T+1, repetitions])

for m in range(repetitions):
    # Re-seed per repetition so each replication is reproducible on its own.
    rgt.seed(m+1)
    X = rgt.normal(0, 1, size=(n,p))
    Y = 1 + X.dot(beta) + rgt.normal(0,1, n)

    model = Huber(X, Y)
    out0 = model.gd(robust, lr=lr, max_niter=T)

    # One private robustification parameter per privacy level in `mu`,
    # scaled by mad() of the noiseless residuals.
    priv_robust = c0 * mad(out0['residuals']) * (n*mu/(p+np.log(n)))**0.5

    # Run the noisy fits in the order mu[0], mu[1], mu[2].
    out1, out2, out3 = (
        model.noisygd(tau, lr=lr, B=B, mu=level, T=T)
        for tau, level in zip(priv_robust, mu)
    )

    # Relative l2 error of every iterate, stored in column m of each result array.
    for store, fit in zip(
        (mse_G_G, priv_mse1_G_G, priv_mse2_G_G, priv_mse3_G_G),
        (out0, out1, out2, out3),
    ):
        store[:,m] = np.sum((fit['beta_seq'] - beta_true[:,None])**2, axis=0)**0.5 / beta_norm

### Comparison of Errors for Submodel 2: gd vs. noisygd with Varying Sample Sizes
In this subsection, we compare the errors of the following models as a function of the sample size:

- **gd**: The non-noisy gradient descent model.
- **noisygd**: The gradient descent model with different privacy parameters. 


In [6]:
# ---- Experiment configuration: Gaussian design, Gaussian noise ----
p = 10                                               # number of predictors
lr = .5                                              # learning rate shared by gd and noisygd
c0 = .5                                              # constant shared by both robustification parameters
mu = np.array([.3, .5 ,.9])                          # privacy levels for GDP
repetitions = 300                                    # Monte Carlo repetitions per sample size
sample_sizes = np.array(range(2000, 30001, 2000))    # grid of sample sizes

# ---- True coefficients: random +/-1 signs (fixed by the seed) plus an intercept of 1 ----
rgt.seed(0)
beta = np.ones(p)*(2*rgt.binomial(1, 0.5, size=p)-1)
beta_true = np.insert(beta, 0, 1)
beta_norm = beta_true.dot(beta_true)**0.5            # l2 norm of the true coefficient vector

# Average relative l2 error per sample size (one entry appended per n).
mean_mse_G_G, mean_priv_mse1_G_G, mean_priv_mse2_G_G, mean_priv_mse3_G_G = [], [], [], []

for n in sample_sizes:
    # Per-repetition errors for the current sample size.
    errors_G_G, errors_priv1_G_G, errors_priv2_G_G, errors_priv3_G_G = [], [], [], []

    # Tuning parameters depend on n, so they are recomputed for every sample size.
    T = int(np.ceil(np.log(n)))                      # number of iterations
    B = (p + np.log(n))**0.5                         # truncation parameter (Gaussian design)
    robust = c0 * (n/(p+np.log(n)))**0.5             # robustification parameter for the noiseless run

    for m in range(repetitions):
        rgt.seed(m+1)
        X = rgt.normal(0, 1, size=(n,p))
        Y = 1 + X.dot(beta) + rgt.normal(0,1, n)

        model = Huber(X, Y)
        out0 = model.gd(robust, lr=lr, max_niter=T)

        # One private robustification parameter per privacy level in `mu`.
        priv_robust = c0 * mad(out0['residuals']) * (n*mu/(p+np.log(n)))**0.5

        # Run the noisy fits in the order mu[0], mu[1], mu[2].
        out1, out2, out3 = (
            model.noisygd(tau, lr=lr, B=B, mu=level, T=T)
            for tau, level in zip(priv_robust, mu)
        )

        # Relative l2 error of the final estimate of each fit.
        for errs, fit in zip(
            (errors_G_G, errors_priv1_G_G, errors_priv2_G_G, errors_priv3_G_G),
            (out0, out1, out2, out3),
        ):
            errs.append(np.sum((fit['beta'] - beta_true)**2)**0.5 / beta_norm)

    # Collapse the repetitions into a single mean per method.
    for means, errs in zip(
        (mean_mse_G_G, mean_priv_mse1_G_G, mean_priv_mse2_G_G, mean_priv_mse3_G_G),
        (errors_G_G, errors_priv1_G_G, errors_priv2_G_G, errors_priv3_G_G),
    ):
        means.append(np.mean(errs))

## Submodel 3: Data Generation
In this submodel, we generate the data as follows:

- The predictor matrix  X  follows a uniform distribution:  X ~ U[-1,1] .
- The error term  ε  follows a t-distribution with 2.25 degrees of freedom:  ε ~ t_{2.25}.


### Comparison of Errors for Submodel 3: gd vs. noisygd
In this subsection, we compare the errors of the following models as a function of the number of iterations:

- **gd**: The non-noisy gradient descent model.
- **noisygd**: The gradient descent model with different privacy parameters. 

In [7]:
# ---- Experiment configuration: uniform design, t-distributed noise ----
n, p = 4000, 10                # sample size and number of predictors
t_df = 2.25                    # degrees of freedom of the t-distributed noise
lr = .5                        # learning rate shared by gd and noisygd
c0 = .5                        # constant shared by both robustification parameters
mu = np.array([.3, .5 ,.9])    # privacy levels for GDP
repetitions = 300              # number of Monte Carlo repetitions

# ---- True coefficients: random +/-1 signs (fixed by the seed) plus an intercept of 1 ----
rgt.seed(0)
beta = np.ones(p)*(2*rgt.binomial(1, 0.5, size=p)-1)
beta_true = np.insert(beta, 0, 1)
beta_norm = beta_true.dot(beta_true)**0.5   # l2 norm of the true coefficient vector

# ---- Tuning parameters ----
T = int(np.ceil(np.log(n)))                 # number of iterations
B = np.sqrt(p)                              # truncation parameter for the noisy runs (uniform design)
robust = c0 * (n/(p+np.log(n)))**0.5        # robustification parameter for the noiseless run

# One row per entry of 'beta_seq' (T+1 of them), one column per repetition.
mse_U_t, priv_mse1_U_t, priv_mse2_U_t, priv_mse3_U_t = (
    np.zeros([T+1, repetitions]) for _ in range(4)
)

for m in range(repetitions):
    # Re-seed per repetition so each replication is reproducible on its own.
    rgt.seed(m+1)
    X = rgt.uniform(-1, 1, size=(n,p))
    Y = 1 + X.dot(beta) + rgt.standard_t(t_df, n)

    model = Huber(X, Y)
    out0 = model.gd(robust, lr=lr, max_niter=T)

    # One private robustification parameter per privacy level in `mu`,
    # scaled by mad() of the noiseless residuals.
    priv_robust = c0 * mad(out0['residuals']) * (n*mu/(p+np.log(n)))**0.5

    # Run the noisy fits in the order mu[0], mu[1], mu[2].
    out1, out2, out3 = (
        model.noisygd(tau, lr=lr, B=B, mu=level, T=T)
        for tau, level in zip(priv_robust, mu)
    )

    # Relative l2 error of every iterate, stored in column m of each result array.
    for store, fit in zip(
        (mse_U_t, priv_mse1_U_t, priv_mse2_U_t, priv_mse3_U_t),
        (out0, out1, out2, out3),
    ):
        store[:,m] = np.sum((fit['beta_seq'] - beta_true[:,None])**2, axis=0)**0.5 / beta_norm

### Comparison of Errors for Submodel 3: gd vs. noisygd with Varying Sample Sizes
In this subsection, we compare the errors of the following models as a function of the sample size:

- **gd**: The non-noisy gradient descent model.
- **noisygd**: The gradient descent model with different privacy parameters. 

In [8]:
# ---- Experiment configuration: uniform design, t-distributed noise ----
p = 10                                               # number of predictors
t_df = 2.25                                          # degrees of freedom of the t-distributed noise
lr = .5                                              # learning rate shared by gd and noisygd
c0 = .5                                              # constant shared by both robustification parameters
mu = np.array([.3, .5 ,.9])                          # privacy levels for GDP
repetitions = 300                                    # Monte Carlo repetitions per sample size
sample_sizes = np.array(range(2000, 30001, 2000))    # grid of sample sizes

# ---- True coefficients: random +/-1 signs (fixed by the seed) plus an intercept of 1 ----
rgt.seed(0)
beta = np.ones(p)*(2*rgt.binomial(1, 0.5, size=p)-1)
beta_true = np.insert(beta, 0, 1)
beta_norm = beta_true.dot(beta_true)**0.5            # l2 norm of the true coefficient vector

# Average relative l2 error per sample size (one entry appended per n).
mean_mse_U_t, mean_priv_mse1_U_t, mean_priv_mse2_U_t, mean_priv_mse3_U_t = [], [], [], []

for n in sample_sizes:
    # Per-repetition errors for the current sample size.
    errors_U_t, errors_priv1_U_t, errors_priv2_U_t, errors_priv3_U_t = [], [], [], []

    # T and robust depend on n; B does not, but is (re)assigned here as in every sweep cell.
    T = int(np.ceil(np.log(n)))                      # number of iterations
    B = np.sqrt(p)                                   # truncation parameter (uniform design)
    robust = c0 * (n/(p+np.log(n)))**0.5             # robustification parameter for the noiseless run

    for m in range(repetitions):
        rgt.seed(m+1)
        X = rgt.uniform(-1, 1, size=(n,p))
        Y = 1 + X.dot(beta) + rgt.standard_t(t_df, n)

        model = Huber(X, Y)
        out0 = model.gd(robust, lr=lr, max_niter=T)

        # One private robustification parameter per privacy level in `mu`.
        priv_robust = c0 * mad(out0['residuals']) * (n*mu/(p+np.log(n)))**0.5

        # Run the noisy fits in the order mu[0], mu[1], mu[2].
        out1, out2, out3 = (
            model.noisygd(tau, lr=lr, B=B, mu=level, T=T)
            for tau, level in zip(priv_robust, mu)
        )

        # Relative l2 error of the final estimate of each fit.
        for errs, fit in zip(
            (errors_U_t, errors_priv1_U_t, errors_priv2_U_t, errors_priv3_U_t),
            (out0, out1, out2, out3),
        ):
            errs.append(np.sum((fit['beta'] - beta_true)**2)**0.5 / beta_norm)

    # Collapse the repetitions into a single mean per method.
    for means, errs in zip(
        (mean_mse_U_t, mean_priv_mse1_U_t, mean_priv_mse2_U_t, mean_priv_mse3_U_t),
        (errors_U_t, errors_priv1_U_t, errors_priv2_U_t, errors_priv3_U_t),
    ):
        means.append(np.mean(errs))

## Submodel 4: Data Generation
In this submodel, we generate the data as follows:

- The predictor matrix  X  follows a uniform distribution:  X ~ U[-1,1] .
- The error term  ε  follows a normal distribution:  ε ~ N(0,1).

### Comparison of Errors for Submodel 4: gd vs. noisygd
In this subsection, we compare the errors of the following models as a function of the number of iterations:

- **gd**: The non-noisy gradient descent model.
- **noisygd**: The gradient descent model with different privacy parameters. 

In [9]:
# ---- Experiment configuration: uniform design, Gaussian noise ----
n, p = 4000, 10                # sample size and number of predictors
lr = .5                        # learning rate shared by gd and noisygd
# Fix: this cell previously assigned `c_0` but read `c0` below, so it only ran
# when `c0` had been left in the kernel by an earlier cell.
c0 = .5                        # constant shared by both robustification parameters
mu = np.array([.3, .5 ,.9])    # privacy levels for GDP
repetitions = 300              # number of Monte Carlo repetitions

# ---- True coefficients: random +/-1 signs (fixed by the seed) plus an intercept of 1 ----
rgt.seed(0)
beta = np.ones(p)*(2*rgt.binomial(1, 0.5, size=p)-1)
beta_true = np.insert(beta, 0, 1)
beta_norm = beta_true.dot(beta_true)**0.5   # l2 norm of the true coefficient vector

# ---- Tuning parameters ----
T = int(np.ceil(np.log(n)))                 # number of iterations
B = np.sqrt(p)                              # truncation parameter for the noisy runs (uniform design)
robust = c0 * (n/(p+np.log(n)))**0.5        # robustification parameter for the noiseless run

# One row per entry of 'beta_seq' (T+1 of them), one column per repetition.
mse_U_G = np.zeros([T+1, repetitions])
priv_mse1_U_G = np.zeros([T+1, repetitions])
priv_mse2_U_G = np.zeros([T+1, repetitions])
priv_mse3_U_G = np.zeros([T+1, repetitions])

for m in range(repetitions):
    # Re-seed per repetition so each replication is reproducible on its own.
    rgt.seed(m+1)
    X = rgt.uniform(-1, 1, size=(n,p))
    Y = 1 + X.dot(beta) + rgt.normal(0,1, n)

    model = Huber(X, Y)
    out0 = model.gd(robust, lr=lr, max_niter=T)

    # One private robustification parameter per privacy level in `mu`,
    # scaled by mad() of the noiseless residuals.
    priv_robust = c0 * mad(out0['residuals']) * (n*mu/(p+np.log(n)))**0.5

    # Run the noisy fits in the order mu[0], mu[1], mu[2].
    out1, out2, out3 = (
        model.noisygd(tau, lr=lr, B=B, mu=level, T=T)
        for tau, level in zip(priv_robust, mu)
    )

    # Relative l2 error of every iterate, stored in column m of each result array.
    for store, fit in zip(
        (mse_U_G, priv_mse1_U_G, priv_mse2_U_G, priv_mse3_U_G),
        (out0, out1, out2, out3),
    ):
        store[:,m] = np.sum((fit['beta_seq'] - beta_true[:,None])**2, axis=0)**0.5 / beta_norm
    

### Comparison of Errors for Submodel 4: gd vs. noisygd with Varying Sample Sizes
In this subsection, we compare the errors of the following models as a function of the sample size:

- **gd**: The non-noisy gradient descent model.
- **noisygd**: The gradient descent model with different privacy parameters. 

In [10]:
# ---- Experiment configuration: uniform design, Gaussian noise ----
p = 10                                               # number of predictors
lr = .5                                              # learning rate shared by gd and noisygd
c0 = .5                                              # constant shared by both robustification parameters
mu = np.array([.3, .5 ,.9])                          # privacy levels for GDP
repetitions = 300                                    # Monte Carlo repetitions per sample size
sample_sizes = np.array(range(2000, 30001, 2000))    # grid of sample sizes

# ---- True coefficients: random +/-1 signs (fixed by the seed) plus an intercept of 1 ----
rgt.seed(0)
beta = np.ones(p)*(2*rgt.binomial(1, 0.5, size=p)-1)
beta_true = np.insert(beta, 0, 1)
beta_norm = beta_true.dot(beta_true)**0.5            # l2 norm of the true coefficient vector

# Average relative l2 error per sample size (one entry appended per n).
mean_mse_U_G, mean_priv_mse1_U_G, mean_priv_mse2_U_G, mean_priv_mse3_U_G = [], [], [], []

for n in sample_sizes:
    # Per-repetition errors for the current sample size.
    errors_U_G, errors_priv1_U_G, errors_priv2_U_G, errors_priv3_U_G = [], [], [], []

    # T and robust depend on n; B does not, but is (re)assigned here as in every sweep cell.
    T = int(np.ceil(np.log(n)))                      # number of iterations
    B = np.sqrt(p)                                   # truncation parameter (uniform design)
    robust = c0 * (n/(p+np.log(n)))**0.5             # robustification parameter for the noiseless run

    for m in range(repetitions):
        rgt.seed(m+1)
        X = rgt.uniform(-1, 1, size=(n,p))
        Y = 1 + X.dot(beta) + rgt.normal(0,1, n)

        model = Huber(X, Y)
        out0 = model.gd(robust, lr=lr, max_niter=T)

        # One private robustification parameter per privacy level in `mu`.
        priv_robust = c0 * mad(out0['residuals']) * (n*mu/(p+np.log(n)))**0.5

        # Run the noisy fits in the order mu[0], mu[1], mu[2].
        out1, out2, out3 = (
            model.noisygd(tau, lr=lr, B=B, mu=level, T=T)
            for tau, level in zip(priv_robust, mu)
        )

        # Relative l2 error of the final estimate of each fit.
        for errs, fit in zip(
            (errors_U_G, errors_priv1_U_G, errors_priv2_U_G, errors_priv3_U_G),
            (out0, out1, out2, out3),
        ):
            errs.append(np.sum((fit['beta'] - beta_true)**2)**0.5 / beta_norm)

    # Collapse the repetitions into a single mean per method.
    for means, errs in zip(
        (mean_mse_U_G, mean_priv_mse1_U_G, mean_priv_mse2_U_G, mean_priv_mse3_U_G),
        (errors_U_G, errors_priv1_U_G, errors_priv2_U_G, errors_priv3_U_G),
    ):
        means.append(np.mean(errs))