In [1]:
import crypten
import torch
import sys
import os
from crypten.config import cfg
import numpy as np
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

notebook_dir = os.getcwd()
parent_dir = os.path.dirname(notebook_dir)
# Add the parent directory to the Python path

sys.path.append(parent_dir)


from privacy_utils import *
import pandas as pd

crypten.init()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
crypten.mpc.cfg

<crypten.config.config.CrypTenConfig at 0x7f797973e390>

In [3]:
# cfg.set_config(reciprocal_nr_iters= 100)

Implementing linear EIG calc in crypten

In [4]:
n_host_sample = 2000 
sigma_error = 1
d = 10
A = torch.randn((d,d))
A = 1/(torch.det(A)) * A

T_allocation_host = torch.randn(d)
T_allocation_host = 100/torch.norm(T_allocation_host)*T_allocation_host

mu_nc = torch.randn(d)
mu_nc = 1/torch.norm(mu_nc)*mu_nc

mu_c = torch.randn(d)
mu_c = 1/torch.norm(mu_c)*mu_c

mu = torch.concat([mu_nc,mu_c])

In [5]:
X_host_no_T = (torch.randn((n_host_sample,d)) @ A ) 
T_host = torch.bernoulli(torch.sigmoid(X_host_no_T@ T_allocation_host))
X_host_times_T = (T_host.unsqueeze(dim=0).T * X_host_no_T)
X_host = torch.concat([X_host_no_T,X_host_times_T],dim=1)

Y_host = X_host @ mu
Y_host = (1/Y_host.std()) * (Y_host-Y_host.mean()) + sigma_error * torch.randn_like(Y_host)

In [6]:
X_host_no_T = (torch.randn((n_host_sample,d)) @ A ) 
T_host = torch.bernoulli(torch.sigmoid(X_host_no_T@ T_allocation_host))
X_host_times_T = (T_host.unsqueeze(dim=0).T * X_host_no_T)
X_host = torch.concat([X_host_no_T,X_host_times_T],dim=1)

In [7]:
sigma_error = 1
prior_mean = torch.zeros(2 * d)
beta_0, sigma_0_sq,inv_cov_0 = prior_mean, sigma_error,torch.eye(2*d)

In [8]:
X_host_no_T_2 = (torch.randn((n_host_sample,d)) @ A ) 
T_host_2 = torch.bernoulli(torch.sigmoid(X_host_no_T@ T_allocation_host))
X_host_times_T_2 = (T_host.unsqueeze(dim=0).T * X_host_no_T)
X_host_2 = torch.concat([X_host_no_T,X_host_times_T],dim=1)

In [9]:
compare_obs_EIG_lin(X_host,X_host,torch.eye(X_host.shape[1]),DP=True,epsilon=10)

{'EIG_obs_torch': 0.010826638899743557,
 'EIG_obs_crypten': 0.02490234375,
 'EIG_obs_DP': -0.0021080654422640883}

In [10]:
compare_caus_EIG_lin(X_host,X_host,torch.eye(X_host.shape[1]),d,DP=True,epsilon=10)

{'EIG_caus_torch': 0.0036485553719103336,
 'EIG_caus_crypten': 0.0095062255859375,
 'EIG_caus_DP': 1.9161040601676553}

## IDHP

In [11]:
exp_parameters = {'number_of_candidate_sites': 20+1,
                'min_sample_size_cand': 200, 
                'max_sample_size_cand': 400, 
                'host_sample_size': 400,
                'host_test_size': 2000,
                'outcome_function': None, 
                'std_true_y': 1, 
                'power_x': 1, 
                'power_x_t': 1}
epsilon = 10

In [None]:
data_with_groundtruth, x, t, y = get_data("IDHP", "")
XandT = pd.concat([x,t], axis=1)
XandT = XandT.sample(n=10**4, replace=True, random_state=42)


candidate_sites = generating_random_sites_from(XandT, data_with_groundtruth, exp_parameters, added_T_coef=40, binary_outcome=False)  

host = candidate_sites[0][:400]
XandT_host, Y_host = torch.from_numpy(host.drop(columns=["Y"]).values), torch.from_numpy(host["Y"].values)
host_test = candidate_sites[0][400:]
causal_param_first_index = np.shape(XandT)[1]

candidate_sites = {key: value for key, value in candidate_sites.items() if key != 0}

cov = torch.eye(XandT_host.shape[1])

  return 1.0 / (1.0 + np.exp(-x))


In [13]:
privacy_results_index = {
    'EIG_obs_torch':[],
    'EIG_obs_crypten':[],
    'EIG_obs_DP' : [],
    'EIG_caus_torch':[],
    'EIG_caus_crypten':[],
    'EIG_caus_DP':[]
}

for _, candidate in tqdm(candidate_sites.items()):
    X_cand = torch.from_numpy(candidate.drop(columns=["Y"]).values)
    obs_results_dict = compare_obs_EIG_lin(XandT_host,X_cand,cov,DP=True,epsilon=epsilon)
    caus_results_dict = compare_caus_EIG_lin(XandT_host,X_cand,cov,DP=True,epsilon=epsilon)

    for key in obs_results_dict:
        privacy_results_index[key].append( obs_results_dict[key] )
    
    for key in caus_results_dict:
        privacy_results_index[key].append( caus_results_dict[key] )

  0%|          | 0/20 [00:02<?, ?it/s]


KeyboardInterrupt: 

In [None]:
privacy_results_index

{'EIG_obs_torch': [tensor(42.4834, dtype=torch.float64),
  tensor(42.4834, dtype=torch.float64)],
 'EIG_obs_crypten': [tensor(3.6664e+10), tensor(3.6664e+10)],
 'EIG_obs_DP': [-0.3570008335365263, 1.8698362204815784],
 'EIG_caus_torch': [],
 'EIG_caus_crypten': [],
 'EIG_caus_DP': []}

In [14]:
X_cand = torch.from_numpy(candidate_sites[1].drop(columns=["Y"]).values)
X_host = XandT_host

In [15]:
XX_cand = X_cand.T @ X_cand
X_host_plus_cov = X_host.T @ X_host + cov
X_host_plus_cov_inv = torch.linalg.inv(X_host_plus_cov)
_,EIG_torch = torch.slogdet((XX_cand @ X_host_plus_cov_inv + torch.eye(XX_cand.shape[0])))

X_host_crypten = crypten.mpc.MPCTensor(X_host_plus_cov_inv)
X_cand_crypten = crypten.mpc.MPCTensor(XX_cand)
# EIG_crypten = logdet_Crypten(X_cand_crypten @ X_host_crypten + torch.eye(XX_cand.shape[0])).get_plain_text()

In [16]:
EIG_torch

tensor(29.8143, dtype=torch.float64)

In [17]:
torch.trace(XX_cand @ X_host_plus_cov_inv)

tensor(71.6209, dtype=torch.float64)

In [22]:
((XX_cand @ X_host_plus_cov_inv - (X_cand_crypten @ X_host_crypten).get_plain_text())**2).mean()

tensor(3.9593e-06, dtype=torch.float64)

In [19]:
((XX_cand @ X_host_plus_cov_inv - (X_cand_crypten @ X_host_crypten).get_plain_text())**2)

tensor([[2.4824e-04, 7.0880e-06, 5.4062e-06,  ..., 2.1924e-05, 2.3394e-05,
         1.2811e-08],
        [4.4588e-06, 9.4655e-06, 1.5851e-05,  ..., 9.5247e-06, 1.0632e-05,
         4.6550e-06],
        [6.0178e-08, 8.4829e-06, 7.7740e-06,  ..., 1.4245e-05, 1.6422e-06,
         3.3430e-06],
        ...,
        [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [4.6108e-09, 2.7902e-10, 3.5598e-12,  ..., 9.5143e-10, 3.2901e-10,
         2.1415e-09],
        [7.0164e-08, 3.6296e-09, 1.6724e-08,  ..., 2.9084e-08, 4.4539e-09,
         1.4704e-08]], dtype=torch.float64)

In [20]:
EIG_torch

tensor(29.8143, dtype=torch.float64)

In [23]:
logdet_Crypten((X_cand_crypten @ X_host_crypten + torch.eye(XX_cand.shape[0]))).get_plain_text()

tensor(3.5383e+11)

In [24]:
L,D = chol_LD_Crypten((X_cand_crypten @ X_host_crypten + torch.eye(XX_cand.shape[0])))
D.get_plain_text()

tensor([ 3.9963e+00,  1.6523e+00, -4.7195e-02,  8.5515e+01, -5.7466e-01,
         4.6540e+00,  1.5619e+01,  2.7983e-01, -1.3094e-01,  1.8162e+03,
        -1.3856e+09, -3.2764e+08, -5.7800e+07,  1.8413e+09, -1.2079e+09,
         2.0450e+09,  1.9042e+09, -1.1779e+09,  1.1100e+09,  3.9671e+08,
         3.6923e+08, -1.6075e+09, -4.9637e+08, -1.0251e+09, -1.9897e+09,
        -1.3163e+09, -8.1015e+08, -1.8746e+07, -1.4175e+09,  1.4418e+09,
        -1.6897e+09,  4.4712e+08, -2.0732e+09, -1.4727e+09, -3.3680e+08,
        -1.7797e+09,  1.3191e+09,  2.0123e+09,  7.3030e+08,  2.1015e+09,
         5.5994e+08, -1.2819e+08, -1.8958e+09,  1.4797e+09, -1.5854e+09,
        -1.7398e+09,  3.2753e+07,  1.7081e+09,  1.4338e+09,  1.0000e+00,
        -1.9549e+09,  1.6168e+09])

In [None]:
L.get_plain_text()

tensor([[ 1.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.1499e-01,  1.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.8390e-01, -1.4977e+00,  1.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-3.1738e-03, -7.6599e-03,  4.4585e+00,  ...,  1.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.0223e-03,  2.4719e-03, -9.3449e-01,  ..., -4.2973e+07,
          1.0000e+00,  0.0000e+00],
        [-8.0719e-03, -9.4910e-03,  9.4061e+00,  ..., -2.8549e+07,
          1.5735e+09,  1.0000e+00]])

In [None]:
torch.linalg.lu_factor(XX_cand @ X_host_plus_cov_inv + torch.eye(XX_cand.shape[0]))

torch.return_types.linalg_lu_factor(
LU=tensor([[ 4.6159e+00,  2.8369e-01, -1.0249e+00,  ...,  1.1406e+00,
         -1.0249e+00, -2.0579e-01],
        [-4.1807e-01,  1.8124e+00,  4.3584e-01,  ..., -5.6092e-01,
          1.0328e+00, -9.6030e-01],
        [-4.8676e-01, -1.5171e-01,  2.6580e+00,  ..., -5.4901e-01,
          1.3469e+00, -1.0811e+00],
        ...,
        [-3.2138e-03,  8.3741e-05, -5.8697e-04,  ...,  1.5089e+00,
         -4.8849e-02, -3.2410e-02],
        [-1.0438e-03,  2.4419e-03,  4.8890e-05,  ..., -3.6593e-02,
          1.9408e+00, -6.9743e-02],
        [-8.2162e-03,  5.5747e-03, -5.0060e-03,  ...,  2.2274e-01,
          5.2683e-02,  1.8721e+00]], dtype=torch.float64),
pivots=tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
        37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52],
       dtype=torch.int32))

In [None]:
XX_cand_caus = X_cand.T @ X_cand
X_host_plus_cov = X_host.T @ X_host + cov
XX_cand_causal,X_host_plus_cov = XX_cand_caus[causal_param_first_index:,causal_param_first_index:],X_host_plus_cov[causal_param_first_index:,causal_param_first_index:]
X_host_plus_cov_inv_caus = torch.linalg.inv(X_host_plus_cov)
_,EIG_torch_caus = torch.slogdet(XX_cand_causal @ X_host_plus_cov_inv_caus + torch.eye(X_host_plus_cov_inv_caus.shape[0]))

X_host_crypten = crypten.mpc.MPCTensor(X_host_plus_cov_inv)
XX_cand_crypten = crypten.mpc.MPCTensor(XX_cand_causal)
EIG_crypten_caus = logdet_Crypten(XX_cand_crypten @ X_host_crypten + torch.eye(X_host_plus_cov_inv_caus.shape[0])).get_plain_text()

In [None]:
print(EIG_torch_caus,EIG_crypten_caus)

tensor(12.4140, dtype=torch.float64) tensor(11.0452)


In [None]:
D_plain = D.get_plain_text()

In [None]:
torch.log(torch.abs(D_plain)).sum()

tensor(20.7083)

In [None]:
torch.log(D_plain).sum()

tensor(nan)

## Misc

In [None]:
# if type(X_host) is torch.Tensor:
# 
cov = np.eye(X_host.shape[1])
y = np.zeros(len(X_host))

Z = get_diff_private_version(X_host,y)

XX_host = Z["XX"]

_,logdet_1 = np.linalg.slogdet(X_cand.T @ X_cand + XX_host + cov)
_,logdet_2 = np.linalg.slogdet(XX_host + cov)

RuntimeError: 1D tensors expected, but got 2D and 2D tensors

In [None]:
X = np.array(X_host)
y = np.array(Y_host)

In [None]:
y .dot(y)

4069.0527

In [None]:
# sufficient statistics
S = {'XX': X.T.dot(X),
        'Xy': X.T.dot(y),
        'yy': y .dot(y)
        }

posteriors = {}

# if 'non-private' in methods:
#     posteriors['non-private'] = run_non_private(model_prior_params, S, N)


#posteriors['naive'] = run_naive(model_prior_params, S, N, sensitivity_x, sensitivity_y, epsilon)

In [None]:
X = np.array(X_host)
y = np.array(Y_host)

sensitivity_x, sensitivity_y = get_sensativitiy(X,y)

epsilon = 1

In [None]:
data_dim = S['XX'].shape[0]

XX_comps = data_dim * (data_dim + 1) / 2  # upper triangular, not counting last column which is X
X_comps = data_dim  # last column
Xy_comps = data_dim
yy_comps = 1
sensitivity = XX_comps * sum(sensitivity_x[:-1]) ** 2 \
                + X_comps * sum(sensitivity_x[:-1]) \
                + Xy_comps * sum(sensitivity_x[:-1]) * sensitivity_y \
                + yy_comps * sensitivity_y ** 2

Z = {key: np.random.laplace(loc=val, scale=sensitivity / epsilon) for key, val in S.items()}

# symmetrize Z_XX since we only want to add noise to upper triangle
Z['XX'] = symmetrize(Z['XX'])

Z['X'] = Z['XX'][:, 0][:, None]

In [None]:
Z["X"].mean()

2205.008211286303

In [None]:
X_host.mean()

-8.1423976e-05

In [None]:
Z.keys()

dict_keys(['XX', 'Xy', 'yy', 'X'])

In [None]:
priv_XX = torch.tensor(Z["XX"])

In [None]:
(X_host.T @ X_host).mean()

0.018392561

In [None]:
type(X_host)

numpy.ndarray

In [None]:
priv_XX.mean()

tensor(455.8781, dtype=torch.float64)

In [None]:
np.linalg.slogdet(priv_XX +np.eye(X_host.shape[1]) )

(-1.0, 186.1891397270062)

In [None]:
torch.linalg.slogdet(X_host.T @ X_host +np.eye(X_host.shape[1]))

TypeError: linalg_slogdet(): argument 'input' (position 1) must be Tensor, not numpy.ndarray

In [None]:
priv_X - X_host.T @ X_host

tensor([[ 4.1798, -0.1939,  2.3292,  5.1394, -4.4555,  1.1445,  1.1193, -4.0168,
          1.9040,  2.1432,  2.0106, -0.1350,  1.0632,  2.4952, -2.1895,  0.5696,
          0.4913, -1.8778,  0.8573,  0.9238],
        [-0.1939,  7.7291, -3.9290, -1.8749,  2.1200, -0.8760,  2.3354, -1.7791,
         -4.5772, -2.4300, -0.1276,  3.8359, -1.8889, -0.8781,  1.1507, -0.5232,
          1.0409, -0.9258, -2.3725, -1.2714],
        [ 2.3292, -3.9290,  6.9166,  4.7498,  0.2810,  1.2035, -3.9490,  0.9760,
          2.9695,  2.4714,  1.0389, -1.9428,  3.3068,  2.2264,  0.1712,  0.6588,
         -2.0021,  0.6660,  1.3276,  1.1224],
        [ 5.1394, -1.8749,  4.7498,  8.8634, -5.6383,  1.4736,  0.3408, -4.7306,
          4.0217,  3.2184,  2.4766, -0.9318,  2.1888,  4.2901, -2.7647,  0.7033,
          0.1061, -2.2913,  1.8564,  1.4242],
        [-4.4555,  2.1200,  0.2810, -5.6383,  7.8555, -3.6295, -2.4313,  2.7517,
         -3.7156, -4.4249, -2.1930,  1.1223,  0.1595, -2.7562,  3.9227, -1.7315,
      

In [None]:
X[:,0].max()

0.061485384

In [None]:
X[:,0].min()

-0.059155844

In [None]:
def get_sensativitiy(X,y):

    sens_x = np.zeros(X.shape[1])

    for i in range((X.shape[1])):
        sens_x[i] = X[:,i].max() - X[:,i].min()
    
    sens_y = y.max() - y.min()

    return sens_x,sens_y