In [75]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
        
# Load the two datasets used throughout the notebook. Both paths are relative
# to the notebook's working directory.
compas = pd.read_csv(filepath_or_buffer='cox-violent-parsed.csv')
adult = pd.read_csv(filepath_or_buffer='adult_with_pii.csv')

def pct_error(orig, priv):
    """Return the absolute percent error of ``priv`` relative to ``orig``.

    The magnitude of ``orig`` is used as the denominator so that the result
    is non-negative even when the reference value is negative.

    Args:
        orig: Reference (non-private) value.
        priv: Value to compare against the reference.

    Returns:
        ``|orig - priv| / |orig| * 100``. ``orig == 0`` is not handled
        specially and results in a division by zero.
    """
    return np.abs(orig - priv) / np.abs(orig) * 100.0

# Overwrite the first column with (row position + 1) for every row from
# position 11001 onward.
# NOTE(review): presumably this back-fills missing/invalid ids in the COMPAS
# file — confirm against the raw CSV.
# A single slice assignment replaces the per-row .iloc loop; the guard keeps
# the statement a no-op when the frame has no rows at or past that position.
if len(compas) > 11001:
    compas.iloc[11001:, 0] = np.arange(11002, len(compas) + 1)

# Alternative DP Definitions
The cells below compare the accuracy of the following differential privacy variants on the Adult and COMPAS datasets.

## Rényi Differential Privacy, Zero-Concentrated Differential Privacy, and (ε, δ)-Differential Privacy

In [76]:
# Renyi-Differential Privacy Algorithm
# credit to Joe Near for python implementation
def gaussian_mech_RDP(val, sensitivity, alpha, epsilon):
    """Add Gaussian noise calibrated for Renyi differential privacy.

    The noise is zero-mean with variance
    ``sensitivity**2 * alpha / (2 * epsilon)``.

    Args:
        val: True query answer to perturb.
        sensitivity: Sensitivity of the query.
        alpha: Renyi order.
        epsilon: RDP privacy parameter at order ``alpha``.

    Returns:
        ``val`` plus one noise draw from NumPy's global random state.
    """
    noise_variance = (sensitivity**2 * alpha) / (2 * epsilon)
    noise = np.random.normal(loc=0, scale=np.sqrt(noise_variance))
    return val + noise

# Epsilon-Delta Algorithm
def gaussian_mech_EDP(val, sensitivity, epsilon, delta):
    """Add Gaussian noise calibrated for (epsilon, delta)-differential privacy.

    The noise is zero-mean with standard deviation
    ``sensitivity * sqrt(2 * ln(1.25 / delta)) / epsilon``.

    Args:
        val: True query answer to perturb.
        sensitivity: Sensitivity of the query.
        epsilon: Privacy parameter epsilon.
        delta: Privacy parameter delta.

    Returns:
        ``val`` plus one noise draw from NumPy's global random state.
    """
    sigma = sensitivity * np.sqrt(2 * np.log(1.25 / delta)) / epsilon
    noise = np.random.normal(loc=0, scale=sigma)
    return val + noise

# zero-Concentrated Algorithm
def gaussian_mech_zCDP(val, sensitivity, rho):
    """Add Gaussian noise calibrated for zero-concentrated differential privacy.

    The noise is zero-mean with variance ``sensitivity**2 / (2 * rho)``.

    Args:
        val: True query answer to perturb.
        sensitivity: Sensitivity of the query.
        rho: zCDP privacy parameter.

    Returns:
        ``val`` plus one noise draw from NumPy's global random state.
    """
    noise_variance = sensitivity**2 / (2 * rho)
    noise = np.random.normal(loc=0, scale=np.sqrt(noise_variance))
    return val + noise

# RDP Vector Implementation
def gaussian_mech_vec_RDP(vec, sensitivity, alpha, epsilon):
    """Apply ``gaussian_mech_RDP`` independently to every element of ``vec``.

    Each element is released with an equal share ``epsilon / len(vec)`` of the
    budget, using the same ``sensitivity`` and ``alpha`` for every element.

    Args:
        vec: Sized iterable of true values.
        sensitivity: Sensitivity passed through for each element.
        alpha: Renyi order.
        epsilon: Total RDP epsilon divided evenly across the elements.

    Returns:
        A list with one noisy value per element of ``vec``, in order.
    """
    noisy_values = []
    for element in vec:
        # The share is computed per element (not hoisted) so an empty `vec`
        # returns [] without ever dividing by zero.
        share = epsilon / len(vec)
        noisy_values.append(
            gaussian_mech_RDP(element, sensitivity=sensitivity, alpha=alpha, epsilon=share)
        )
    return noisy_values

# Convert an RDP epsilon to the epsilon of an (epsilon, delta)-DP guarantee
def rdp_convert(alpha, epsilon_bar, delta):
    """Convert an RDP parameter into an (epsilon, delta)-DP epsilon.

    Args:
        alpha: Renyi order. ``alpha == 1`` is not handled and divides by zero.
        epsilon_bar: RDP epsilon at order ``alpha``.
        delta: Target delta.

    Returns:
        ``epsilon_bar + ln(1 / delta) / (alpha - 1)``.
    """
    log_inverse_delta = np.log(1 / delta)
    conversion_cost = log_inverse_delta / (alpha - 1)
    return epsilon_bar + conversion_cost

# Convert a zCDP rho to the epsilon of an (epsilon, delta)-DP guarantee
def zcdp_convert(rho, delta):
    """Convert a zCDP parameter into an (epsilon, delta)-DP epsilon.

    Args:
        rho: zCDP privacy parameter.
        delta: Target delta.

    Returns:
        ``rho + 2 * sqrt(rho * ln(1 / delta))``.
    """
    log_inverse_delta = np.log(1 / delta)
    conversion_cost = 2 * np.sqrt(rho * log_inverse_delta)
    return rho + conversion_cost

### Summation

In [77]:
def rdp_sum(dataset, sensitivity, alpha, epsilon):
    """Release a noisy sum of ``dataset`` using the RDP Gaussian mechanism.

    Values are clipped to ``[0, sensitivity]`` before summing. Clipping on
    both sides is required: with only an upper bound, one negative record
    could change the sum by an arbitrary amount, so ``sensitivity`` would not
    actually bound the query's sensitivity.

    Args:
        dataset: pandas Series of numeric values (assumed non-negative by
            nature, e.g. ages; negative entries are clipped to 0).
        sensitivity: Upper clipping bound, also used as the sum's sensitivity.
        alpha: Renyi order.
        epsilon: RDP epsilon at order ``alpha``.

    Returns:
        The clipped sum plus Gaussian noise.
    """
    clipped_data = dataset.clip(lower=0, upper=sensitivity)
    # Series.sum() is vectorized and skips NaN, matching how the notebook
    # computes the non-private reference sums.
    return gaussian_mech_RDP(clipped_data.sum(), sensitivity=sensitivity, alpha=alpha, epsilon=epsilon)

def edp_sum(dataset, sensitivity, epsilon, delta):
    """Release a noisy sum of ``dataset`` using the (epsilon, delta) Gaussian mechanism.

    Values are clipped to ``[0, sensitivity]`` before summing. Clipping on
    both sides is required: with only an upper bound, one negative record
    could change the sum by an arbitrary amount, so ``sensitivity`` would not
    actually bound the query's sensitivity.

    Args:
        dataset: pandas Series of numeric values (assumed non-negative by
            nature, e.g. ages; negative entries are clipped to 0).
        sensitivity: Upper clipping bound, also used as the sum's sensitivity.
        epsilon: Privacy parameter epsilon.
        delta: Privacy parameter delta.

    Returns:
        The clipped sum plus Gaussian noise.
    """
    clipped_data = dataset.clip(lower=0, upper=sensitivity)
    # Series.sum() is vectorized and skips NaN, matching how the notebook
    # computes the non-private reference sums.
    return gaussian_mech_EDP(clipped_data.sum(), sensitivity=sensitivity, epsilon=epsilon, delta=delta)

def zcdp_sum(dataset, sensitivity, rho):
    """Release a noisy sum of ``dataset`` using the zCDP Gaussian mechanism.

    Values are clipped to ``[0, sensitivity]`` before summing. Clipping on
    both sides is required: with only an upper bound, one negative record
    could change the sum by an arbitrary amount, so ``sensitivity`` would not
    actually bound the query's sensitivity.

    Args:
        dataset: pandas Series of numeric values (assumed non-negative by
            nature, e.g. ages; negative entries are clipped to 0).
        sensitivity: Upper clipping bound, also used as the sum's sensitivity.
        rho: zCDP privacy parameter.

    Returns:
        The clipped sum plus Gaussian noise.
    """
    clipped_data = dataset.clip(lower=0, upper=sensitivity)
    # Series.sum() is vectorized and skips NaN, matching how the notebook
    # computes the non-private reference sums.
    return gaussian_mech_zCDP(clipped_data.sum(), sensitivity=sensitivity, rho=rho)

In [78]:
# Privacy parameters for each mechanism, plus the clipping bound `b`, which is
# also passed as the sensitivity of the sum query.
# NOTE(review): `epsilon` is passed unchanged both as the (epsilon, delta)-DP
# epsilon and as the RDP epsilon at order `alpha`, and `rho` is set
# independently; rdp_convert / zcdp_convert are not applied in this cell, so
# the three mechanisms may not be running at the same overall guarantee —
# confirm that this comparison is intended.
epsilon, alpha, delta, rho = .01, 5, 1e-5, .0001
b = 100

# Adult dataset: exact sum, then 100 noisy releases per mechanism.
adult_real_results = adult['Age'].sum()
adult_edp_results = [
    edp_sum(adult['Age'], sensitivity=b, epsilon=epsilon, delta=delta)
    for _trial in range(100)
]
adult_rdp_results = [
    rdp_sum(adult['Age'], sensitivity=b, alpha=alpha, epsilon=epsilon)
    for _trial in range(100)
]
adult_zcdp_results = [
    zcdp_sum(adult['Age'], sensitivity=b, rho=rho)
    for _trial in range(100)
]

# COMPAS dataset: same procedure on its age column.
compas_real_results = compas['age'].sum()
compas_edp_results = [
    edp_sum(compas['age'], sensitivity=b, epsilon=epsilon, delta=delta)
    for _trial in range(100)
]
compas_rdp_results = [
    rdp_sum(compas['age'], sensitivity=b, alpha=alpha, epsilon=epsilon)
    for _trial in range(100)
]
compas_zcdp_results = [
    zcdp_sum(compas['age'], sensitivity=b, rho=rho)
    for _trial in range(100)
]

# Mean percent error over the 100 trials of each mechanism.
adult_edp_error = np.mean([pct_error(adult_real_results, noisy) for noisy in adult_edp_results])
adult_rdp_error = np.mean([pct_error(adult_real_results, noisy) for noisy in adult_rdp_results])
adult_zcdp_error = np.mean([pct_error(adult_real_results, noisy) for noisy in adult_zcdp_results])
compas_edp_error = np.mean([pct_error(compas_real_results, noisy) for noisy in compas_edp_results])
compas_rdp_error = np.mean([pct_error(compas_real_results, noisy) for noisy in compas_rdp_results])
compas_zcdp_error = np.mean([pct_error(compas_real_results, noisy) for noisy in compas_zcdp_results])

print("Adult Dataset:")
print("Adult Epsilon-Delta Error:", adult_edp_error)
print("Adult Renyi Error:", adult_rdp_error)
print("Adult Zero-Concentrated Error:", adult_zcdp_error)
print("Compas Dataset:")
print("COMPAS Epsilon-Delta Error:", compas_edp_error)
print("COMPAS Renyi Error:", compas_rdp_error)
print("COMPAS Zero-Concentrated Error:", compas_zcdp_error)

Adult Dataset:
Adult Epsilon-Delta Error: 3.0939425493977546
Adult Renyi Error: 0.0965477153993209
Adult Zero-Concentrated Error: 0.47980053763751285
Compas Dataset:
COMPAS Epsilon-Delta Error: 6.147383367331891
COMPAS Renyi Error: 0.20686923141225858
COMPAS Zero-Concentrated Error: 0.8303894163393789


### Count

In [79]:
# NOTE(review): this repeats the gaussian_mech_RDP definition from the helper
# cell near the top of the notebook; the redefinition shadows it with the same
# formula and could be removed once the cells are always run in order.
def gaussian_mech_RDP(val, sensitivity, alpha, epsilon):
    """Add Gaussian noise calibrated for Renyi differential privacy.

    The noise is zero-mean with variance
    ``sensitivity**2 * alpha / (2 * epsilon)``.

    Args:
        val: True query answer to perturb.
        sensitivity: Sensitivity of the query.
        alpha: Renyi order.
        epsilon: RDP privacy parameter at order ``alpha``.

    Returns:
        ``val`` plus one noise draw from NumPy's global random state.
    """
    noise_variance = (sensitivity**2 * alpha) / (2 * epsilon)
    noise = np.random.normal(loc=0, scale=np.sqrt(noise_variance))
    return val + noise
    
def rdp_count(dataset, sensitivity, alpha, epsilon):
    """Release a noisy count using the RDP Gaussian mechanism.

    Despite its name, ``dataset`` is the value that noise is added to; the
    callers in this notebook pass the already-computed row count.

    Args:
        dataset: The true count to perturb.
        sensitivity: Sensitivity of the count query.
        alpha: Renyi order.
        epsilon: RDP epsilon at order ``alpha``.

    Returns:
        The count plus Gaussian noise.
    """
    mech_args = {'sensitivity': sensitivity, 'alpha': alpha, 'epsilon': epsilon}
    return gaussian_mech_RDP(dataset, **mech_args)

def edp_count(dataset, sensitivity, epsilon, delta):
    """Release a noisy count using the (epsilon, delta) Gaussian mechanism.

    Despite its name, ``dataset`` is the value that noise is added to; the
    callers in this notebook pass the already-computed row count.

    Args:
        dataset: The true count to perturb.
        sensitivity: Sensitivity of the count query.
        epsilon: Privacy parameter epsilon.
        delta: Privacy parameter delta.

    Returns:
        The count plus Gaussian noise.
    """
    mech_args = {'sensitivity': sensitivity, 'epsilon': epsilon, 'delta': delta}
    return gaussian_mech_EDP(dataset, **mech_args)

def zcdp_count(dataset, sensitivity, rho):
    """Release a noisy count using the zCDP Gaussian mechanism.

    Despite its name, ``dataset`` is the value that noise is added to; the
    callers in this notebook pass the already-computed row count.

    Args:
        dataset: The true count to perturb.
        sensitivity: Sensitivity of the count query.
        rho: zCDP privacy parameter.

    Returns:
        The count plus Gaussian noise.
    """
    mech_args = {'sensitivity': sensitivity, 'rho': rho}
    return gaussian_mech_zCDP(dataset, **mech_args)

In [80]:
# Privacy parameters for each mechanism; `b` is the sensitivity passed for the
# counting query.
# NOTE(review): `epsilon` is used as-is for both the (epsilon, delta) and the
# RDP mechanism and `rho` is set independently, so the three mechanisms may
# not share the same overall guarantee — confirm this is intended.
epsilon, alpha, delta, rho = .01, 5, 1e-5, .0001
b = 1

# Adult dataset: exact count, then 100 noisy releases per mechanism.
adult_real_results = int(adult['Age'].count())
adult_rdp_results = [
    rdp_count(adult_real_results, sensitivity=b, alpha=alpha, epsilon=epsilon)
    for _trial in range(100)
]
adult_edp_results = [
    edp_count(adult_real_results, sensitivity=b, epsilon=epsilon, delta=delta)
    for _trial in range(100)
]
adult_zcdp_results = [
    zcdp_count(adult_real_results, sensitivity=b, rho=rho)
    for _trial in range(100)
]

# Mean percent error over the 100 trials (Adult).
adult_rdp_error = np.mean([pct_error(adult_real_results, noisy) for noisy in adult_rdp_results])
adult_edp_error = np.mean([pct_error(adult_real_results, noisy) for noisy in adult_edp_results])
adult_zcdp_error = np.mean([pct_error(adult_real_results, noisy) for noisy in adult_zcdp_results])

# COMPAS dataset: same procedure on its age column.
compas_real_results = int(compas['age'].count())
compas_edp_results = [
    edp_count(compas_real_results, sensitivity=b, epsilon=epsilon, delta=delta)
    for _trial in range(100)
]
compas_rdp_results = [
    rdp_count(compas_real_results, sensitivity=b, alpha=alpha, epsilon=epsilon)
    for _trial in range(100)
]
compas_zcdp_results = [
    zcdp_count(compas_real_results, sensitivity=b, rho=rho)
    for _trial in range(100)
]

# Mean percent error over the 100 trials (COMPAS).
compas_edp_error = np.mean([pct_error(compas_real_results, noisy) for noisy in compas_edp_results])
compas_rdp_error = np.mean([pct_error(compas_real_results, noisy) for noisy in compas_rdp_results])
compas_zcdp_error = np.mean([pct_error(compas_real_results, noisy) for noisy in compas_zcdp_results])

print("Adult Dataset:")
print("Adult Epsilon-Delta Error:", adult_edp_error)
print("Adult Renyi Error:", adult_rdp_error)
print("Adult Zero-Concentrated Error:", adult_zcdp_error)
print("Compas Dataset:")
print("COMPAS Epsilon-Delta Error:", compas_edp_error)
print("COMPAS Renyi Error:", compas_rdp_error)
print("COMPAS Zero-Concentrated Error:", compas_zcdp_error)

Adult Dataset:
Adult Epsilon-Delta Error: 1.285617977563333
Adult Renyi Error: 0.040324648731444174
Adult Zero-Concentrated Error: 0.1582381850879725
Compas Dataset:
COMPAS Epsilon-Delta Error: 2.067833200740445
COMPAS Renyi Error: 0.07689301304829192
COMPAS Zero-Concentrated Error: 0.2623938293766288


### Average

In [81]:
def rdp_avg(dataset, sensitivity, alpha, epsilon):
    """Release a noisy average as (noisy sum) / (noisy count) under RDP.

    The epsilon budget is split evenly between the sum and the count query;
    both use the same ``alpha``.

    Values are clipped to ``[0, sensitivity]`` before summing. Clipping on
    both sides is required: with only an upper bound, one negative record
    could change the sum by an arbitrary amount, so ``sensitivity`` would not
    actually bound the sum's sensitivity.

    Args:
        dataset: pandas Series of numeric values (assumed non-negative by
            nature, e.g. ages; negative entries are clipped to 0).
        sensitivity: Upper clipping bound, also used as the sum's sensitivity.
        alpha: Renyi order.
        epsilon: Total RDP epsilon for the average.

    Returns:
        The ratio of the noisy sum to the noisy count.
    """
    clipped_data = dataset.clip(lower=0, upper=sensitivity)
    # Series.sum() / Series.count() are vectorized and both skip NaN, so the
    # numerator and denominator refer to the same set of rows.
    noisy_sum = gaussian_mech_RDP(clipped_data.sum(), sensitivity=sensitivity, alpha=alpha, epsilon=epsilon/2)
    # A count changes by at most 1 when one row is added or removed.
    noisy_count = gaussian_mech_RDP(clipped_data.count(), sensitivity=1, alpha=alpha, epsilon=epsilon/2)
    return noisy_sum/noisy_count

def edp_avg(dataset, sensitivity, epsilon, delta):
    """Release a noisy average as (noisy sum) / (noisy count) under (epsilon, delta)-DP.

    Both epsilon and delta are split evenly between the sum and the count
    query. Under sequential composition the two releases add up to
    (epsilon, delta); spending the full ``delta`` on each query would yield
    (epsilon, 2 * delta) instead.

    Values are clipped to ``[0, sensitivity]`` before summing. Clipping on
    both sides is required: with only an upper bound, one negative record
    could change the sum by an arbitrary amount, so ``sensitivity`` would not
    actually bound the sum's sensitivity.

    Args:
        dataset: pandas Series of numeric values (assumed non-negative by
            nature, e.g. ages; negative entries are clipped to 0).
        sensitivity: Upper clipping bound, also used as the sum's sensitivity.
        epsilon: Total epsilon for the average.
        delta: Total delta for the average.

    Returns:
        The ratio of the noisy sum to the noisy count.
    """
    clipped_data = dataset.clip(lower=0, upper=sensitivity)
    # Series.sum() / Series.count() are vectorized and both skip NaN, so the
    # numerator and denominator refer to the same set of rows.
    noisy_sum = gaussian_mech_EDP(clipped_data.sum(), sensitivity=sensitivity, epsilon=epsilon/2, delta=delta/2)
    # A count changes by at most 1 when one row is added or removed.
    noisy_count = gaussian_mech_EDP(clipped_data.count(), sensitivity=1, epsilon=epsilon/2, delta=delta/2)
    return noisy_sum/noisy_count

def zcdp_avg(dataset, sensitivity, rho):
    """Release a noisy average as (noisy sum) / (noisy count) under zCDP.

    The ``rho`` budget is split evenly between the sum and the count query.

    Values are clipped to ``[0, sensitivity]`` before summing. Clipping on
    both sides is required: with only an upper bound, one negative record
    could change the sum by an arbitrary amount, so ``sensitivity`` would not
    actually bound the sum's sensitivity.

    Args:
        dataset: pandas Series of numeric values (assumed non-negative by
            nature, e.g. ages; negative entries are clipped to 0).
        sensitivity: Upper clipping bound, also used as the sum's sensitivity.
        rho: Total zCDP budget for the average.

    Returns:
        The ratio of the noisy sum to the noisy count.
    """
    clipped_data = dataset.clip(lower=0, upper=sensitivity)
    # Series.sum() / Series.count() are vectorized and both skip NaN, so the
    # numerator and denominator refer to the same set of rows.
    noisy_sum = gaussian_mech_zCDP(clipped_data.sum(), sensitivity=sensitivity, rho=rho/2)
    # A count changes by at most 1 when one row is added or removed.
    noisy_count = gaussian_mech_zCDP(clipped_data.count(), sensitivity=1, rho=rho/2)
    return noisy_sum/noisy_count

In [82]:
# Privacy parameters for each mechanism, plus the clipping bound `b`, which is
# also passed as the sensitivity of the sum part of the average.
# NOTE(review): `epsilon` is used as-is for both the (epsilon, delta) and the
# RDP mechanism and `rho` is set independently, so the three mechanisms may
# not share the same overall guarantee — confirm this is intended.
epsilon, alpha, delta, rho = .01, 5, 1e-5, .0001
b = 100

# Adult dataset: exact mean, then 100 noisy releases per mechanism.
adult_real_results = adult['Age'].sum() / adult['Age'].count()
adult_edp_results = [
    edp_avg(adult['Age'], sensitivity=b, epsilon=epsilon, delta=delta)
    for _trial in range(100)
]
adult_rdp_results = [
    rdp_avg(adult['Age'], sensitivity=b, alpha=alpha, epsilon=epsilon)
    for _trial in range(100)
]
adult_zcdp_results = [
    zcdp_avg(adult['Age'], sensitivity=b, rho=rho)
    for _trial in range(100)
]

# COMPAS dataset: same procedure on its age column.
compas_real_results = compas['age'].sum() / compas['age'].count()
compas_edp_results = [
    edp_avg(compas['age'], sensitivity=b, epsilon=epsilon, delta=delta)
    for _trial in range(100)
]
compas_rdp_results = [
    rdp_avg(compas['age'], sensitivity=b, alpha=alpha, epsilon=epsilon)
    for _trial in range(100)
]
compas_zcdp_results = [
    zcdp_avg(compas['age'], sensitivity=b, rho=rho)
    for _trial in range(100)
]

# Mean percent error over the 100 trials of each mechanism.
adult_edp_error = np.mean([pct_error(adult_real_results, noisy) for noisy in adult_edp_results])
adult_rdp_error = np.mean([pct_error(adult_real_results, noisy) for noisy in adult_rdp_results])
adult_zcdp_error = np.mean([pct_error(adult_real_results, noisy) for noisy in adult_zcdp_results])
compas_edp_error = np.mean([pct_error(compas_real_results, noisy) for noisy in compas_edp_results])
compas_rdp_error = np.mean([pct_error(compas_real_results, noisy) for noisy in compas_rdp_results])
compas_zcdp_error = np.mean([pct_error(compas_real_results, noisy) for noisy in compas_zcdp_results])

print("Adult Dataset:")
print("Adult Epsilon-Delta Error:", adult_edp_error)
print("Adult Renyi Error:", adult_rdp_error)
print("Adult Zero-Concentrated Error:", adult_zcdp_error)
print("Compas Dataset:")
print("COMPAS Epsilon-Delta Error:", compas_edp_error)
print("COMPAS Renyi Error:", compas_rdp_error)
print("COMPAS Zero-Concentrated Error:", compas_zcdp_error)

Adult Dataset:
Adult Epsilon-Delta Error: 6.807213551053998
Adult Renyi Error: 0.13942231779916703
Adult Zero-Concentrated Error: 0.6357329818440466
Compas Dataset:
COMPAS Epsilon-Delta Error: 13.422846638898946
COMPAS Renyi Error: 0.31855803331900984
COMPAS Zero-Concentrated Error: 1.43985197176704


### Summation