In [123]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
        
# Load the two datasets used by the cells below. Both paths are relative, so
# the CSV files are looked up in the notebook's working directory.
compas_csv = 'cox-violent-parsed.csv'
adult_csv = 'adult_with_pii.csv'
compas = pd.read_csv(compas_csv)
adult = pd.read_csv(adult_csv)

def pct_error(orig, priv):
    """Return the absolute percentage error of ``priv`` relative to ``orig``.

    Computes ``|orig - priv| / |orig| * 100``. The denominator uses the
    magnitude of ``orig`` so the result stays non-negative even when the true
    value is negative (dividing by the signed value would flip the sign).

    Args:
        orig: true (non-private) value; a scalar or anything NumPy can
            broadcast against ``priv``.
        priv: noisy value(s) to compare against ``orig``.

    Returns:
        The percentage error(s). ``orig == 0`` is not special-cased, so the
        division produces inf/nan in that case.
    """
    return np.abs(orig - priv) / np.abs(orig) * 100.0

# Overwrite the first column of `compas` with a sequential 1-based number for
# every row from position `renumber_start` onward (row position i gets i + 1).
# NOTE(review): presumably the ids in the CSV are unusable from this row on;
# confirm where 11001 comes from.
renumber_start = 11001
# One vectorised slice assignment instead of one scalar .iloc write per row.
# The guard keeps a shorter frame untouched, as the original empty loop did.
if len(compas) > renumber_start:
    compas.iloc[renumber_start:, 0] = np.arange(renumber_start + 1, len(compas) + 1)

# Alternative DP Definitions
The cells below compare the accuracy of the following differentially private mechanisms on the Adult and COMPAS datasets.

## Rényi Differential Privacy, Zero-Concentrated Differential Privacy, and (ε, δ)-Differential Privacy

In [101]:
# Renyi-Differential Privacy Algorithm
# credit to Joe Near for python implementation
def gaussian_mech_RDP(val, sensitivity, alpha, epsilon):
    """Add Gaussian noise to ``val`` using the Renyi-DP noise calibration.

    The noise variance is ``sensitivity**2 * alpha / (2 * epsilon)``.

    Args:
        val: value to perturb.
        sensitivity: sensitivity bound that scales the noise.
        alpha: order parameter of the RDP guarantee.
        epsilon: privacy parameter of the RDP guarantee.

    Returns:
        ``val`` plus one zero-mean Gaussian draw from NumPy's global RNG.
    """
    noise_variance = (sensitivity**2 * alpha) / (2 * epsilon)
    noise = np.random.normal(loc=0, scale=np.sqrt(noise_variance))
    return val + noise

# Epsilon-Delta Algorithm
def gaussian_mech_EPD(val, sensitivity, epsilon, delta):
    """Add Gaussian noise to ``val`` using the (epsilon, delta) calibration.

    The noise standard deviation is
    ``sensitivity * sqrt(2 * ln(1.25 / delta)) / epsilon``.

    Args:
        val: value to perturb.
        sensitivity: sensitivity bound that scales the noise.
        epsilon: privacy parameter; the noise scale is inversely proportional to it.
        delta: failure-probability parameter of the guarantee.

    Returns:
        ``val`` plus one zero-mean Gaussian draw from NumPy's global RNG.
    """
    log_term = np.log(1.25/delta)
    sigma = sensitivity * np.sqrt(2*log_term) / epsilon
    noise = np.random.normal(loc=0, scale=sigma)
    return val + noise

# zero-Concentrated Algorithm
def gaussian_mech_zCDP(val, sensitivity, rho):
    """Add Gaussian noise to ``val`` using the zero-concentrated DP calibration.

    The noise variance is ``sensitivity**2 / (2 * rho)``.

    Args:
        val: value to perturb.
        sensitivity: sensitivity bound that scales the noise.
        rho: zCDP privacy parameter; smaller values give more noise.

    Returns:
        ``val`` plus one zero-mean Gaussian draw from NumPy's global RNG.
    """
    noise_variance = (sensitivity**2 / (2*rho))
    noise = np.random.normal(loc=0, scale=np.sqrt(noise_variance))
    return val + noise

# RDP Vector Implementation
def gaussian_mech_vec_RDP(vec, sensitivity, alpha, epsilon):
    """Apply ``gaussian_mech_RDP`` independently to every element of ``vec``.

    Each element is perturbed with ``epsilon / len(vec)``, i.e. the supplied
    epsilon is divided evenly across the elements (presumably so that the
    per-element guarantees add back up to ``epsilon``).

    Args:
        vec: sized iterable of values to perturb.
        sensitivity: sensitivity passed to the mechanism for each element.
        alpha: RDP order passed to the mechanism for each element.
        epsilon: total epsilon to split across the elements.

    Returns:
        A list of noisy values in the same order as ``vec``; an empty ``vec``
        yields an empty list.
    """
    noisy_values = []
    for element in vec:
        # The share is computed inside the loop so an empty `vec` never
        # reaches the division.
        share = epsilon/len(vec)
        noisy_values.append(
            gaussian_mech_RDP(element, sensitivity=sensitivity, alpha=alpha, epsilon=share)
        )
    return noisy_values

# Convert an RDP guarantee (alpha, epsilon_bar) to the epsilon of an (epsilon, delta)-DP guarantee
def rdp_convert(alpha, epsilon_bar, delta):
    """Convert an RDP epsilon into the epsilon of an (epsilon, delta) guarantee.

    Computes ``epsilon_bar + ln(1 / delta) / (alpha - 1)``.

    Args:
        alpha: RDP order; ``alpha == 1`` makes the denominator zero.
        epsilon_bar: RDP epsilon at order ``alpha``.
        delta: target delta of the converted guarantee.

    Returns:
        The converted epsilon.
    """
    conversion_cost = np.log(1/delta) / (alpha-1)
    return epsilon_bar + conversion_cost

# Convert a zCDP guarantee (rho) to the epsilon of an (epsilon, delta)-DP guarantee
def zcdp_convert(rho, delta):
    """Convert a zCDP ``rho`` into the epsilon of an (epsilon, delta) guarantee.

    Computes ``rho + 2 * sqrt(rho * ln(1 / delta))``.

    Args:
        rho: zCDP privacy parameter.
        delta: target delta of the converted guarantee.

    Returns:
        The converted epsilon.
    """
    log_inverse_delta = np.log(1/delta)
    slack = 2 * np.sqrt(rho * log_inverse_delta)
    return rho + slack

### Summation

In [134]:
def rdp_sum_age(dataset, sensitivity, alpha, epsilon):
    """Release a noisy, clipped sum of ``dataset`` through ``gaussian_mech_RDP``.

    Args:
        dataset: pandas Series of values to sum (the notebook passes an age
            column).
        sensitivity: clipping bound; values are clipped to
            ``[0, sensitivity]`` and the same number is forwarded to the
            mechanism as the sensitivity of the sum.
        alpha: forwarded unchanged to ``gaussian_mech_RDP``.
        epsilon: forwarded unchanged to ``gaussian_mech_RDP``.

    Returns:
        The clipped sum plus the noise added by ``gaussian_mech_RDP``.
    """
    # Clip on both sides: with only an upper bound a negative entry could move
    # the sum by more than `sensitivity`, the bound the noise is scaled to.
    clipped_data = dataset.clip(lower=0, upper=sensitivity)
    # Series.sum() is vectorised and skips NaN, which matches how the notebook
    # computes the non-private baseline (`adult['Age'].sum()`).
    clipped_total = clipped_data.sum()
    return gaussian_mech_RDP(clipped_total, sensitivity=sensitivity, alpha=alpha, epsilon=epsilon)

def edp_sum_age(dataset, sensitivity, epsilon, delta):
    """Release a noisy, clipped sum of ``dataset`` through ``gaussian_mech_EPD``.

    Args:
        dataset: pandas Series of values to sum (the notebook passes an age
            column).
        sensitivity: clipping bound; values are clipped to
            ``[0, sensitivity]`` and the same number is forwarded to the
            mechanism as the sensitivity of the sum.
        epsilon: forwarded unchanged to ``gaussian_mech_EPD``.
        delta: forwarded unchanged to ``gaussian_mech_EPD``.

    Returns:
        The clipped sum plus the noise added by ``gaussian_mech_EPD``.
    """
    # Clip on both sides: with only an upper bound a negative entry could move
    # the sum by more than `sensitivity`, the bound the noise is scaled to.
    clipped_data = dataset.clip(lower=0, upper=sensitivity)
    # Series.sum() is vectorised and skips NaN, which matches how the notebook
    # computes the non-private baseline (`adult['Age'].sum()`).
    clipped_total = clipped_data.sum()
    return gaussian_mech_EPD(clipped_total, sensitivity=sensitivity, epsilon=epsilon, delta=delta)

def zcdp_sum_age(dataset, sensitivity, rho):
    """Release a noisy, clipped sum of ``dataset`` through ``gaussian_mech_zCDP``.

    Args:
        dataset: pandas Series of values to sum (the notebook passes an age
            column).
        sensitivity: clipping bound; values are clipped to
            ``[0, sensitivity]`` and the same number is forwarded to the
            mechanism as the sensitivity of the sum.
        rho: forwarded unchanged to ``gaussian_mech_zCDP``.

    Returns:
        The clipped sum plus the noise added by ``gaussian_mech_zCDP``.
    """
    # Clip on both sides: with only an upper bound a negative entry could move
    # the sum by more than `sensitivity`, the bound the noise is scaled to.
    clipped_data = dataset.clip(lower=0, upper=sensitivity)
    # Series.sum() is vectorised and skips NaN, which matches how the notebook
    # computes the non-private baseline (`adult['Age'].sum()`).
    clipped_total = clipped_data.sum()
    return gaussian_mech_zCDP(clipped_total, sensitivity=sensitivity, rho=rho)

In [137]:
# Privacy parameters, one set per mechanism.
# NOTE(review): the values are used as-is and are NOT converted to a common
# (epsilon, delta) guarantee, so the three mechanisms run at different
# effective privacy levels. Using the formulas in rdp_convert / zcdp_convert
# with delta=1e-5, (alpha=5, epsilon=.01)-RDP corresponds to epsilon of about
# 2.89 and rho=.0001 to epsilon of about 0.068, versus epsilon=.01 for the
# (epsilon, delta) mechanism. Read the error comparison with that in mind.
epsilon = .01
alpha = 5
delta = 1e-5
rho = .0001
b = 100         # clipping bound, also passed as the sensitivity of the sum
n_trials = 100  # noisy releases averaged per mechanism and dataset
#epsilon_delta = rdp_convert(alpha, epsilon, delta)

# No random seed is set in this cell, so the reported errors change from run
# to run.

# Run Tests Here
# Adult Dataset
adult_real_results = adult['Age'].sum()
adult_edp_results = [edp_sum_age(adult['Age'],sensitivity=b,epsilon=epsilon,delta=delta) for _ in range(n_trials)]
adult_rdp_results = [rdp_sum_age(adult['Age'],sensitivity=b,alpha=alpha,epsilon=epsilon) for _ in range(n_trials)]
adult_zcdp_results = [zcdp_sum_age(adult['Age'],sensitivity=b,rho=rho) for _ in range(n_trials)]

# COMPAS Dataset
compas_real_results = compas['age'].sum()
compas_edp_results = [edp_sum_age(compas['age'],sensitivity=b,epsilon=epsilon,delta=delta) for _ in range(n_trials)]
compas_rdp_results = [rdp_sum_age(compas['age'],sensitivity=b,alpha=alpha,epsilon=epsilon) for _ in range(n_trials)]
compas_zcdp_results = [zcdp_sum_age(compas['age'],sensitivity=b,rho=rho) for _ in range(n_trials)]


# Error Results: mean percentage error over the trials. np.mean divides by the
# actual number of results, so it cannot drift out of sync with n_trials.
adult_edp_error = np.mean([pct_error(adult_real_results, result) for result in adult_edp_results])
adult_rdp_error = np.mean([pct_error(adult_real_results, result) for result in adult_rdp_results])
adult_zcdp_error = np.mean([pct_error(adult_real_results, result) for result in adult_zcdp_results])
compas_edp_error = np.mean([pct_error(compas_real_results, result) for result in compas_edp_results])
compas_rdp_error = np.mean([pct_error(compas_real_results, result) for result in compas_rdp_results])
compas_zcdp_error = np.mean([pct_error(compas_real_results, result) for result in compas_zcdp_results])

print("Adult Dataset:")
print("Adult Epsilon-Delta Error:",adult_edp_error)
print("Adult Renyi Error:",adult_rdp_error)
print("Adult Zero-Concentrated Error:",adult_zcdp_error)
print("Compas Dataset:")
print("COMPAS Epsilon-Delta Error:",compas_edp_error)
print("COMPAS Renyi Error:",compas_rdp_error)
print("COMPAS Zero-Concentrated Error:",compas_zcdp_error)

Adult Dataset:
1256257
Adult Epsilon-Delta Error: 2.6002073775945678
Adult Renyi Error: 0.1000431511809559
Adult Zero-Concentrated Error: 0.47053205508525553
Compas Dataset:
COMPAS Epsilon-Delta Error: 50.61119867591097
COMPAS Renyi Error: 50.37657378748715
COMPAS Zero-Concentrated Error: 50.32914653527433


### Count

In [None]:
def gaussian_mech_RDP(val, sensitivity, alpha, epsilon):
    """Add Gaussian noise to ``val`` using the Renyi-DP noise calibration.

    NOTE(review): this repeats the ``gaussian_mech_RDP`` definition from the
    earlier cell with the same formula; running this cell rebinds the name.

    Args:
        val: value to perturb.
        sensitivity: sensitivity bound that scales the noise.
        alpha: order parameter of the RDP guarantee.
        epsilon: privacy parameter of the RDP guarantee.

    Returns:
        ``val`` plus one zero-mean Gaussian draw (standard deviation
        ``sqrt(sensitivity**2 * alpha / (2 * epsilon))``) from NumPy's
        global RNG.
    """
    sigma_squared = (sensitivity**2 * alpha) / (2 * epsilon)
    sigma = np.sqrt(sigma_squared)
    return val + np.random.normal(loc=0, scale=sigma)
    
def rdp_count_age(dataset, sensitivity, alpha, epsilon):
    """Release a noisy count of the rows in ``dataset`` via ``gaussian_mech_RDP``.

    Args:
        dataset: collection whose length is the count to release (e.g. a
            pandas Series). A bare scalar is accepted as an already-computed
            count and used as-is.
        sensitivity: sensitivity forwarded to ``gaussian_mech_RDP``.
        alpha: forwarded unchanged to ``gaussian_mech_RDP``.
        epsilon: forwarded unchanged to ``gaussian_mech_RDP``.

    Returns:
        The count plus the noise added by ``gaussian_mech_RDP``.
    """
    # Noise must be added to the count, not to the data itself: handing a
    # Series straight to the mechanism returns every element shifted by the
    # same draw rather than a single noisy count.
    true_count = dataset if np.ndim(dataset) == 0 else len(dataset)
    return gaussian_mech_RDP(true_count, sensitivity=sensitivity, alpha=alpha, epsilon=epsilon)

### Average