In [1]:
import numpy as np
import pandas as pd 
import math

In [2]:
# Load the raw patient records from disk into a DataFrame.
df = pd.read_csv(filepath_or_buffer="patients_data.csv")

In [3]:
# Privacy parameters used by the noise cells below: epsilon scales both
# noise distributions, delta only enters the Gaussian noise width.
epsilon, delta = 0.5, 0.1

In [4]:
# apply Laplace mechanism to Donation column
# NOTE(review): a scale of 1/epsilon corresponds to an L1 sensitivity of 1 —
# confirm that bound actually holds for Donation.
# NOTE: this overwrites df["Donation"] in place, so re-running the cell stacks
# a second layer of noise on top of the first.
donation_mean = df["Donation"].mean()    # mean of the column before noise is added
donation_count = df["Donation"].count()  # number of non-null donations
# Draw one noise value per row. Sizing by count() would skip NaN rows and make
# the addition below fail with a length mismatch when values are missing.
donation_noise = np.random.laplace(0, 1/epsilon, len(df))
df["Donation"] = df["Donation"] + donation_noise
df["Donation"] = df["Donation"].clip(lower=0) # ensure no negative values

In [5]:
# apply Gaussian mechanism to Health Score column
# NOTE(review): the noise width uses the column's sample standard deviation
# where the Gaussian mechanism normally takes a fixed L2 sensitivity bound —
# confirm this is intended, since the width then depends on the data itself.
# NOTE: this overwrites df["Health_Score"] in place, so re-running the cell
# stacks a second layer of noise on top of the first.
health_score_mean = df["Health_Score"].mean()    # mean before noise is added
health_score_stddev = df["Health_Score"].std()   # sample standard deviation before noise
health_score_count = df["Health_Score"].count()  # number of non-null scores
# Draw one noise value per row. Sizing by count() would skip NaN rows and make
# the addition below fail with a length mismatch when values are missing.
health_score_noise = np.random.normal(
    0,
    health_score_stddev * math.sqrt(2 * math.log(1.25/delta)) / epsilon,
    len(df),
)
df["Health_Score"] = df["Health_Score"] + health_score_noise

In [6]:
# Preview the first 20 rows of the noised frame.
df.head(n=20)

Unnamed: 0,Patient_ID,Health_Camp_ID,Number_of_stall_visited,Last_Stall_Visited_Number,Donation,Health_Score,First_Interaction
0,517875,6527,3,1,40.051983,-0.466199,18-Jun-03
1,504692,6578,1,1,20.190869,2.457396,20-Jul-03
2,504692,6527,3,1,12.301273,-0.986407,02-Nov-02
3,493167,6527,4,4,29.87888,0.10565,02-Nov-02
4,510954,6528,2,2,15.309572,0.984094,24-Nov-02
5,501825,6527,2,4,36.003567,-1.180136,06-Apr-03
6,495620,6527,1,1,30.99175,1.012486,26-Apr-03
7,526542,6528,2,2,30.367485,1.631818,05-Apr-03
8,517351,6578,3,5,9.756054,-0.151323,10-Apr-03
9,517351,6527,3,1,28.515627,0.4866,08-Jun-03


In [7]:
# Write the noised frame to disk without the index column.
# Despite the file name, the contents are only noise-perturbed, not encrypted.
df.to_csv(path_or_buf="encrypted_patients.csv", index=False)

In [8]:
# Parameters handed to the homomorphic_* helpers.
p = 2**31 - 1     # prime modulus (2147483647)
t = 64            # bit-length of the plaintext modulus
noise_scale = 10  # magnitude bound of the random integer added to each helper's output

In [9]:
# define the homomorphic operations
def homomorphic_square(x, p, t):
    """Return ``x**2 mod p`` perturbed by a small random integer.

    The computation runs directly on the plain value passed in; nothing is
    encrypted or decrypted inside this function.

    Args:
        x: Value to square. Integer inputs (Python ``int`` or NumPy integer
            scalars) are squared with arbitrary-precision arithmetic.
        p: Modulus applied to the square.
        t: Unused; kept so existing three-argument calls keep working.

    Returns:
        int: ``(x**2) % p`` plus an integer drawn uniformly from
        ``[-noise_scale, noise_scale]`` (both ends included), where
        ``noise_scale`` is read from module scope.
    """
    if isinstance(x, (int, np.integer)) and isinstance(p, (int, np.integer)):
        # Fixed-width NumPy integers wrap silently when the square exceeds
        # 64 bits, which yields a wrong residue; Python ints do not overflow.
        squared = (int(x) ** 2) % int(p)
    else:
        squared = (x**2) % p
    # randint's upper bound is exclusive: +1 keeps the noise symmetric around
    # zero and makes noise_scale == 0 mean "no noise" instead of an error.
    noise = int(np.random.randint(-noise_scale, noise_scale + 1))
    return int(squared + noise)

def homomorphic_sum(x, p, t):
    """Return the sum of ``x`` modulo ``p`` perturbed by a small random integer.

    The computation runs directly on the plain values passed in; nothing is
    encrypted or decrypted inside this function.

    Args:
        x: Sequence of numbers to add up (list, NumPy array, pandas Series, ...).
            Elements are visited in positional order.
        p: Modulus applied to the total.
        t: Unused; kept so existing three-argument calls keep working.

    Returns:
        int: ``sum(x) % p`` plus an integer drawn uniformly from
        ``[-noise_scale, noise_scale]`` (both ends included), where
        ``noise_scale`` is read from module scope. An empty ``x`` sums to 0.
    """
    total = 0
    # Iterate values directly: x[i] would be a label lookup on a pandas Series
    # and can miss or mis-order rows when the index is not 0..n-1.
    for value in x:
        # Promote NumPy integers to Python ints so the running total cannot
        # silently wrap around at 64 bits.
        total += int(value) if isinstance(value, np.integer) else value
    modulus = int(p) if isinstance(p, (int, np.integer)) else p
    # randint's upper bound is exclusive: +1 keeps the noise symmetric around
    # zero and makes noise_scale == 0 mean "no noise" instead of an error.
    noise = int(np.random.randint(-noise_scale, noise_scale + 1))
    return int((total % modulus) + noise)

In [10]:
# Reload the noised patient table written earlier (noise-perturbed, not encrypted).
df_encrypted = pd.read_csv(filepath_or_buffer="encrypted_patients.csv")

In [11]:
# Pull both noised columns out as 64-bit integer arrays.
# The int64 cast discards the fractional part of every value.
donation_plaintext, health_score_plaintext = (
    df_encrypted[column].to_numpy(dtype=np.int64)
    for column in ("Donation", "Health_Score")
)

In [12]:
# Run the helpers over the integer arrays: square every donation
# individually, then reduce all health scores to a single noisy total.
donation_squared = list(
    homomorphic_square(value, p, t) for value in donation_plaintext
)
health_score_sum = homomorphic_sum(health_score_plaintext, p, t)

In [13]:
# Assemble the results table: identifier/visit columns are carried over
# unchanged, followed by the per-row squared donations and the single
# health-score total (a scalar, so it is repeated on every row).
df_homomorphic = pd.DataFrame({
    **{
        column: df_encrypted[column]
        for column in (
            "Patient_ID",
            "Health_Camp_ID",
            "Number_of_stall_visited",
            "Last_Stall_Visited_Number",
            "First_Interaction",
        )
    },
    "Donation Squared": donation_squared,
    "Health Score Sum": health_score_sum,
})

In [14]:
# Persist the results table (index column omitted) and confirm on stdout.
df_homomorphic.to_csv(path_or_buf="homomorphic_results2.csv", index=False)

print("Results saved to homomorphic_results2.csv")

Results saved to homomorphic_results2.csv


In [15]:
# Reload the saved results from disk and preview the first 20 rows.
df_homomorphic = pd.read_csv(filepath_or_buffer="homomorphic_results2.csv")

df_homomorphic.head(n=20)

Unnamed: 0,Patient_ID,Health_Camp_ID,Number_of_stall_visited,Last_Stall_Visited_Number,First_Interaction,Donation Squared,Health Score Sum
0,517875,6527,3,1,18-Jun-03,1593,184
1,504692,6578,1,1,20-Jul-03,406,184
2,504692,6527,3,1,02-Nov-02,134,184
3,493167,6527,4,4,02-Nov-02,832,184
4,510954,6528,2,2,24-Nov-02,217,184
5,501825,6527,2,4,06-Apr-03,1298,184
6,495620,6527,1,1,26-Apr-03,897,184
7,526542,6528,2,2,05-Apr-03,906,184
8,517351,6578,3,5,10-Apr-03,86,184
9,517351,6527,3,1,08-Jun-03,777,184
