In [1]:
import pandas as pd
import numpy as np
import random
from math import pow
from decimal import Decimal
import os
import pickle
import hashlib
os.chdir('Resources/')

In [2]:
def gcd(a, b):
    if a < b:
        return gcd(b, a)
    elif a % b == 0:
        return b
    else:
        return gcd(b, a % b)

In [3]:
def gen_key(q):
    key = random.randint(10**20, q)  # Ensure q is an integer
    while gcd(q, key) != 1:
        key = random.randint(10**20, q)
    return key

In [4]:
def power(a, b, c):
    x = 1
    y = a
    while b > 0:
        if b % 2 != 0:
            x = (x * y) % c
        y = (y * y) % c
        b = int(b / 2)
    return x % c

In [None]:
def encrypt(msg, q, h, g):
    en_msg = []
    k = gen_key(q)  # Private key for sender
    s = power(h, k, q)
    p = power(g, k, q)
    
    for i in range(0, len(msg)):
        en_msg.append(s * msg[i])  # Encrypt each element
    
    return en_msg, p

In [6]:
df = pd.read_csv('1_Structured_Data.csv')

In [7]:
with open('3_Public_Key.pkl', 'rb') as f_public:
    public_key_data = pickle.load(f_public)

q = public_key_data['q']
g = public_key_data['g']
public_keys = public_key_data['public_keys']

In [8]:
encrypted_values_list = []
p_list = []

for i in range(11):
    attribute_values = df.iloc[:, i].tolist()

    # Factorize and handle missing values (None)
    encoded_values = pd.factorize(attribute_values)[0]
    encoded_values = [None if val == -1 else val for val in encoded_values]

    # Maintain a dictionary for deterministic encryption within the same attribute
    value_to_encrypted = {}
    value_to_p = {}

    encrypted_values = []
    p_values = []

    for val in encoded_values:
        if val is not None:  # Process only valid values
            if val not in value_to_encrypted:  # Encrypt if not already encrypted
                encrypted_result, p = encrypt([val], q, public_keys[i], g)
                value_to_encrypted[val] = encrypted_result[0]
                value_to_p[val] = p

            encrypted_values.append(value_to_encrypted[val])
            p_values.append(value_to_p[val])
        else:
            encrypted_values.append(None)  # Retain None values
            p_values.append(None)

    encrypted_values_list.append(encrypted_values)
    p_list.append(p_values)

    # Debug output
    print(f"Attribute {i+1} - Public Key (h): {public_keys[i]}")
    print(f"Attribute {i+1} - Encrypted Values:", encrypted_values[:10])

# Create DataFrame from encrypted values
encrypted_df = pd.DataFrame(encrypted_values_list).T
encrypted_df.columns = df.columns[:11]


  encoded_values = pd.factorize(attribute_values)[0]


Attribute 1 - Public Key (h): 1042016265484613204028080355066285368559887720363
Attribute 1 - Encrypted Values: [0, 5086600012490731442943930571085139155474036451295, 5207383839504054707363518626041174678109639052454, 7627115353597159256912609008269949662070481573805, 17531699370909507068407927260739537185473745991116, 30562508776055667191160869995759161807348436562815, 55063456334314987424963129158875172019883819437442, 17531699370909507068407927260739537185473745991116, 5207383839504054707363518626041174678109639052454, 7627115353597159256912609008269949662070481573805]
Attribute 2 - Public Key (h): 7279734963124952722665336995010195927456722704469
Attribute 2 - Encrypted Values: [0, 7071071391752259975941988339317745999154049513885, 0, 7071071391752259975941988339317745999154049513885, 0, 0, 7071071391752259975941988339317745999154049513885, 0, 0, 7071071391752259975941988339317745999154049513885]
Attribute 3 - Public Key (h): 5931716208391435207564961074241222946251477815475
Attrib

In [9]:
df['HeartDisease'] = df['HeartDisease'].replace({'Yes': 1, 'No': 0})
combined_df = pd.concat([encrypted_df, df['HeartDisease']], axis=1)
combined_df.to_csv('4_Encrypted_Data.csv', index=False)

  df['HeartDisease'] = df['HeartDisease'].replace({'Yes': 1, 'No': 0})


In [10]:
with open('4_Encrypted_Values_and_P.pkl', 'wb') as f_encrypted:
    pickle.dump({'encrypted_values_list': encrypted_values_list, 'p_list': p_list}, f_encrypted)