In [1]:
# import libraries (non-cryptogaphic)
import random # to generate phone numbers
import pandas as pd
from functools import reduce
import os

# import libraries (cryptographic)
import cryptography.hazmat.primitives.asymmetric.dh as dh
from cryptography.fernet import Fernet
import hashlib
import sympy
import secrets

# Generate phone numbers and store them in phone_numbers.csv file

In [2]:
random.seed(10) # to ensure same phone numbers generated every time

In [3]:
# class to generate phone numbers for grab and gojek
class PhoneNumberGenerator:
    def __call__(self, count):
        phone_numbers = random.sample(range(80000000,100000000), count)
        return phone_numbers 

class FakePhoneNumberGenerator:
    def __call__(self, count):
        numbers = random.sample(range(10000000,80000000), count) # invalid phone numbers generated
        return numbers
            
    
# class to store numbers to csv
class PhoneNumberStorageManager:
    def __init__(self):
        self.filename = "phone_numbers_padded.csv"
    def __call__(self, gojek_phone_numbers, grab_phone_numbers):
        d = {"gojek": gojek_phone_numbers, 
            "grab": grab_phone_numbers}
        df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()])) # create dataframe
        df.to_csv(self.filename, index = False) # store values to file "phone_numbers_padded.csv"

In [4]:
set_size = 100 # both parties are to exchange sets of a pre-determined size

# generate and store phone numbers
gojek_phone_number_count = 61 # inclusive of phone numbers in common with grab
grab_phone_number_count = 91 # inclusive of phone numbers in common with gojek
common_phone_number_count = 10

# instantiate required classes
phone_number_generator = PhoneNumberGenerator()
fake_phone_number_generator = FakePhoneNumberGenerator()
phone_number_storage_manager = PhoneNumberStorageManager()

# generate phone numbers
phone_numbers = phone_number_generator(gojek_phone_number_count+grab_phone_number_count-common_phone_number_count)
common_phone_numbers = phone_numbers[0:common_phone_number_count]
gojek_phone_numbers = phone_numbers[0:gojek_phone_number_count]
grab_phone_numbers = common_phone_numbers + phone_numbers[gojek_phone_number_count:] 

# generate fake phone numbers
gojek_fake_phone_numbers = fake_phone_number_generator(set_size-gojek_phone_number_count)
grab_fake_phone_numbers = fake_phone_number_generator(set_size-grab_phone_number_count)

# add fake phone numbers to phone numbers
gojek_padded_set = gojek_phone_numbers + gojek_fake_phone_numbers
grab_padded_set = grab_phone_numbers + grab_fake_phone_numbers

# shuffle phone number lists
random.shuffle(gojek_padded_set)
random.shuffle(grab_padded_set)

# write phone numbers (with fake ones) to csv file
phone_number_storage_manager(gojek_padded_set, grab_padded_set)

# Define classes for the necessary for the algorithm

In [5]:
# class to generate numbers required for psi
class NumberGenerator:
    
    def generate_public_parameters(self, size):
        # method to generate p, q and factors of p-1
        p = self.generate_safe_prime(size)
        print(f"p is prime: {sympy.ntheory.isprime(p)}")
        length_of_p = len(bin(p)[2:]) # should be 1024
        print(f"Length of prime modulus, p: {length_of_p}.\nNote: Should be {size}.")
        q = (p-1)//2
        print(f"q is prime: {sympy.ntheory.isprime(q)}")
        factors_pminus1 = [1, 2, q] # since q is prime, 2q only has these 3 factors excluding itself
                                    # 2q = p-1
                                    # factors of p-1 required to compute order of generators (lagrange theorem)
        return p, factors_pminus1

    def generate_safe_prime(self, size):
        # method to generate safe prime for p
        candidate = dh.generate_parameters(2, size).parameter_numbers().p # generate 1024-bit prime number
        while True:
            # the method used from the cryptography already generates a safe prime, this portion is merely to double confirm
            is_safe_prime = sympy.ntheory.isprime((candidate-1)//2) # if safe prime, (candidate-1)/2 is prime
            if (is_safe_prime):
                break
            else:
                candidate = dh.generate_parameters(2, size).parameter_numbers().p
                print(candidate)
 
        return candidate
    
    
    def generate_random_number(self, size):
        # method to generate client's secret
        return secrets.randbits(size)
            
# class to inspect values          
class NumberInspector:
    
    def check_is_primitive_generator(self, candidate, factors_divisorminus1, divisor): # note: factors should be the factors of divisor-1
        
        # apply lagrange theorem
        for possible_order in factors_divisorminus1: # check congruence for all factors (factors is exclusive of p-1 itself)
            result = pow(candidate, possible_order, divisor) # fast modular exponentiation implemented in pow(x,y,z)
                                                             # python integers have arbitrary precisions, 
                                                             # no overflow would occur if operations done in pure python
            if (result == 1):
                return False # if candidate exponent any of the factors (1,2,q) congruent to 1modp, candidate is not a primitive
                             # generator
        return True # order of candidate == p-1, therefore candidate is a primitive generator

    
class StorageManager:
    
    def store_data(self, filename, data):
        df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in data.items()])) # create dataframe
        df.to_csv(filename, index=False) # store data to file with filename


# Create psi client class

In [6]:
# client class (both grab and gojek are clients communicating directly with each other)
class Client:
    def __init__(self, name, other_party_name, private_key_size, phone_numbers, p, factors_pminus1, fernet_key):
        
        self.number_inspector = NumberInspector()
        self.number_generator = NumberGenerator()
        
        # for asymmetric encryption
        self.private_key_size = private_key_size
        self.private_key = self.number_generator.generate_random_number(private_key_size)
        self.my_set = phone_numbers
        self.p = p # prime modulus
        self.factors_pminus1 = factors_pminus1 # to calculate order of hashed phone numbers, 
                                               # since algorithm requires them to be primitive generators
        
        # for symmetric encryption
        self.fernet_key = fernet_key
        self.f = Fernet(fernet_key)
        
        # variables to track for psi algorithm
        self.my_hashed_set = None # h(x)
        self.my_self_encrypted_set = None # (h(x)^(my_secret))modp
        self.my_encrypted_set = None # (h(x)^(my_secret)(other_party_secret))modp
        self.other_party_encrypted_set = None # (h(y)^(my_secret)(other_party_secret))modp
        self.common_values = None # common phone numbers, with invalid phone numbers removed
        
        # variables to ensure no shuffling of encrypted values
        self.my_common_hashes = None # common hashed values with hashed values of invalid phone numbers included
        self.my_sum_of_hashes = None # sum of the common hashed values, h(x_1) + h(x_2) + ... + h(x_n), 
                                     # where n is the number of elements in the intersection of the 2 sets and 
                                     # h(x_i) are the elements in the intersection
        
        self.my_self_encrypted_sum_of_hashes = None # ((my_sum_of_hashes)^(my_secret))modp
        
        self.my_actual_value = None # other_party_encrypted_sum_of_hashes, 
                                    # ((other_party_sum_of_hashes)^(my_secret)(other_party_secret))modp
        self.my_committed_value = None # hash value of other_party_encrypted_sum_of_hashes, h(my_actual_value)

        
        
        # create datafile for communication with another party
        
        # content to store in file
        # need to share self_encrypted_values and other_party_encrypted_values for basic psi algorithm
        # need to share my_self_encrypted_sum_of_hashes, my_committed_value, and my_actual_value for preventing the shuffling of encrypted values
        # common_values was not included unlike previous cases as actual value is now used for verification
        # also, the adversary type changed from semi-honest to malicious, therefore common_values not shared
        self.my_dict = {
            'my_self_encrypted_set': None, 
            'other_party_encrypted_set': None,
            'my_self_encrypted_sum_of_hashes': None,
            'my_committed_value': None,
            'my_actual_value': None
        }
        
        # filenames
        self.name = name
        self.filename = name + "_data_v2.0.csv"
        self.other_party_name = other_party_name
        self.other_party_filename = other_party_name + "_data_v2.0.csv"
        
        # create file
        self.storage_manager = StorageManager()
        self.storage_manager.store_data(self.filename, self.my_dict)

    def hash_to_primitive_root_modulo_p(self, element): 
        # method to hash phone numbers to primitive root modulo p i.e. primitive generator

        endian = "big"
        element = element.to_bytes(4, endian)
        hash_hex = hashlib.sha256(element).hexdigest() # sha3_256
        hash_int = int(hash_hex, 16)
        while True:
            # repeatedly hash until primitive root modulo p is obtained
            is_primitive_generator = self.number_inspector.check_is_primitive_generator(
                hash_int, self.factors_pminus1, self.p
            )
            if (is_primitive_generator):
                break
            else:
                hash_int = hash_int.to_bytes(32, endian)
                hash_hex = hashlib.sha256(hash_int).hexdigest()
                hash_int = int(hash_hex, 16)
                
        return hash_int
    
    def modular_exponentation(self, element):
        # compute (element^(private_key))modp
    
        return pow(element, self.private_key, self.p)
    
    def hash_set(self):
        # hash all phone numbers in my set to primitive root modulo p, one by one
        
        self.my_hashed_set = []
        
        for element in self.my_set:
            hashed_value = self.hash_to_primitive_root_modulo_p(element)
            self.my_hashed_set.append(hashed_value)
            
    def encrypt_set(self, is_other_party):
        # encrypt all elements in a given set using private_key, one by one
        
        # two scenarios to consider
        # one: encrypt set sent by the other party
        if (is_other_party):
            decrypted_other_party_set = self.receive_data("my_self_encrypted_set")
            other_party_set_int = []
            for element_string in decrypted_other_party_set:
                other_party_set_int.append(int(element_string))
            set_to_encrypt = other_party_set_int
        # two: encrypt my own set
        else:
            set_to_encrypt = self.my_hashed_set
        
        # encrypt values in given set, one by one
        encrypted_values = []
        for element in set_to_encrypt:
            encrypted_value = self.modular_exponentation(element)
            encrypted_values.append(encrypted_value)
            
        # assign the encrypted set to the correct variable
        # update csv file used for communication
        if (is_other_party):
            self.other_party_encrypted_set = encrypted_values
            self.send_data(encrypted_values, "other_party_encrypted_set")
        else:
            self.my_self_encrypted_set = encrypted_values
            self.send_data(encrypted_values, "my_self_encrypted_set")
                    
        
    def get_intersection(self):
        
        # get intersection
        
        my_encrypted_set = self.receive_data("other_party_encrypted_set") # read my encrypted set from the other party's file
        my_encrypted_set_int = []
        
        # convert read values to integer
        for element in my_encrypted_set:
            my_encrypted_set_int.append(int(element))
            
        # assign to correct variable
        self.my_encrypted_set = my_encrypted_set_int
        
        # get intersection
        encrypted_common_values = set(self.my_encrypted_set).intersection(self.other_party_encrypted_set)
        
        index_of_common_values = []
        
        # find the index of the elements in the intersection in my_encrypted_set
        for element in encrypted_common_values:
            index_of_common_values.append(self.my_encrypted_set.index(element))
            
        self.common_values = [] # common valid phone numbers
        self.my_common_hashes = [] # common hashed values (inclusive of hashed value of invalid phone numbers)
        
        # find the values in my_set and my_hashed_set corresponding to the index of the elements in the intersection
        for index in index_of_common_values:
            potential_phone_number = self.my_set[index]
            
            if ((potential_phone_number-79999999)>0): # remove any potential fake numbers that intersect
                self.common_values.append(potential_phone_number)
                
            self.my_common_hashes.append(self.my_hashed_set[index])
        
    
    
    def compute_sum_of_common_hashed_values(self):
        # compute sum of hashed values in the common intersection
        
        self.my_sum_of_hashes = sum(self.my_common_hashes)
    
    def encrypt_sum_of_common_hashed_values(self, is_other_party):
        # encrypt the sum of common hashed values
        # there are two scenarios
        # one: encrypt other party's self encrypted sum of hashes
        if(is_other_party):
            value_to_encrypt = int((self.receive_data('my_self_encrypted_sum_of_hashes'))[0]) # get other party's self encrypted sum of hashes, 
                                                                                         # ((other_party_sum_of_hashes)^(other_party_secret))modp
        # two: encrypt my_sum_of_hashes
        else:
            value_to_encrypt = self.my_sum_of_hashes # to obtain my_self_encrypted_sum_of_hashes
        
        encrypted_value = self.modular_exponentation(value_to_encrypt) # get ((value_to_encrypt)^(my_secret))modp
        
        # assign to the corresponding variable
        if (is_other_party):
            self.my_actual_value = encrypted_value # do not send this value yet, must send the hash of this first (for commit mechanism)
        else:
            self.my_self_encrypted_sum_of_hashes = encrypted_value
            self.send_data([self.my_self_encrypted_sum_of_hashes], "my_self_encrypted_sum_of_hashes")
        
    
    def hash_value(self, value, byte_size):
        # hash the given value with size = byte_size
        
        endian = "big"
        element = value.to_bytes(byte_size, endian)
        hash_hex = hashlib.sha256(element).hexdigest() # sha3_256
        hash_int = int(hash_hex, 16)
        return hash_int
    
    def commit_to_hash_value(self):
        # hash actual value and send the hashed value
        self.my_committed_value = self.hash_value(self.my_actual_value, self.private_key_size//8)
        self.send_data([self.my_committed_value], "my_committed_value")
    
    
    def reveal_actual_value(self):
        # send the actual value
        
        self.send_data([self.my_actual_value], "my_actual_value")
    
    
    def check_if_actual_and_committed_values_match(self):
        # check if the committed value is the hash of the actual value
        
        # get required values        
         
        other_party_committed_value = int((self.receive_data("my_committed_value"))[0]) # get other_party_committed_value
        other_party_actual_value = int((self.receive_data("my_actual_value"))[0]) # get other_party_actual_value
        hashed_value = self.hash_value(other_party_actual_value, self.private_key_size//8) # hash other_party_actual_value
        
        # check if values match
        if (hashed_value == other_party_committed_value):
            print("committed and actual values match!")
            return True
        else:
            print("committed and actual values do not match!")
            return False
    
    def check_if_actual_values_match(self):
        # check if my actual value is equal to the other party's actual value
        
        other_party_actual_value = int((self.receive_data("my_actual_value"))[0]) # get other_party_actual_value
        
        # check if values match
        if (other_party_actual_value == self.my_actual_value):
            print("actual values match!")
            return True
        else:
            print("actual values do not match!")
            return False
    
    def encrypt_data(self, plaintext):
        # encrypt data with Fernet
        
        endian = "big"
        element = plaintext.to_bytes(self.private_key_size//8, endian) # 1024 bits == 128 bytes
        cipher_text = self.f.encrypt(element)
        return cipher_text
    
    def decrypt_data(self, ciphertext):
        # decrypt data encrypted by Fernet
        
        ciphertext_bytes = ciphertext.encode('utf-8')[2:-1] # convert from string back to bytes
        endian = "big"
        element_in_bytes = self.f.decrypt(ciphertext_bytes)
        plaintext = int.from_bytes(element_in_bytes, endian)
        return plaintext

    def send_data(self, data_to_send, column_name):
        # send data means writing to file. encrypt data with Fernet
        
        # encrypt data
        encrypted_data_to_send = []
        for element in data_to_send:
            encrypted_element = self.encrypt_data(element)
            encrypted_data_to_send.append(encrypted_element)
            
        # send data
        self.my_dict[column_name] = encrypted_data_to_send
        self.storage_manager.store_data(self.filename, self.my_dict)
        
        
    def receive_data(self, column_name):
        # receive data means reading from file (my file). decrypt data encrypted by Fernet
        
        # receive data
        encrypted_data = self.get_other_party_data()[column_name].to_list()
        
        # decrypt data
        decrypted_data = []
        for element in encrypted_data:
            if type(element) is float: # remove NaN
                continue
            decrypted_element = self.decrypt_data(element)
            decrypted_data.append(decrypted_element)
        return decrypted_data
    
    def get_my_data(self):
        # read my file as dataframe (other party's file)
        
        return pd.read_csv(self.filename)
            
    def get_other_party_data(self):
        # read other party's file as dataframe
        
        return pd.read_csv(self.other_party_filename)
    
    


# Initialize context

In [7]:
# assign pre-determined variables for psi
key_size = 1024 # both private keys and large prime

# create key for symmetric key cryptography
fernet_key = Fernet.generate_key()

# create public parameters required for psi
number_generator = NumberGenerator()
p, factors_pminus1 = number_generator.generate_public_parameters(key_size)

# create clients
grab = Client("grab", "gojek", key_size, grab_padded_set, p, factors_pminus1, fernet_key)
gojek = Client("gojek", "grab", key_size, gojek_padded_set, p, factors_pminus1, fernet_key)



p is prime: True
Length of prime modulus, p: 1024.
Note: Should be 1024.
q is prime: True


  df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in data.items()])) # create dataframe


# Get intersection

## Step 1: Hash phone numbers

In [8]:
# clients hash their own set
grab.hash_set()
gojek.hash_set()

### Clients' status after step 1
Note: hashed set is not stored in the file as it is not meant to be shared with the other party hence, files' status after step 1 is not shown

In [9]:
d = {"gojek hashed set": gojek.my_hashed_set,
    "grab hashed set": grab.my_hashed_set}
df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()]))
df

Unnamed: 0,gojek hashed set,grab hashed set
0,3444554169003369415400187425444370377909224933...,2578545958466060453428636263208319888439639420...
1,2971259318188054936238701069597951289131550069...,5706214998945741993120430770407017796268789491...
2,5773315736316327673762720174221743416131441931...,8119100922771943066177300284568368128847410924...
3,3931935781355731990509341551661525053343436962...,2835741223782226842638506406959946569416451500...
4,5239384073106902936597081070089405716452234004...,1959933681547377774404303441869569823311439691...
...,...,...
95,8697318885625964372170362596957150767783631535...,2064846653753480132804612906272453669387169259...
96,1516234710201293820270438455976630785218809049...,8866985291088494051041055141151271041321507008...
97,4322489114223637882196823987215027215086950043...,3948222474783499474687611776797431125471701320...
98,9196222794520213004035745078489116002064573260...,7018556158959834041117086565402060628499966280...


## Step 2: encrypt hashed set with own private key

In [10]:
# clients self encrypt hashed set
grab.encrypt_set(False) # set is_other_party to false to encrypt own hashed set
gojek.encrypt_set(False)

  df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in data.items()])) # create dataframe


### Clients' status after step 2 (value of variables in client)

In [11]:
d = {"gojek hashed set": gojek.my_hashed_set,
    "grab hashed set": grab.my_hashed_set,
    "gojek self-encrypted set": gojek.my_self_encrypted_set,
    "grab self-encrypted set": grab.my_self_encrypted_set}
df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()]))
df

Unnamed: 0,gojek hashed set,grab hashed set,gojek self-encrypted set,grab self-encrypted set
0,3444554169003369415400187425444370377909224933...,2578545958466060453428636263208319888439639420...,3592086281017241634746505393053283733740068783...,8770302618384716433333032361846994025627196830...
1,2971259318188054936238701069597951289131550069...,5706214998945741993120430770407017796268789491...,7633350964126484409309452627694517501222527574...,5310231704431437302744741288334690444765811176...
2,5773315736316327673762720174221743416131441931...,8119100922771943066177300284568368128847410924...,1229815915116934356023871574874782827041523465...,9669551254738722499074397480261917397203402662...
3,3931935781355731990509341551661525053343436962...,2835741223782226842638506406959946569416451500...,8139458928003081052903827336701217101861095147...,1161828185738055223275160186294651839766678934...
4,5239384073106902936597081070089405716452234004...,1959933681547377774404303441869569823311439691...,1418445934300507413740055109715399683048444206...,3443208215257119583651725732089150910091680518...
...,...,...,...,...
95,8697318885625964372170362596957150767783631535...,2064846653753480132804612906272453669387169259...,1096791066400963198104128807207516920845522770...,1021065309919929937547475049889662100627000503...
96,1516234710201293820270438455976630785218809049...,8866985291088494051041055141151271041321507008...,7467914080589737191277203346621734518960516771...,3140527904196036531607348919782301327096473948...
97,4322489114223637882196823987215027215086950043...,3948222474783499474687611776797431125471701320...,2659317780621279075993364649577709268729743289...,2583951878794420900057887075859672159727914076...
98,9196222794520213004035745078489116002064573260...,7018556158959834041117086565402060628499966280...,8233606371079115844115653883093993223457940409...,1573161205348916629764635631544172635725013142...


### Files' status after step 2 (value of variables in file - clients' variables encrypted with Fernet)

In [12]:
df_gojek = gojek.get_my_data()
df_grab = grab.get_my_data()
print("gojek's file:")
df_gojek


gojek's file:


Unnamed: 0,my_self_encrypted_set,other_party_encrypted_set,my_self_encrypted_sum_of_hashes,my_committed_value,my_actual_value
0,b'gAAAAABhokghuN07GmPvHNefFOs292L2PmcI5wPIvnvc...,,,,
1,b'gAAAAABhokghX-CllsfLRGtewRMJm32lgIMxcC0Y3Ww9...,,,,
2,b'gAAAAABhokgh1ngBzHFfMeU6_n8axLHqSLyavtYpeT92...,,,,
3,b'gAAAAABhokghome4YRt6L7pMWweaCZGkTB3lp2c4tZho...,,,,
4,b'gAAAAABhokghMGphrLcoKV4r3-dkw-tWELsuO838OgnG...,,,,
...,...,...,...,...,...
95,b'gAAAAABhokghkeC9jDmqstDGM2LuOT1NwtfMLpO3xgSc...,,,,
96,b'gAAAAABhokghAeP63oBvpu9lpkOPb1jSV4VTbL7qJie0...,,,,
97,b'gAAAAABhokghEGU_KDdPuQhR9AuamNr15kTW8ZeVZmUz...,,,,
98,b'gAAAAABhokgh-P9Jz1vtf4woWLzJyrMnj1_zcgdL3gzU...,,,,


In [13]:
print("grab's file:")
df_grab

grab's file:


Unnamed: 0,my_self_encrypted_set,other_party_encrypted_set,my_self_encrypted_sum_of_hashes,my_committed_value,my_actual_value
0,b'gAAAAABhokggxZsxWzqXnb9-_t4WG4g_hOAQWCOe422P...,,,,
1,b'gAAAAABhokggAcdAR0RgOG4JcrjgsIU8FXCRHLkGxxFd...,,,,
2,b'gAAAAABhokggndIN4jGXqiEwumsXzbKhJuEKCkLLXSC7...,,,,
3,b'gAAAAABhokggloFkvItEURdO-7N260Ryx5cxZRfxwU3g...,,,,
4,b'gAAAAABhokggKqNHCjiU69JDeLmTeR_iuLaKyHNnBU2f...,,,,
...,...,...,...,...,...
95,b'gAAAAABhokggaG5sQLTuU3zF0uuRKoF5isyd2QJnaqgs...,,,,
96,b'gAAAAABhokggjSDfLiPn6zLCBS6LQEyieStWQfGt8A8-...,,,,
97,b'gAAAAABhokggWGi0htfcWQy75BmIsTyhMGFVeMxniKKC...,,,,
98,b'gAAAAABhokggfDUNZ8Aw25BDT-RKAk913IB6Dwnau70w...,,,,


## Step 3: encrypt other party's self-encrypted set with own private key


In [14]:
# clients encrypt other party's self encrypted set
grab.encrypt_set(True) # set is_other_party to true
gojek.encrypt_set(True)

  df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in data.items()])) # create dataframe


### Clients' status after step 3

In [15]:
d = {"gojek hashed set": gojek.my_hashed_set,
    "grab hashed set": grab.my_hashed_set,
    "gojek self-encrypted set": gojek.my_self_encrypted_set,
    "grab self-encrypted set": grab.my_self_encrypted_set,
    "gojek encrypted set": grab.other_party_encrypted_set,
    "grab encrypted set": gojek.other_party_encrypted_set}
df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()]))
df

Unnamed: 0,gojek hashed set,grab hashed set,gojek self-encrypted set,grab self-encrypted set,gojek encrypted set,grab encrypted set
0,3444554169003369415400187425444370377909224933...,2578545958466060453428636263208319888439639420...,3592086281017241634746505393053283733740068783...,8770302618384716433333032361846994025627196830...,1110066523883393446461652787419309808727760893...,1040125823461352614866026688358633545722247012...
1,2971259318188054936238701069597951289131550069...,5706214998945741993120430770407017796268789491...,7633350964126484409309452627694517501222527574...,5310231704431437302744741288334690444765811176...,3572395723039591924058214782828015084597107070...,1180250908692475247881689347638984191185531144...
2,5773315736316327673762720174221743416131441931...,8119100922771943066177300284568368128847410924...,1229815915116934356023871574874782827041523465...,9669551254738722499074397480261917397203402662...,1561431150929030709725726283927822306366714415...,9201774749032524870434824457276158296915522795...
3,3931935781355731990509341551661525053343436962...,2835741223782226842638506406959946569416451500...,8139458928003081052903827336701217101861095147...,1161828185738055223275160186294651839766678934...,1551053740989404587830241755191742869550553947...,1251127761592340254319881905671617482264533184...
4,5239384073106902936597081070089405716452234004...,1959933681547377774404303441869569823311439691...,1418445934300507413740055109715399683048444206...,3443208215257119583651725732089150910091680518...,1294331147763280189908481434701958404491980705...,5762684737515794491848653417308779949231699430...
...,...,...,...,...,...,...
95,8697318885625964372170362596957150767783631535...,2064846653753480132804612906272453669387169259...,1096791066400963198104128807207516920845522770...,1021065309919929937547475049889662100627000503...,9706933021184065647327475391413724447587556955...,1581853840549155875913145592737420182512926149...
96,1516234710201293820270438455976630785218809049...,8866985291088494051041055141151271041321507008...,7467914080589737191277203346621734518960516771...,3140527904196036531607348919782301327096473948...,8806669514526027438603123663750650894176733035...,5895658274565218998646499712100595078389534709...
97,4322489114223637882196823987215027215086950043...,3948222474783499474687611776797431125471701320...,2659317780621279075993364649577709268729743289...,2583951878794420900057887075859672159727914076...,7586959447037940050158771297396493960387233339...,8284630358528401591798611865535112391715785163...
98,9196222794520213004035745078489116002064573260...,7018556158959834041117086565402060628499966280...,8233606371079115844115653883093993223457940409...,1573161205348916629764635631544172635725013142...,1388095609696086183333974452684582437059714760...,2431621848597426708353526137900379561053734272...


### Files' status after step 3

In [16]:
df_gojek = gojek.get_my_data()
df_grab = grab.get_my_data()
print("gojek's file:")
df_gojek


gojek's file:


Unnamed: 0,my_self_encrypted_set,other_party_encrypted_set,my_self_encrypted_sum_of_hashes,my_committed_value,my_actual_value
0,b'gAAAAABhokghuN07GmPvHNefFOs292L2PmcI5wPIvnvc...,b'gAAAAABhokgi6XkjGS1alv1b0-2Y3tq0-crPicmidApw...,,,
1,b'gAAAAABhokghX-CllsfLRGtewRMJm32lgIMxcC0Y3Ww9...,b'gAAAAABhokgiR8u2m265TqzW22ns2eUJfa3qF8zbAGAg...,,,
2,b'gAAAAABhokgh1ngBzHFfMeU6_n8axLHqSLyavtYpeT92...,b'gAAAAABhokgiTKL4Woyjuf-7BjkRDQ5YlqIFErC1FQHf...,,,
3,b'gAAAAABhokghome4YRt6L7pMWweaCZGkTB3lp2c4tZho...,b'gAAAAABhokgidyffgF6jRs1e4uXPOVd3PP3N5sEzA14p...,,,
4,b'gAAAAABhokghMGphrLcoKV4r3-dkw-tWELsuO838OgnG...,b'gAAAAABhokgi7SxjNtwSIX05n2Y1dbTKqpvn3XNQact6...,,,
...,...,...,...,...,...
95,b'gAAAAABhokghkeC9jDmqstDGM2LuOT1NwtfMLpO3xgSc...,b'gAAAAABhokgi-NzGv8i3GJA1DZztb4bmzFf2_utEdnaO...,,,
96,b'gAAAAABhokghAeP63oBvpu9lpkOPb1jSV4VTbL7qJie0...,b'gAAAAABhokgiuOc9i4Yaw-LxfGg_PvFJHPDRuurEEqhx...,,,
97,b'gAAAAABhokghEGU_KDdPuQhR9AuamNr15kTW8ZeVZmUz...,b'gAAAAABhokgiuGtkJOouk0KyWnHucJjdCh5mI2-5Gfyu...,,,
98,b'gAAAAABhokgh-P9Jz1vtf4woWLzJyrMnj1_zcgdL3gzU...,b'gAAAAABhokgimesawAId5xXFwtUcNhCzwWHv3zWjG1Ib...,,,


In [17]:
print("grab's file:")
df_grab

grab's file:


Unnamed: 0,my_self_encrypted_set,other_party_encrypted_set,my_self_encrypted_sum_of_hashes,my_committed_value,my_actual_value
0,b'gAAAAABhokggxZsxWzqXnb9-_t4WG4g_hOAQWCOe422P...,b'gAAAAABhokghBl6zZx7kez4AS1OUAQZ7OjXGXYHpqAFL...,,,
1,b'gAAAAABhokggAcdAR0RgOG4JcrjgsIU8FXCRHLkGxxFd...,b'gAAAAABhokghPaqzg4bjKj1Iwc8e7oupj4O_JbfPcyHO...,,,
2,b'gAAAAABhokggndIN4jGXqiEwumsXzbKhJuEKCkLLXSC7...,b'gAAAAABhokghTbAWYMkrnK5YnkcgZRXOcAhOyj2bVfMu...,,,
3,b'gAAAAABhokggloFkvItEURdO-7N260Ryx5cxZRfxwU3g...,b'gAAAAABhokghzloZFUwrmnXt4Dw_KpR5GFntJqIenEKm...,,,
4,b'gAAAAABhokggKqNHCjiU69JDeLmTeR_iuLaKyHNnBU2f...,b'gAAAAABhokghcVcANnRcgjm-qmNaexu6OKIepdwRjBz6...,,,
...,...,...,...,...,...
95,b'gAAAAABhokggaG5sQLTuU3zF0uuRKoF5isyd2QJnaqgs...,b'gAAAAABhokghW9tymep0ech99d2cBOWgoLp8G9h5VVHR...,,,
96,b'gAAAAABhokggjSDfLiPn6zLCBS6LQEyieStWQfGt8A8-...,b'gAAAAABhokghAu_CniidFpaZkFqmOM_GfyW9rsMUaJ3C...,,,
97,b'gAAAAABhokggWGi0htfcWQy75BmIsTyhMGFVeMxniKKC...,b'gAAAAABhokgh1l0cSTR0h1gl5cp0x2mA3ab75kkygAUx...,,,
98,b'gAAAAABhokggfDUNZ8Aw25BDT-RKAk913IB6Dwnau70w...,b'gAAAAABhokghL74ZOLmn_cX49mRtNyt8a-wkQdCnk7sG...,,,


## Step 4: find intersection


In [18]:
# clients find intersection
grab.get_intersection()
gojek.get_intersection()

### Clients' status after step 4:
Note: common_values is not stored in the file as it is not meant to be shared with the other party hence, files' status after step 4 is not shown

In [19]:
d = {"gojek hashed set": gojek.my_hashed_set,
    "grab hashed set": grab.my_hashed_set,
    "gojek self-encrypted set": gojek.my_self_encrypted_set,
    "grab self-encrypted set": grab.my_self_encrypted_set,
    "gojek encrypted set": grab.other_party_encrypted_set,
    "grab encrypted set": gojek.other_party_encrypted_set,
    "gojek found intersection": gojek.common_values,
    "grab found intersection:": grab.common_values}
df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()]))
df

Unnamed: 0,gojek hashed set,grab hashed set,gojek self-encrypted set,grab self-encrypted set,gojek encrypted set,grab encrypted set,gojek found intersection,grab found intersection:
0,3444554169003369415400187425444370377909224933...,2578545958466060453428636263208319888439639420...,3592086281017241634746505393053283733740068783...,8770302618384716433333032361846994025627196830...,1110066523883393446461652787419309808727760893...,1040125823461352614866026688358633545722247012...,86915509.0,86915509.0
1,2971259318188054936238701069597951289131550069...,5706214998945741993120430770407017796268789491...,7633350964126484409309452627694517501222527574...,5310231704431437302744741288334690444765811176...,3572395723039591924058214782828015084597107070...,1180250908692475247881689347638984191185531144...,99397525.0,99397525.0
2,5773315736316327673762720174221743416131441931...,8119100922771943066177300284568368128847410924...,1229815915116934356023871574874782827041523465...,9669551254738722499074397480261917397203402662...,1561431150929030709725726283927822306366714415...,9201774749032524870434824457276158296915522795...,94391128.0,94391128.0
3,3931935781355731990509341551661525053343436962...,2835741223782226842638506406959946569416451500...,8139458928003081052903827336701217101861095147...,1161828185738055223275160186294651839766678934...,1551053740989404587830241755191742869550553947...,1251127761592340254319881905671617482264533184...,96485172.0,96192082.0
4,5239384073106902936597081070089405716452234004...,1959933681547377774404303441869569823311439691...,1418445934300507413740055109715399683048444206...,3443208215257119583651725732089150910091680518...,1294331147763280189908481434701958404491980705...,5762684737515794491848653417308779949231699430...,96192082.0,96485172.0
...,...,...,...,...,...,...,...,...
95,8697318885625964372170362596957150767783631535...,2064846653753480132804612906272453669387169259...,1096791066400963198104128807207516920845522770...,1021065309919929937547475049889662100627000503...,9706933021184065647327475391413724447587556955...,1581853840549155875913145592737420182512926149...,,
96,1516234710201293820270438455976630785218809049...,8866985291088494051041055141151271041321507008...,7467914080589737191277203346621734518960516771...,3140527904196036531607348919782301327096473948...,8806669514526027438603123663750650894176733035...,5895658274565218998646499712100595078389534709...,,
97,4322489114223637882196823987215027215086950043...,3948222474783499474687611776797431125471701320...,2659317780621279075993364649577709268729743289...,2583951878794420900057887075859672159727914076...,7586959447037940050158771297396493960387233339...,8284630358528401591798611865535112391715785163...,,
98,9196222794520213004035745078489116002064573260...,7018556158959834041117086565402060628499966280...,8233606371079115844115653883093993223457940409...,1573161205348916629764635631544172635725013142...,1388095609696086183333974452684582437059714760...,2431621848597426708353526137900379561053734272...,,


Note: Last 2 columns, unlike the rest of the columns, do not have a one-to-one mapping with other values belonging to the same row i.e. values in the last 2 columns do not have any relation to the other values in the same row as it.

# Check results

In [20]:
# get intersection found by the two parties
gojek_found_intersection = gojek.common_values
grab_found_intersection = grab.common_values


# sort numbers for easier comparison
gojek_found_intersection.sort()
grab_found_intersection.sort()
common_phone_numbers.sort()

# summarize them in a dataframe
d = {"actual": common_phone_numbers,
    "gojek": gojek_found_intersection,
    "grab": grab_found_intersection}
df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()]))
df

Unnamed: 0,actual,gojek,grab
0,80497694,80497694,80497694
1,81093373,81093373,81093373
2,86915509,86915509,86915509
3,89312048,89312048,89312048
4,94391128,94391128,94391128
5,95521626,95521626,95521626
6,96192082,96192082,96192082
7,96485172,96485172,96485172
8,99173089,99173089,99173089
9,99397525,99397525,99397525


# Verify no shuffling of encrypted set

## Step 1: Compute sum of common hashed values

In [21]:
# clients compute sum of common hashed values
gojek.compute_sum_of_common_hashed_values()
grab.compute_sum_of_common_hashed_values()

### Clients' status after step 1
Note: sum_of_common_hashed_values is not stored in the file as it is not meant to be shared with the other party hence, files' status after step 1 is not shown

In [22]:
d = {"gojek common hashed values": gojek.my_common_hashes,
     "grab common hashed values": grab.my_common_hashes,
    "gojek sum of common hashed values": gojek.my_sum_of_hashes,
    "grab sum of common hashed values": grab.my_sum_of_hashes}
df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()]))
df

Unnamed: 0,gojek common hashed values,grab common hashed values,gojek sum of common hashed values,grab sum of common hashed values
0,9838169730706049126431649154985480440522462218...,9838169730706049126431649154985480440522462218...,4136707200875143739127692045206988587017779017...,4136707200875143739127692045206988587017779017...
1,2894934995096922601776004424726935937853330465...,2894934995096922601776004424726935937853330465...,,
2,2928980457934649500915797854609194600615950851...,2928980457934649500915797854609194600615950851...,,
3,1103882088551807418117675905119429741273074924...,9996825388533779586759487527820770206563019494...,,
4,9996825388533779586759487527820770206563019494...,1103882088551807418117675905119429741273074924...,,
5,2578545958466060453428636263208319888439639420...,2578545958466060453428636263208319888439639420...,,
6,5385040241161624166328037048478138908841351141...,5385040241161624166328037048478138908841351141...,,
7,1959933681547377774404303441869569823311439691...,2862142013587629757574505891453542162181560643...,,
8,2862142013587629757574505891453542162181560643...,1959933681547377774404303441869569823311439691...,,
9,6212011344629679795230035246755687194777942179...,6212011344629679795230035246755687194777942179...,,


## Step 2: Encrypt sum of common hashed values with own private key

In [23]:
# clients encrypt sum of common hashed values with their private key
gojek.encrypt_sum_of_common_hashed_values(False) # set is_other_party to false to encrypt own sum of common hashed values
grab.encrypt_sum_of_common_hashed_values(False)

  df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in data.items()])) # create dataframe


### Clients' status after step 2 (value of variables in client)

In [24]:
d = {"gojek common hashed values": gojek.my_common_hashes,
     "grab common hashed values": grab.my_common_hashes,
    "gojek sum of common hashed values": gojek.my_sum_of_hashes,
    "grab sum of common hashed values": grab.my_sum_of_hashes,
    "gojek self-encrypted sum of common hashed values": gojek.my_self_encrypted_sum_of_hashes,
    "grab self-encrypted sum of common hashed values": grab.my_self_encrypted_sum_of_hashes}
df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()]))
df

Unnamed: 0,gojek common hashed values,grab common hashed values,gojek sum of common hashed values,grab sum of common hashed values,gojek self-encrypted sum of common hashed values,grab self-encrypted sum of common hashed values
0,9838169730706049126431649154985480440522462218...,9838169730706049126431649154985480440522462218...,4136707200875143739127692045206988587017779017...,4136707200875143739127692045206988587017779017...,1381027602952099954789007531532175563433257770...,4313002687526156673975737001654467139804877307...
1,2894934995096922601776004424726935937853330465...,2894934995096922601776004424726935937853330465...,,,,
2,2928980457934649500915797854609194600615950851...,2928980457934649500915797854609194600615950851...,,,,
3,1103882088551807418117675905119429741273074924...,9996825388533779586759487527820770206563019494...,,,,
4,9996825388533779586759487527820770206563019494...,1103882088551807418117675905119429741273074924...,,,,
5,2578545958466060453428636263208319888439639420...,2578545958466060453428636263208319888439639420...,,,,
6,5385040241161624166328037048478138908841351141...,5385040241161624166328037048478138908841351141...,,,,
7,1959933681547377774404303441869569823311439691...,2862142013587629757574505891453542162181560643...,,,,
8,2862142013587629757574505891453542162181560643...,1959933681547377774404303441869569823311439691...,,,,
9,6212011344629679795230035246755687194777942179...,6212011344629679795230035246755687194777942179...,,,,


### Files' status after step 2 (value of variables in file - clients' variables encrypted with Fernet)

In [25]:
df_gojek = gojek.get_my_data()
df_grab = grab.get_my_data()
print("gojek's file:")
df_gojek["my_self_encrypted_sum_of_hashes"].to_frame().dropna()

gojek's file:


Unnamed: 0,my_self_encrypted_sum_of_hashes
0,b'gAAAAABhokgiIQIxwPRCFleMxcvM-p8QsWCza793YTYd...


In [26]:
print("grab's file:")
df_grab["my_self_encrypted_sum_of_hashes"].to_frame().dropna(how = "all")

grab's file:


Unnamed: 0,my_self_encrypted_sum_of_hashes
0,b'gAAAAABhokgiaAv8_ZY17o1NHNc26W_ufUuSFDIl9DAT...


## Step 3: Encrypt other party's self-encrypted sum of common hashed values with own private key

In [27]:
# clients encrypt other party's self-encrypted sum of common hashed values with their private key
gojek.encrypt_sum_of_common_hashed_values(True) # set is_other_party to true to encrypt 
                                                # other party's self-encrypted sum of common hashed values
grab.encrypt_sum_of_common_hashed_values(True)

### Clients' status after step 3 (value of variables in client)
Note: actual value is not to be shared yet therefore files' status after step 3 is not shown

In [28]:
d = {"gojek common hashed values": gojek.my_common_hashes,
     "grab common hashed values": grab.my_common_hashes,
    "gojek sum of common hashed values": gojek.my_sum_of_hashes,
    "grab sum of common hashed values": grab.my_sum_of_hashes,
    "gojek self-encrypted sum of common hashed values": gojek.my_self_encrypted_sum_of_hashes,
    "grab self-encrypted sum of common hashed values": grab.my_self_encrypted_sum_of_hashes,
    "gojek actual value": gojek.my_actual_value,
    "grab actual value": grab.my_actual_value}
df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()]))
df

Unnamed: 0,gojek common hashed values,grab common hashed values,gojek sum of common hashed values,grab sum of common hashed values,gojek self-encrypted sum of common hashed values,grab self-encrypted sum of common hashed values,gojek actual value,grab actual value
0,9838169730706049126431649154985480440522462218...,9838169730706049126431649154985480440522462218...,4136707200875143739127692045206988587017779017...,4136707200875143739127692045206988587017779017...,1381027602952099954789007531532175563433257770...,4313002687526156673975737001654467139804877307...,9831743967164585972119536631921278872882432903...,9831743967164585972119536631921278872882432903...
1,2894934995096922601776004424726935937853330465...,2894934995096922601776004424726935937853330465...,,,,,,
2,2928980457934649500915797854609194600615950851...,2928980457934649500915797854609194600615950851...,,,,,,
3,1103882088551807418117675905119429741273074924...,9996825388533779586759487527820770206563019494...,,,,,,
4,9996825388533779586759487527820770206563019494...,1103882088551807418117675905119429741273074924...,,,,,,
5,2578545958466060453428636263208319888439639420...,2578545958466060453428636263208319888439639420...,,,,,,
6,5385040241161624166328037048478138908841351141...,5385040241161624166328037048478138908841351141...,,,,,,
7,1959933681547377774404303441869569823311439691...,2862142013587629757574505891453542162181560643...,,,,,,
8,2862142013587629757574505891453542162181560643...,1959933681547377774404303441869569823311439691...,,,,,,
9,6212011344629679795230035246755687194777942179...,6212011344629679795230035246755687194777942179...,,,,,,


## Step 4: Commit to a value 

In [29]:
# clients hash other party's encrypted sum of hashes ((other_party_sum_of_hashes)^(client_secret)(other_party_secret))modp
# and send the value (h(((other_party_sum_of_hashes)^(client_secret)(other_party_secret))modp)) i.e. write to file
gojek.commit_to_hash_value() 
grab.commit_to_hash_value()

  df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in data.items()])) # create dataframe


### Clients' status after step 4 (value of variables in client)


In [30]:
d = {"gojek common hashed values": gojek.my_common_hashes,
     "grab common hashed values": grab.my_common_hashes,
    "gojek sum of common hashed values": gojek.my_sum_of_hashes,
    "grab sum of common hashed values": grab.my_sum_of_hashes,
    "gojek self-encrypted sum of common hashed values": gojek.my_self_encrypted_sum_of_hashes,
    "grab self-encrypted sum of common hashed values": grab.my_self_encrypted_sum_of_hashes,
    "gojek actual value": gojek.my_actual_value,
    "grab actual value": grab.my_actual_value,
    "gojek committed value": gojek.my_committed_value,
    "grab committed value": grab.my_committed_value}
df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()]))
df

Unnamed: 0,gojek common hashed values,grab common hashed values,gojek sum of common hashed values,grab sum of common hashed values,gojek self-encrypted sum of common hashed values,grab self-encrypted sum of common hashed values,gojek actual value,grab actual value,gojek committed value,grab committed value
0,9838169730706049126431649154985480440522462218...,9838169730706049126431649154985480440522462218...,4136707200875143739127692045206988587017779017...,4136707200875143739127692045206988587017779017...,1381027602952099954789007531532175563433257770...,4313002687526156673975737001654467139804877307...,9831743967164585972119536631921278872882432903...,9831743967164585972119536631921278872882432903...,5849372235586775371673790972279571919451012868...,5849372235586775371673790972279571919451012868...
1,2894934995096922601776004424726935937853330465...,2894934995096922601776004424726935937853330465...,,,,,,,,
2,2928980457934649500915797854609194600615950851...,2928980457934649500915797854609194600615950851...,,,,,,,,
3,1103882088551807418117675905119429741273074924...,9996825388533779586759487527820770206563019494...,,,,,,,,
4,9996825388533779586759487527820770206563019494...,1103882088551807418117675905119429741273074924...,,,,,,,,
5,2578545958466060453428636263208319888439639420...,2578545958466060453428636263208319888439639420...,,,,,,,,
6,5385040241161624166328037048478138908841351141...,5385040241161624166328037048478138908841351141...,,,,,,,,
7,1959933681547377774404303441869569823311439691...,2862142013587629757574505891453542162181560643...,,,,,,,,
8,2862142013587629757574505891453542162181560643...,1959933681547377774404303441869569823311439691...,,,,,,,,
9,6212011344629679795230035246755687194777942179...,6212011344629679795230035246755687194777942179...,,,,,,,,


### Files' status after step 4 (value of variables in file - clients' variables encrypted with Fernet)


In [31]:
df_gojek = gojek.get_my_data()
df_grab = grab.get_my_data()
print("gojek's file:")
df_gojek[["my_self_encrypted_sum_of_hashes", "my_committed_value"]].dropna()

gojek's file:


Unnamed: 0,my_self_encrypted_sum_of_hashes,my_committed_value
0,b'gAAAAABhokgiIQIxwPRCFleMxcvM-p8QsWCza793YTYd...,b'gAAAAABhokgiRBqTwH-RSF9K7OiuqDhHwPZr0BHZIzRf...


In [32]:
print("grab's file:")
df_grab[["my_self_encrypted_sum_of_hashes", "my_committed_value"]].dropna()

grab's file:


Unnamed: 0,my_self_encrypted_sum_of_hashes,my_committed_value
0,b'gAAAAABhokgiaAv8_ZY17o1NHNc26W_ufUuSFDIl9DAT...,b'gAAAAABhokgi5iwkfXIhIQyHuv9a5bX6zn2Hm7pADkM2...


## Step 5: Reveal actual value

In [33]:
# clients send the actual value (((other_party_sum_of_hashes)^(client_secret)(other_party_secret))modp) i.e. write to file
grab.reveal_actual_value() # note: grab has to reveal actual value before gojek since gojek was the first to commit to a value
gojek.reveal_actual_value()


### Clients' status after step 5 (value of variables in client)


In [34]:
d = {"gojek common hashed values": gojek.my_common_hashes,
     "grab common hashed values": grab.my_common_hashes,
    "gojek sum of common hashed values": gojek.my_sum_of_hashes,
    "grab sum of common hashed values": grab.my_sum_of_hashes,
    "gojek self-encrypted sum of common hashed values": gojek.my_self_encrypted_sum_of_hashes,
    "grab self-encrypted sum of common hashed values": grab.my_self_encrypted_sum_of_hashes,
    "gojek actual value": gojek.my_actual_value,
    "grab actual value": grab.my_actual_value,
    "gojek committed value": gojek.my_committed_value,
    "grab committed value": grab.my_committed_value}
df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()]))
df


Unnamed: 0,gojek common hashed values,grab common hashed values,gojek sum of common hashed values,grab sum of common hashed values,gojek self-encrypted sum of common hashed values,grab self-encrypted sum of common hashed values,gojek actual value,grab actual value,gojek committed value,grab committed value
0,9838169730706049126431649154985480440522462218...,9838169730706049126431649154985480440522462218...,4136707200875143739127692045206988587017779017...,4136707200875143739127692045206988587017779017...,1381027602952099954789007531532175563433257770...,4313002687526156673975737001654467139804877307...,9831743967164585972119536631921278872882432903...,9831743967164585972119536631921278872882432903...,5849372235586775371673790972279571919451012868...,5849372235586775371673790972279571919451012868...
1,2894934995096922601776004424726935937853330465...,2894934995096922601776004424726935937853330465...,,,,,,,,
2,2928980457934649500915797854609194600615950851...,2928980457934649500915797854609194600615950851...,,,,,,,,
3,1103882088551807418117675905119429741273074924...,9996825388533779586759487527820770206563019494...,,,,,,,,
4,9996825388533779586759487527820770206563019494...,1103882088551807418117675905119429741273074924...,,,,,,,,
5,2578545958466060453428636263208319888439639420...,2578545958466060453428636263208319888439639420...,,,,,,,,
6,5385040241161624166328037048478138908841351141...,5385040241161624166328037048478138908841351141...,,,,,,,,
7,1959933681547377774404303441869569823311439691...,2862142013587629757574505891453542162181560643...,,,,,,,,
8,2862142013587629757574505891453542162181560643...,1959933681547377774404303441869569823311439691...,,,,,,,,
9,6212011344629679795230035246755687194777942179...,6212011344629679795230035246755687194777942179...,,,,,,,,


Note: No change from step 4

### Files' status after step 5 (value of variables in file - clients' variables encrypted with Fernet)


In [35]:
df_gojek = gojek.get_my_data()
df_grab = grab.get_my_data()
print("gojek's file:")
df_gojek[["my_self_encrypted_sum_of_hashes", "my_committed_value", "my_actual_value"]].dropna()

gojek's file:


Unnamed: 0,my_self_encrypted_sum_of_hashes,my_committed_value,my_actual_value
0,b'gAAAAABhokgiIQIxwPRCFleMxcvM-p8QsWCza793YTYd...,b'gAAAAABhokgiRBqTwH-RSF9K7OiuqDhHwPZr0BHZIzRf...,b'gAAAAABhokgjR5fZ23ADoSFXZLGhr8p3rPXRxJFzSzXM...


In [36]:
print("grab's file:")
df_grab[["my_self_encrypted_sum_of_hashes", "my_committed_value", "my_actual_value"]].dropna()

grab's file:


Unnamed: 0,my_self_encrypted_sum_of_hashes,my_committed_value,my_actual_value
0,b'gAAAAABhokgiaAv8_ZY17o1NHNc26W_ufUuSFDIl9DAT...,b'gAAAAABhokgi5iwkfXIhIQyHuv9a5bX6zn2Hm7pADkM2...,b'gAAAAABhokgjux1I8zYvraXM3_of4Hj7elm2JmEIXgfG...


## Step 6: Verify exchanged values to verify no shuffling of encrypted set

In [37]:
# clients check if the other party's committed value is the hashed value of their actual value
print("gojek: ", end = '')
gojek_match = gojek.check_if_actual_and_committed_values_match()
print("grab: ", end = '')
grab_match = grab.check_if_actual_and_committed_values_match()

# clients check if the other party's actual value are the same as theirs
print("gojek: ", end = '')
gojek_match = gojek.check_if_actual_values_match() and gojek_match
print("grab: ", end = '')
grab_match = grab.check_if_actual_values_match() and grab_match

if grab_match and gojek_match:
    print("No cheating occurred!")
else:
    print("Someone cheated!")

gojek: committed and actual values match!
grab: committed and actual values match!
gojek: actual values match!
grab: actual values match!
No cheating occurred!
