In [1]:
# import libraries (non-cryptographic)
import random # to generate phone numbers
import pandas as pd

# import libraries (cryptographic)
import cryptography.hazmat.primitives.asymmetric.dh as dh
from cryptography.fernet import Fernet
import hashlib
import sympy
import secrets

# Generate phone numbers and store them in the phone_numbers_padded.csv file

In [2]:
random.seed(10) # fixed seed so `random.sample` / `random.shuffle` below produce the same phone numbers and ordering on every run

In [3]:
# class to generate phone numbers for grab and gojek
class PhoneNumberGenerator:
    """Callable that draws distinct phone numbers for grab and gojek.

    Numbers are sampled from the half-open range [80000000, 100000000).
    """

    def __call__(self, count):
        """Return a list of `count` distinct numbers sampled without replacement."""
        valid_range = range(80000000, 100000000)
        return random.sample(valid_range, count)

class FakePhoneNumberGenerator:
    """Callable that draws distinct padding numbers that are not valid phone numbers.

    Numbers are sampled from the half-open range [10000000, 80000000), i.e.
    strictly below the range used for valid phone numbers.
    """

    def __call__(self, count):
        """Return a list of `count` distinct invalid numbers sampled without replacement."""
        invalid_range = range(10000000, 80000000)
        return random.sample(invalid_range, count)
            
    
# class to store numbers to csv
class PhoneNumberStorageManager:
    """Callable that writes both parties' phone-number lists to a single CSV file."""

    def __init__(self):
        # output path, relative to the current working directory
        self.filename = "phone_numbers_padded.csv"

    def __call__(self, gojek_phone_numbers, grab_phone_numbers):
        """Write the two lists side by side as columns "gojek" and "grab".

        Each list is wrapped in a Series before the frame is built, so lists of
        different lengths can still share one frame.
        """
        columns = {
            "gojek": pd.Series(gojek_phone_numbers),
            "grab": pd.Series(grab_phone_numbers),
        }
        frame = pd.DataFrame(columns)
        frame.to_csv(self.filename, index=False)

### Edit cell below to change phone number count and set size

In [4]:
# set set-size
# Every party pads its list with fake numbers up to this size before exchanging it.
# The padding count is computed later as `set_size - <party>_phone_number_count`,
# so neither count below may exceed set_size.
set_size = 100 # both parties are to exchange sets of a pre-determined size

# set phone number count
# The shared numbers are counted inside each party's total, so
# common_phone_number_count must not exceed either party's count.
gojek_phone_number_count = 61 # inclusive of phone numbers in common with grab
grab_phone_number_count = 91 # inclusive of phone numbers in common with gojek
common_phone_number_count = 10 # how many real numbers appear in both parties' lists

In [5]:
# instantiate the helper callables
phone_number_generator = PhoneNumberGenerator()
fake_phone_number_generator = FakePhoneNumberGenerator()
phone_number_storage_manager = PhoneNumberStorageManager()

# draw every distinct real number needed by both parties in a single sample:
#   [0, common)      -> shared by gojek and grab
#   [common, gojek)  -> gojek only
#   [gojek, end)     -> grab only
total_distinct_count = (
    gojek_phone_number_count + grab_phone_number_count - common_phone_number_count
)
phone_numbers = phone_number_generator(total_distinct_count)
common_phone_numbers = phone_numbers[:common_phone_number_count]
gojek_phone_numbers = phone_numbers[:gojek_phone_number_count]
grab_phone_numbers = common_phone_numbers + phone_numbers[gojek_phone_number_count:]

# generate the fake numbers that pad each list up to set_size
gojek_fake_phone_numbers = fake_phone_number_generator(set_size - gojek_phone_number_count)
grab_fake_phone_numbers = fake_phone_number_generator(set_size - grab_phone_number_count)

# append the padding to the real numbers
gojek_padded_set = [*gojek_phone_numbers, *gojek_fake_phone_numbers]
grab_padded_set = [*grab_phone_numbers, *grab_fake_phone_numbers]

# shuffle in place so the position of an entry does not reveal whether it is real
for padded_set in (gojek_padded_set, grab_padded_set):
    random.shuffle(padded_set)

# write phone numbers (with fake ones) to csv file
phone_number_storage_manager(gojek_padded_set, grab_padded_set)

# Define the classes necessary for the algorithm

In [6]:
# class to generate numbers required for psi
class NumberGenerator:
    """Produces the public group parameters and private exponents used for PSI."""

    def generate_public_parameters(self, size):
        """Generate the prime modulus p and the proper divisors of p - 1.

        Prints primality / bit-length diagnostics for p and q = (p - 1) // 2.

        Args:
            size: requested bit length of p.

        Returns:
            (p, [1, 2, q]). Because p - 1 = 2q with q prime, 1, 2 and q are
            the only divisors of p - 1 other than p - 1 itself; they are what
            the primitive-generator check (Lagrange's theorem) needs.
        """
        p = self.generate_safe_prime(size)
        print(f"p is prime: {sympy.ntheory.isprime(p)}")

        length_of_p = p.bit_length()  # number of binary digits of p
        print(f"Length of prime modulus, p: {length_of_p}.\nNote: Should be {size}.")

        q = (p - 1) // 2
        print(f"q is prime: {sympy.ntheory.isprime(q)}")

        return p, [1, 2, q]

    def generate_safe_prime(self, size):
        """Return a `size`-bit prime p for which (p - 1) // 2 is also prime.

        The prime comes from Diffie-Hellman parameter generation (generator 2).
        The loop re-checks the safe-prime property with sympy and regenerates
        (printing the new candidate) until the check passes.
        """
        candidate = dh.generate_parameters(2, size).parameter_numbers().p
        while not sympy.ntheory.isprime((candidate - 1) // 2):
            candidate = dh.generate_parameters(2, size).parameter_numbers().p
            print(candidate)
        return candidate

    def generate_random_number(self, size):
        """Return a cryptographically strong random integer with `size` random bits (a client's secret)."""
        return secrets.randbits(size)
            
# class to inspect values
class NumberInspector:
    """Number-theoretic checks used by the PSI clients."""

    def check_is_primitive_generator(self, candidate, factors_divisorminus1, divisor):
        """Tell whether `candidate` is a primitive root modulo `divisor`.

        Uses Lagrange's theorem: the order of an element divides the group
        order divisor - 1, so the element is a generator exactly when no proper
        divisor d of divisor - 1 satisfies candidate**d == 1 (mod divisor).

        Args:
            candidate: integer to test.
            factors_divisorminus1: the divisors of divisor - 1, excluding
                divisor - 1 itself (e.g. [1, 2, q] when divisor = 2q + 1).
            divisor: the modulus; assumed to be prime.

        Returns:
            True if `candidate` has order divisor - 1, otherwise False.
        """
        # A multiple of the modulus is congruent to 0, which is not in the
        # multiplicative group at all. Its powers are never 1, so without this
        # guard the loop below would wrongly accept it as a generator.
        if candidate % divisor == 0:
            return False

        # pow(x, y, z) is fast modular exponentiation; Python integers have
        # arbitrary precision, so no overflow can occur.
        return all(
            pow(candidate, possible_order, divisor) != 1
            for possible_order in factors_divisorminus1
        )

    
class StorageManager:
    """Persists a mapping of named columns to a CSV file."""

    def store_data(self, filename, data):
        """Write `data` (column name -> sequence or None) to `filename` as CSV.

        Every column is converted to a string-typed Series first, so columns of
        different lengths can share one frame. The index is not written.
        """
        columns = {}
        for column_name, values in data.items():
            columns[column_name] = pd.Series(values, dtype='str')
        pd.DataFrame(columns).to_csv(filename, index=False)


# Create psi client class

In [7]:
# client class (both grab and gojek are clients communicating directly with each other)
class Client:
    def __init__(self, name, other_party_name, private_key_size, phone_numbers, p, factors_pminus1, fernet_key):
        """Set up one PSI party and create its (initially empty) exchange file.

        Args:
            name: this party's name; its exchange file is name + "_data_v2.0.csv".
            other_party_name: the peer's name; the peer's file is named the same way.
            private_key_size: number of random bits in the private exponent; other
                methods also divide it by 8 to size byte encodings.
            phone_numbers: this party's list of (padded) phone numbers.
            p: prime modulus shared by both parties.
            factors_pminus1: divisors of p - 1, used to test whether a hashed
                phone number is a primitive generator modulo p.
            fernet_key: symmetric key used to Fernet-encrypt everything written
                to the exchange file.
        """
        # helper objects
        self.number_inspector = NumberInspector()
        self.number_generator = NumberGenerator()

        # asymmetric part: private exponent, own set and public parameters
        self.private_key_size = private_key_size
        self.private_key = self.number_generator.generate_random_number(private_key_size)
        self.my_set = phone_numbers
        self.p = p
        self.factors_pminus1 = factors_pminus1

        # symmetric part: Fernet protects the values placed in the file
        self.fernet_key = fernet_key
        self.f = Fernet(fernet_key)

        # protocol state, filled in step by step as the algorithm runs
        for attribute in (
            "my_hashed_set",                    # h(x)
            "my_self_encrypted_set",            # (h(x)^(my_secret)) mod p
            "my_encrypted_set",                 # (h(x)^(my_secret)(other_party_secret)) mod p
            "other_party_encrypted_set",        # (h(y)^(my_secret)(other_party_secret)) mod p
            "common_values",                    # common phone numbers, invalid ones removed
            "my_common_hashes",                 # common hashes, invalid numbers' hashes included
            "my_sum_of_hashes",                 # h(x_1) + ... + h(x_n) over the intersection
            "my_self_encrypted_sum_of_hashes",  # ((my_sum_of_hashes)^(my_secret)) mod p
            "my_actual_value",                  # ((other_party_sum_of_hashes)^(my_secret)(other_party_secret)) mod p
            "my_committed_value",               # h(my_actual_value)
        ):
            setattr(self, attribute, None)

        # Columns of the exchange file.
        # - my_self_encrypted_set / other_party_encrypted_set: the basic PSI exchange
        # - my_self_encrypted_sum_of_hashes / my_committed_value / my_actual_value:
        #   used to detect shuffling of the encrypted values
        # common_values is deliberately not shared: the adversary model is malicious,
        # and verification is done through the actual value instead.
        self.my_dict = dict.fromkeys((
            'my_self_encrypted_set',
            'other_party_encrypted_set',
            'my_self_encrypted_sum_of_hashes',
            'my_committed_value',
            'my_actual_value',
        ))

        # names and file names of both parties
        self.name = name
        self.filename = name + "_data_v2.0.csv"
        self.other_party_name = other_party_name
        self.other_party_filename = other_party_name + "_data_v2.0.csv"

        # create this party's file with the (still empty) columns
        self.storage_manager = StorageManager()
        self.storage_manager.store_data(self.filename, self.my_dict)

    def hash_to_primitive_root_modulo_p(self, element): 
        # method to hash phone numbers to primitive root modulo p i.e. primitive generator

        endian = "big"
        element = element.to_bytes(4, endian)
        hash_hex = hashlib.sha256(element).hexdigest() # sha3_256
        hash_int = int(hash_hex, 16)
        while True:
            # repeatedly hash until primitive root modulo p is obtained
            is_primitive_generator = self.number_inspector.check_is_primitive_generator(
                hash_int, self.factors_pminus1, self.p
            )
            if (is_primitive_generator):
                break
            else:
                hash_int = hash_int.to_bytes(32, endian)
                hash_hex = hashlib.sha256(hash_int).hexdigest()
                hash_int = int(hash_hex, 16)
                
        return hash_int
    
    def modular_exponentation(self, element):
        # compute (element^(private_key))modp
    
        return pow(element, self.private_key, self.p)
    
    def hash_set(self):
        # hash all phone numbers in my set to primitive root modulo p, one by one
        
        self.my_hashed_set = []
        
        for element in self.my_set:
            hashed_value = self.hash_to_primitive_root_modulo_p(element)
            self.my_hashed_set.append(hashed_value)
            
    def encrypt_set(self, is_other_party, shuffle = False):
        # encrypt all elements in a given set using private_key, one by one
        
        # two scenarios to consider
        # one: encrypt set sent by the other party
        if (is_other_party):
            decrypted_other_party_set = self.receive_data("my_self_encrypted_set")
            other_party_set_int = []
            for element_string in decrypted_other_party_set:
                other_party_set_int.append(int(element_string))
            set_to_encrypt = other_party_set_int
        # two: encrypt my own set
        else:
            set_to_encrypt = self.my_hashed_set
        
        # encrypt values in given set, one by one
        encrypted_values = []
        for element in set_to_encrypt:
            encrypted_value = self.modular_exponentation(element)
            encrypted_values.append(encrypted_value)
            
        # assign the encrypted set to the correct variable
        # update csv file used for communication
        if (is_other_party):
            self.other_party_encrypted_set = encrypted_values
            # shuffle the encrypted set if it is shuffle == true
            if (shuffle):
                shuffled_encrypted_values = encrypted_values
                random.shuffle(shuffled_encrypted_values)
                self.send_data(shuffled_encrypted_values, "other_party_encrypted_set") # send shuffled set to other party
            else:
                self.send_data(encrypted_values, "other_party_encrypted_set")
        else:
            self.my_self_encrypted_set = encrypted_values
            self.send_data(encrypted_values, "my_self_encrypted_set")
                    
        
    def get_intersection(self):
        
        # get intersection
        
        my_encrypted_set = self.receive_data("other_party_encrypted_set") # read my encrypted set from the other party's file
        my_encrypted_set_int = []
        
        # convert read values to integer
        for element in my_encrypted_set:
            my_encrypted_set_int.append(int(element))
            
        # assign to correct variable
        self.my_encrypted_set = my_encrypted_set_int
        
        # get intersection
        encrypted_common_values = set(self.my_encrypted_set).intersection(self.other_party_encrypted_set)
        
        index_of_common_values = []
        
        # find the index of the elements in the intersection in my_encrypted_set
        for element in encrypted_common_values:
            index_of_common_values.append(self.my_encrypted_set.index(element))
            
        self.common_values = [] # common valid phone numbers
        self.my_common_hashes = [] # common hashed values (inclusive of hashed value of invalid phone numbers)
        
        # find the values in my_set and my_hashed_set corresponding to the index of the elements in the intersection
        for index in index_of_common_values:
            potential_phone_number = self.my_set[index]
            
            if ((potential_phone_number-79999999)>0): # remove any potential fake numbers that intersect
                self.common_values.append(potential_phone_number)
                
            self.my_common_hashes.append(self.my_hashed_set[index])
        
    
    
    def compute_sum_of_common_hashed_values(self):
        # compute sum of hashed values in the common intersection
        
        self.my_sum_of_hashes = sum(self.my_common_hashes)
    
    def encrypt_sum_of_common_hashed_values(self, is_other_party):
        # encrypt the sum of common hashed values
        # there are two scenarios
        # one: encrypt other party's self encrypted sum of hashes
        if(is_other_party):
            value_to_encrypt = int((self.receive_data('my_self_encrypted_sum_of_hashes'))[0]) # get other party's self encrypted sum of hashes, 
                                                                                         # ((other_party_sum_of_hashes)^(other_party_secret))modp
        # two: encrypt my_sum_of_hashes
        else:
            value_to_encrypt = self.my_sum_of_hashes # to obtain my_self_encrypted_sum_of_hashes
        
        encrypted_value = self.modular_exponentation(value_to_encrypt) # get ((value_to_encrypt)^(my_secret))modp
        
        # assign to the corresponding variable
        if (is_other_party):
            self.my_actual_value = encrypted_value # do not send this value yet, must send the hash of this first (for commit mechanism)
        else:
            self.my_self_encrypted_sum_of_hashes = encrypted_value
            self.send_data([self.my_self_encrypted_sum_of_hashes], "my_self_encrypted_sum_of_hashes")
        
    
    def hash_value(self, value, byte_size):
        """Return the SHA-256 digest of `value` as an integer.

        Args:
            value: non-negative integer to hash.
            byte_size: number of big-endian bytes `value` is encoded into before
                hashing; `value` must fit in that many bytes.
        """
        encoded = value.to_bytes(byte_size, "big")
        return int.from_bytes(hashlib.sha256(encoded).digest(), "big")
    
    def commit_to_hash_value(self):
        # hash actual value and send the hashed value
        self.my_committed_value = self.hash_value(self.my_actual_value, self.private_key_size//8)
        self.send_data([self.my_committed_value], "my_committed_value")
    
    
    def reveal_actual_value(self):
        # send the actual value
        
        self.send_data([self.my_actual_value], "my_actual_value")
    
    
    def check_if_actual_and_committed_values_match(self):
        # check if the committed value is the hash of the actual value
        
        # get required values        
         
        other_party_committed_value = int((self.receive_data("my_committed_value"))[0]) # get other_party_committed_value
        other_party_actual_value = int((self.receive_data("my_actual_value"))[0]) # get other_party_actual_value
        hashed_value = self.hash_value(other_party_actual_value, self.private_key_size//8) # hash other_party_actual_value
        
        # check if values match
        if (hashed_value == other_party_committed_value):
            print("committed and actual values match!")
            return True
        else:
            print("committed and actual values do not match!")
            return False
    
    def check_if_actual_values_match(self):
        # check if my actual value is equal to the other party's actual value
        
        other_party_actual_value = int((self.receive_data("my_actual_value"))[0]) # get other_party_actual_value
        
        # check if values match
        if (other_party_actual_value == self.my_actual_value):
            print("actual values match!")
            return True
        else:
            print("actual values do not match!")
            return False
    
    def encrypt_data(self, plaintext):
        # encrypt data with Fernet
        
        endian = "big"
        element = plaintext.to_bytes(self.private_key_size//8, endian) # 1024 bits == 128 bytes
        cipher_text = self.f.encrypt(element)
        return cipher_text
    
    def decrypt_data(self, ciphertext):
        # decrypt data encrypted by Fernet
        
        ciphertext_bytes = ciphertext.encode('utf-8')[2:-1] # convert from string back to bytes
        endian = "big"
        element_in_bytes = self.f.decrypt(ciphertext_bytes)
        plaintext = int.from_bytes(element_in_bytes, endian)
        return plaintext

    def send_data(self, data_to_send, column_name):
        # send data means writing to file. encrypt data with Fernet
        
        # encrypt data
        encrypted_data_to_send = []
        for element in data_to_send:
            encrypted_element = self.encrypt_data(element)
            encrypted_data_to_send.append(encrypted_element)
            
        # send data
        self.my_dict[column_name] = encrypted_data_to_send
        self.storage_manager.store_data(self.filename, self.my_dict)
        
        
    def receive_data(self, column_name):
        # receive data means reading from file (my file). decrypt data encrypted by Fernet
        
        # receive data
        encrypted_data = self.get_other_party_data()[column_name].to_list()
        
        # decrypt data
        decrypted_data = []
        for element in encrypted_data:
            if type(element) is float: # remove NaN
                continue
            decrypted_element = self.decrypt_data(element)
            decrypted_data.append(decrypted_element)
        print(f"Length of read data: {len(decrypted_data)}")
        return decrypted_data
    
    def get_my_data(self):
        # read my file as dataframe (other party's file)
        
        return pd.read_csv(self.filename)
            
    def get_other_party_data(self):
        # read other party's file as dataframe
        
        return pd.read_csv(self.other_party_filename)
    
    


# Initialize context

### Edit cell below to change key size

In [8]:
# assign pre-determined variables for psi
# key_size is passed both to the public-parameter generator (bit length of the
# prime modulus p) and to each Client as the bit length of its private key.
key_size = 1024 # both private keys and large prime

In [9]:
# symmetric key shared by both clients; protects the values written to the files
fernet_key = Fernet.generate_key()

# public parameters required for psi: prime modulus p and the divisors of p - 1
number_generator = NumberGenerator()
p, factors_pminus1 = number_generator.generate_public_parameters(key_size)

# create the two clients; each one creates its own exchange file on construction
grab = Client(
    name="grab",
    other_party_name="gojek",
    private_key_size=key_size,
    phone_numbers=grab_padded_set,
    p=p,
    factors_pminus1=factors_pminus1,
    fernet_key=fernet_key,
)
gojek = Client(
    name="gojek",
    other_party_name="grab",
    private_key_size=key_size,
    phone_numbers=gojek_padded_set,
    p=p,
    factors_pminus1=factors_pminus1,
    fernet_key=fernet_key,
)



p is prime: True
Length of prime modulus, p: 1024.
Note: Should be 1024.
q is prime: True


# Get intersection

## Step 1: Hash phone numbers

In [10]:
# each client hashes its own (padded) set of phone numbers
for client in (grab, gojek):
    client.hash_set()

### Clients' status after step 1
Note: the hashed set is not stored in the file because it is not meant to be shared with the other party; hence, the files' status after step 1 is not shown.

In [11]:
# side-by-side view of both clients' hashed sets
d = {
    "gojek hashed set": gojek.my_hashed_set,
    "grab hashed set": grab.my_hashed_set,
}
# wrap each list in a Series so columns of different lengths can share a frame
df = pd.DataFrame({label: pd.Series(values) for label, values in d.items()})
df

Unnamed: 0,gojek hashed set,grab hashed set
0,6652014678762835950508254036300224198617716532...,1014864025876262450241425463322343998244080336...
1,5429498241653143940797762210711587276758607410...,7979890278999879410587410887679716007271632351...
2,5773315736316327673762720174221743416131441931...,8119100922771943066177300284568368128847410924...
3,7165927783480481448708501993505812863126714098...,2851550717787818723928160490729757975939425985...
4,9354400085644697176129851558609086082127592441...,4320682999770379726688099447684191745405506617...
...,...,...
95,4464107689706471902374892139671036333433126606...,2064846653753480132804612906272453669387169259...
96,7960204644377876874342383407634690809702051347...,8866985291088494051041055141151271041321507008...
97,5342186225191643199771955160697963445317631262...,3948222474783499474687611776797431125471701320...
98,5888437846314481919580418714275445066774752189...,7018556158959834041117086565402060628499966280...


## Step 2: encrypt hashed set with own private key

In [12]:
# each client encrypts its own hashed set with its private key
# (is_other_party=False selects the client's own hashed set)
for client in (grab, gojek):
    client.encrypt_set(False)

### Clients' status after step 2 (value of variables in client)

In [13]:
# side-by-side view of the hashed and self-encrypted sets of both clients
d = {
    "gojek hashed set": gojek.my_hashed_set,
    "grab hashed set": grab.my_hashed_set,
    "gojek self-encrypted set": gojek.my_self_encrypted_set,
    "grab self-encrypted set": grab.my_self_encrypted_set,
}
# wrap each list in a Series so columns of different lengths can share a frame
df = pd.DataFrame({label: pd.Series(values) for label, values in d.items()})
df

Unnamed: 0,gojek hashed set,grab hashed set,gojek self-encrypted set,grab self-encrypted set
0,6652014678762835950508254036300224198617716532...,1014864025876262450241425463322343998244080336...,1954160000791923156212360690812081861655350847...,1359067225075966175234032424399933430441092073...
1,5429498241653143940797762210711587276758607410...,7979890278999879410587410887679716007271632351...,9769884720159259664723396699995400981906656117...,5962025131010983924026822993304974773372632644...
2,5773315736316327673762720174221743416131441931...,8119100922771943066177300284568368128847410924...,2730763656564192665329585284612791902340097876...,9099249730016845361879618009942890457794812694...
3,7165927783480481448708501993505812863126714098...,2851550717787818723928160490729757975939425985...,1490069708835017189200675165642759457732255815...,3269848536916430934648822106925362834369158247...
4,9354400085644697176129851558609086082127592441...,4320682999770379726688099447684191745405506617...,5344180189607777344726074648179154287310385124...,8165018911213621027007756417486960336880787732...
...,...,...,...,...
95,4464107689706471902374892139671036333433126606...,2064846653753480132804612906272453669387169259...,9117910514951791331296554723558004005981811820...,8844710490686000657970625591219414034362951608...
96,7960204644377876874342383407634690809702051347...,8866985291088494051041055141151271041321507008...,6071703940989366093758688901547967675527715547...,7844120525116892490776750071633658343863418899...
97,5342186225191643199771955160697963445317631262...,3948222474783499474687611776797431125471701320...,2999298487401555314322134278301313162196591871...,6321886006078931086532746041631104344841569589...
98,5888437846314481919580418714275445066774752189...,7018556158959834041117086565402060628499966280...,2533122032463367812563206952953785706460085087...,4957605133745151592799518169230302460072229964...


### Files' status after step 2 (value of variables in file - clients' variables encrypted with Fernet)

In [14]:
# read both exchange files back from disk; gojek's is displayed by this cell
df_gojek, df_grab = gojek.get_my_data(), grab.get_my_data()
print("gojek's file:")
df_gojek


gojek's file:


Unnamed: 0,my_self_encrypted_set,other_party_encrypted_set,my_self_encrypted_sum_of_hashes,my_committed_value,my_actual_value
0,b'gAAAAABhoxbI8tgTgnGc4ASaZ5ImGyFIf3-U8XIe9wqQ...,,,,
1,b'gAAAAABhoxbIU8SXofDcXBtGjQWIytS2s7QeYdnB5eTb...,,,,
2,b'gAAAAABhoxbIU7lDAMQCbgE0_Uif8VHIKJ6XybIfYiUV...,,,,
3,b'gAAAAABhoxbIy2qvJqBWiVd3MDONmbDG9tcuXhzPMRpZ...,,,,
4,b'gAAAAABhoxbIplsgWHaZOe-tVwjdhtQyiWf1bQ196ZZW...,,,,
...,...,...,...,...,...
95,b'gAAAAABhoxbI8CpVOKT9RazvIIw1IBZgxqH0a1MZYZfQ...,,,,
96,b'gAAAAABhoxbI401N7ROFqPku8ZS9g66EBBI_iKRbQBzn...,,,,
97,b'gAAAAABhoxbIl-0v3GtRsd0SNv2Bg7wO-qvKD1TeH5Ii...,,,,
98,b'gAAAAABhoxbItwPtwSSaeRAajBWRpIXJJPU_jSV-HTPE...,,,,


In [15]:
# display grab's exchange file, read in the previous cell
print("grab's file:")
df_grab

grab's file:


Unnamed: 0,my_self_encrypted_set,other_party_encrypted_set,my_self_encrypted_sum_of_hashes,my_committed_value,my_actual_value
0,b'gAAAAABhoxbHZC9fhWpNUtuU0fJcTo5hQ-CoDITCZMuc...,,,,
1,b'gAAAAABhoxbHGzukefJyr6C5IfM2DJZ00hQTFKz1kN89...,,,,
2,b'gAAAAABhoxbHJ9XfpzlXoGjmQM7t4PGZtqMJ0sakWOcX...,,,,
3,b'gAAAAABhoxbHH2V7zeCS_5fh5d8wHIQ0lkEu0Ys9HAPZ...,,,,
4,b'gAAAAABhoxbHbZzfCZiAslSwmiDCm2BnWjDnPUuIJx8T...,,,,
...,...,...,...,...,...
95,b'gAAAAABhoxbH7w7b5dFYFPVo_-5_Ci2UMEUyxsZuDINE...,,,,
96,b'gAAAAABhoxbHhpR_m5hTILugUB4FpSAqhzCRhbtpow_S...,,,,
97,b'gAAAAABhoxbHWSPtrlLDr2E_jKobq5onCSCkUPbDQNYi...,,,,
98,b'gAAAAABhoxbH_NWevkXBUu4vo-ngQN9h_ZG2pStRv8Cf...,,,,


## Step 3: encrypt other party's self-encrypted set with own private key


### Edit cell below to make either client shuffle the encrypted set before sending to the other party (adversarial behavior)

In [16]:
# clients encrypt other party's self encrypted set
# For each client, keep exactly one of the two calls active:
#   encrypt_set(True)        -> honest behavior, values are sent in their original order
#   encrypt_set(True, True)  -> adversarial behavior, values are shuffled before sending
grab.encrypt_set(True) # set is_other_party to true
# grab.encrypt_set(True, True) # set shuffle to true to make grab shuffle the encrypted set
gojek.encrypt_set(True)
# gojek.encrypt_set(True, True)

Length of read data: 100
Length of read data: 100


### Clients' status after step 3

In [17]:
# side-by-side view after step 3; each client's doubly-encrypted set is held by
# the OTHER client (as its other_party_encrypted_set)
d = {
    "gojek hashed set": gojek.my_hashed_set,
    "grab hashed set": grab.my_hashed_set,
    "gojek self-encrypted set": gojek.my_self_encrypted_set,
    "grab self-encrypted set": grab.my_self_encrypted_set,
    "gojek encrypted set": grab.other_party_encrypted_set,
    "grab encrypted set": gojek.other_party_encrypted_set,
}
# wrap each list in a Series so columns of different lengths can share a frame
df = pd.DataFrame({label: pd.Series(values) for label, values in d.items()})
df

Unnamed: 0,gojek hashed set,grab hashed set,gojek self-encrypted set,grab self-encrypted set,gojek encrypted set,grab encrypted set
0,6652014678762835950508254036300224198617716532...,1014864025876262450241425463322343998244080336...,1954160000791923156212360690812081861655350847...,1359067225075966175234032424399933430441092073...,4856101770400403964103024116450039931731444690...,8805015346848051552266504784289122044480257929...
1,5429498241653143940797762210711587276758607410...,7979890278999879410587410887679716007271632351...,9769884720159259664723396699995400981906656117...,5962025131010983924026822993304974773372632644...,6281242096836535661612880128421347323612176834...,7218228963014038974374123027484027620698356965...
2,5773315736316327673762720174221743416131441931...,8119100922771943066177300284568368128847410924...,2730763656564192665329585284612791902340097876...,9099249730016845361879618009942890457794812694...,6331272583191922633507105623806125357948430391...,8884714280352879634164725026898968183555507804...
3,7165927783480481448708501993505812863126714098...,2851550717787818723928160490729757975939425985...,1490069708835017189200675165642759457732255815...,3269848536916430934648822106925362834369158247...,8507458565631048240982705985344661965580119357...,1196811465808133322206276095449558418776690732...
4,9354400085644697176129851558609086082127592441...,4320682999770379726688099447684191745405506617...,5344180189607777344726074648179154287310385124...,8165018911213621027007756417486960336880787732...,4864634637018735368035440708872127913945377037...,5019413796578164257014505727163786124208530300...
...,...,...,...,...,...,...
95,4464107689706471902374892139671036333433126606...,2064846653753480132804612906272453669387169259...,9117910514951791331296554723558004005981811820...,8844710490686000657970625591219414034362951608...,3993982219229301888356143139633472788380488023...,8479007453532717387474215573957866964371247709...
96,7960204644377876874342383407634690809702051347...,8866985291088494051041055141151271041321507008...,6071703940989366093758688901547967675527715547...,7844120525116892490776750071633658343863418899...,6099424559840902118680384577192803028952559979...,9528132734033565580443482225271256420497826595...
97,5342186225191643199771955160697963445317631262...,3948222474783499474687611776797431125471701320...,2999298487401555314322134278301313162196591871...,6321886006078931086532746041631104344841569589...,7539688212187294917735729312155503976214521551...,1034857590394076654043377781439868735251652010...
98,5888437846314481919580418714275445066774752189...,7018556158959834041117086565402060628499966280...,2533122032463367812563206952953785706460085087...,4957605133745151592799518169230302460072229964...,7323419453780999710899930343813510733308228525...,1259517395668198011526095235211908160385654820...


### Files' status after step 3

In [18]:
# read both exchange files back from disk; gojek's is displayed by this cell
df_gojek, df_grab = gojek.get_my_data(), grab.get_my_data()
print("gojek's file:")
df_gojek


gojek's file:


Unnamed: 0,my_self_encrypted_set,other_party_encrypted_set,my_self_encrypted_sum_of_hashes,my_committed_value,my_actual_value
0,b'gAAAAABhoxbI8tgTgnGc4ASaZ5ImGyFIf3-U8XIe9wqQ...,b'gAAAAABhoxbJZpUVrmC8kvCPEAWo4F3KvgfSCkkX-JT5...,,,
1,b'gAAAAABhoxbIU8SXofDcXBtGjQWIytS2s7QeYdnB5eTb...,b'gAAAAABhoxbJKXC1cDzG1SNr2WtWOHvp3JN2BWxzcIn-...,,,
2,b'gAAAAABhoxbIU7lDAMQCbgE0_Uif8VHIKJ6XybIfYiUV...,b'gAAAAABhoxbJYgoupybSfxFKPlv7FQN5Zs_4J2Dw99Bv...,,,
3,b'gAAAAABhoxbIy2qvJqBWiVd3MDONmbDG9tcuXhzPMRpZ...,b'gAAAAABhoxbJLMK6gh1MShjsjGOMN9uvvcFt52sP2jMQ...,,,
4,b'gAAAAABhoxbIplsgWHaZOe-tVwjdhtQyiWf1bQ196ZZW...,b'gAAAAABhoxbJH3n6iNcXpd9P5VEzkXt67L3Ily2Qdxal...,,,
...,...,...,...,...,...
95,b'gAAAAABhoxbI8CpVOKT9RazvIIw1IBZgxqH0a1MZYZfQ...,b'gAAAAABhoxbJHEPs33jWZLDHpIi5BlarqDMHBYl0x0tL...,,,
96,b'gAAAAABhoxbI401N7ROFqPku8ZS9g66EBBI_iKRbQBzn...,b'gAAAAABhoxbJOMKzatCe-twjjrW9D--9xH6oSy4EwulY...,,,
97,b'gAAAAABhoxbIl-0v3GtRsd0SNv2Bg7wO-qvKD1TeH5Ii...,b'gAAAAABhoxbJ9-ypZHEsYj3RCEQKULP_sxNklQuVIqYi...,,,
98,b'gAAAAABhoxbItwPtwSSaeRAajBWRpIXJJPU_jSV-HTPE...,b'gAAAAABhoxbJh5QP31kVdHpyp4tDa_W892Ef3mv-wcjo...,,,


In [19]:
# display grab's exchange file, read in the previous cell
print("grab's file:")
df_grab

grab's file:


Unnamed: 0,my_self_encrypted_set,other_party_encrypted_set,my_self_encrypted_sum_of_hashes,my_committed_value,my_actual_value
0,b'gAAAAABhoxbHZC9fhWpNUtuU0fJcTo5hQ-CoDITCZMuc...,b'gAAAAABhoxbI2xXoKmcChM_6MS-Z3wFP04nZHBBBdZbm...,,,
1,b'gAAAAABhoxbHGzukefJyr6C5IfM2DJZ00hQTFKz1kN89...,b'gAAAAABhoxbIhqHy5NfnX0dXr0O_Uu_0DeGiD4F1bBrO...,,,
2,b'gAAAAABhoxbHJ9XfpzlXoGjmQM7t4PGZtqMJ0sakWOcX...,b'gAAAAABhoxbI3p75n0-GPGoa3NSnLSBI_C-s-I0T85sO...,,,
3,b'gAAAAABhoxbHH2V7zeCS_5fh5d8wHIQ0lkEu0Ys9HAPZ...,b'gAAAAABhoxbIgZiEabu6ptihU_OM9EE0As_66TSVAZ2x...,,,
4,b'gAAAAABhoxbHbZzfCZiAslSwmiDCm2BnWjDnPUuIJx8T...,b'gAAAAABhoxbIcpSzZGw2e9HAt0L_wlCy-NeCAkHG3_Qq...,,,
...,...,...,...,...,...
95,b'gAAAAABhoxbH7w7b5dFYFPVo_-5_Ci2UMEUyxsZuDINE...,b'gAAAAABhoxbI9-ajswDN9ijdCN2qENp_J3FyvhkVmF50...,,,
96,b'gAAAAABhoxbHhpR_m5hTILugUB4FpSAqhzCRhbtpow_S...,b'gAAAAABhoxbIdoM5O6SxFjfFfbRDXWUhk-gfpA0E0lIv...,,,
97,b'gAAAAABhoxbHWSPtrlLDr2E_jKobq5onCSCkUPbDQNYi...,b'gAAAAABhoxbIULlvhd8-Zo3IMvBUmUbNYrC3WfOAf8rC...,,,
98,b'gAAAAABhoxbH_NWevkXBUu4vo-ngQN9h_ZG2pStRv8Cf...,b'gAAAAABhoxbIYnE53qPICqE4McZFTnBQqDzros6K6Q8x...,,,


## Step 4: find intersection


In [20]:
# Each client works out the intersection on its own (grab first, then gojek).
# In the recorded output every call prints one "Length of read data: ..." line.
grab.get_intersection()
gojek.get_intersection()

Length of read data: 100
Length of read data: 100


### Clients' status after step 4:
Note: common_values is not stored in the file because it is not meant to be shared with the other party; hence, the files' status after step 4 is not shown.

In [21]:
# Side-by-side snapshot of both clients' in-memory state after the intersection step.
# NOTE(review): the two "encrypted set" columns are read from the *other* client's
# other_party_encrypted_set attribute — presumably intentional (that is where the
# doubly-encrypted copy lives); confirm against the client class.
d = {
    "gojek hashed set": gojek.my_hashed_set,
    "grab hashed set": grab.my_hashed_set,
    "gojek self-encrypted set": gojek.my_self_encrypted_set,
    "grab self-encrypted set": grab.my_self_encrypted_set,
    "gojek encrypted set": grab.other_party_encrypted_set,
    "grab encrypted set": gojek.other_party_encrypted_set,
    "gojek found intersection": gojek.common_values,
    # label kept parallel to the gojek column (no trailing colon)
    "grab found intersection": grab.common_values,
}
# Wrapping each value in a pd.Series lets columns of different lengths share one
# frame: the shorter ones (the found intersections) are padded with NaN.
df = pd.DataFrame({k: pd.Series(v) for k, v in d.items()})
df

Unnamed: 0,gojek hashed set,grab hashed set,gojek self-encrypted set,grab self-encrypted set,gojek encrypted set,grab encrypted set,gojek found intersection,grab found intersection:
0,6652014678762835950508254036300224198617716532...,1014864025876262450241425463322343998244080336...,1954160000791923156212360690812081861655350847...,1359067225075966175234032424399933430441092073...,4856101770400403964103024116450039931731444690...,8805015346848051552266504784289122044480257929...,99173089.0,99173089.0
1,5429498241653143940797762210711587276758607410...,7979890278999879410587410887679716007271632351...,9769884720159259664723396699995400981906656117...,5962025131010983924026822993304974773372632644...,6281242096836535661612880128421347323612176834...,7218228963014038974374123027484027620698356965...,94391128.0,96192082.0
2,5773315736316327673762720174221743416131441931...,8119100922771943066177300284568368128847410924...,2730763656564192665329585284612791902340097876...,9099249730016845361879618009942890457794812694...,6331272583191922633507105623806125357948430391...,8884714280352879634164725026898968183555507804...,96192082.0,94391128.0
3,7165927783480481448708501993505812863126714098...,2851550717787818723928160490729757975939425985...,1490069708835017189200675165642759457732255815...,3269848536916430934648822106925362834369158247...,8507458565631048240982705985344661965580119357...,1196811465808133322206276095449558418776690732...,89312048.0,89312048.0
4,9354400085644697176129851558609086082127592441...,4320682999770379726688099447684191745405506617...,5344180189607777344726074648179154287310385124...,8165018911213621027007756417486960336880787732...,4864634637018735368035440708872127913945377037...,5019413796578164257014505727163786124208530300...,86915509.0,80497694.0
...,...,...,...,...,...,...,...,...
95,4464107689706471902374892139671036333433126606...,2064846653753480132804612906272453669387169259...,9117910514951791331296554723558004005981811820...,8844710490686000657970625591219414034362951608...,3993982219229301888356143139633472788380488023...,8479007453532717387474215573957866964371247709...,,
96,7960204644377876874342383407634690809702051347...,8866985291088494051041055141151271041321507008...,6071703940989366093758688901547967675527715547...,7844120525116892490776750071633658343863418899...,6099424559840902118680384577192803028952559979...,9528132734033565580443482225271256420497826595...,,
97,5342186225191643199771955160697963445317631262...,3948222474783499474687611776797431125471701320...,2999298487401555314322134278301313162196591871...,6321886006078931086532746041631104344841569589...,7539688212187294917735729312155503976214521551...,1034857590394076654043377781439868735251652010...,,
98,5888437846314481919580418714275445066774752189...,7018556158959834041117086565402060628499966280...,2533122032463367812563206952953785706460085087...,4957605133745151592799518169230302460072229964...,7323419453780999710899930343813510733308228525...,1259517395668198011526095235211908160385654820...,,


Note: Unlike the other columns, the last 2 columns do not have a one-to-one mapping with the rest of the row, i.e. the values in the last 2 columns are not related to the other values in the same row.

# Check results

In [22]:
# get the intersection found by the two parties as *sorted copies* for easier comparison
# (sorted() rather than list.sort(): sorting in place would reorder the clients' own
# common_values and the ground-truth common_phone_numbers list, even though this
# cell is only meant to display them)
gojek_found_intersection = sorted(gojek.common_values)
grab_found_intersection = sorted(grab.common_values)

# summarize them in a dataframe: ground truth next to what each client found
d = {"actual": sorted(common_phone_numbers),
    "gojek": gojek_found_intersection,
    "grab": grab_found_intersection}
# one pd.Series per column so that columns of unequal length would still line up
df = pd.DataFrame({k: pd.Series(v) for k, v in d.items()})
df

Unnamed: 0,actual,gojek,grab
0,80497694,80497694,80497694
1,81093373,81093373,81093373
2,86915509,86915509,86915509
3,89312048,89312048,89312048
4,94391128,94391128,94391128
5,95521626,95521626,95521626
6,96192082,96192082,96192082
7,96485172,96485172,96485172
8,99173089,99173089,99173089
9,99397525,99397525,99397525


# Verify no shuffling of encrypted set

## Step 1: Compute sum of common hashed values

In [23]:
# Each client computes the sum of its common hashed values.
# NOTE(review): the results appear to be exposed as my_common_hashes and
# my_sum_of_hashes (the attributes the following status cell displays) — confirm
# in the client class, which is outside this excerpt.
gojek.compute_sum_of_common_hashed_values()
grab.compute_sum_of_common_hashed_values()

### Clients' status after step 1
Note: sum_of_common_hashed_values is not stored in the file because it is not meant to be shared with the other party; hence, the files' status after step 1 is not shown.

In [24]:
# what each client holds after computing its sum of common hashed values
d = {
    "gojek common hashed values": gojek.my_common_hashes,
    "grab common hashed values": grab.my_common_hashes,
    "gojek sum of common hashed values": gojek.my_sum_of_hashes,
    "grab sum of common hashed values": grab.my_sum_of_hashes,
}
# one pd.Series per column, so columns of different lengths fit in a single frame
df = pd.DataFrame({label: pd.Series(values) for label, values in d.items()})
df

Unnamed: 0,gojek common hashed values,grab common hashed values,gojek sum of common hashed values,grab sum of common hashed values
0,4320682999770379726688099447684191745405506617...,4320682999770379726688099447684191745405506617...,5621241956667821988738408602452784812336941927...,5621241956667821988738408602452784812336941927...
1,8470801019739050656518087317414089766837623775...,1281724450698210917073112527506677771741219407...,,
2,1281724450698210917073112527506677771741219407...,8470801019739050656518087317414089766837623775...,,
3,1014864025876262450241425463322343998244080336...,1014864025876262450241425463322343998244080336...,,
4,8980623472061033068078176213550735202130364549...,8385332968680517729805644468005968679470991855...,,
5,8385332968680517729805644468005968679470991855...,2862142013587629757574505891453542162181560643...,,
6,2862142013587629757574505891453542162181560643...,2897733162772679702778317671979167074821905176...,,
7,1103882088551807418117675905119429741273074924...,1103882088551807418117675905119429741273074924...,,
8,6212011344629679795230035246755687194777942179...,6212011344629679795230035246755687194777942179...,,
9,2897733162772679702778317671979167074821905176...,8980623472061033068078176213550735202130364549...,,


## Step 2: Encrypt sum of common hashed values with own private key

In [25]:
# First encryption pass: each client encrypts its own sum of common hashed values
# with its private key.
gojek.encrypt_sum_of_common_hashed_values(False) # is_other_party=False: encrypt the client's own sum of common hashed values
grab.encrypt_sum_of_common_hashed_values(False)

### Clients' status after step 2 (value of variables in client)

In [26]:
# client-side view after each party has encrypted its own sum of common hashed values
d = {
    "gojek common hashed values": gojek.my_common_hashes,
    "grab common hashed values": grab.my_common_hashes,
    "gojek sum of common hashed values": gojek.my_sum_of_hashes,
    "grab sum of common hashed values": grab.my_sum_of_hashes,
    "gojek self-encrypted sum of common hashed values": gojek.my_self_encrypted_sum_of_hashes,
    "grab self-encrypted sum of common hashed values": grab.my_self_encrypted_sum_of_hashes,
}
# one pd.Series per column, so columns of different lengths fit in a single frame
df = pd.DataFrame({label: pd.Series(values) for label, values in d.items()})
df

Unnamed: 0,gojek common hashed values,grab common hashed values,gojek sum of common hashed values,grab sum of common hashed values,gojek self-encrypted sum of common hashed values,grab self-encrypted sum of common hashed values
0,4320682999770379726688099447684191745405506617...,4320682999770379726688099447684191745405506617...,5621241956667821988738408602452784812336941927...,5621241956667821988738408602452784812336941927...,4568627047073890180917101018355802971313330978...,9307148916099770032512052294295423470897901917...
1,8470801019739050656518087317414089766837623775...,1281724450698210917073112527506677771741219407...,,,,
2,1281724450698210917073112527506677771741219407...,8470801019739050656518087317414089766837623775...,,,,
3,1014864025876262450241425463322343998244080336...,1014864025876262450241425463322343998244080336...,,,,
4,8980623472061033068078176213550735202130364549...,8385332968680517729805644468005968679470991855...,,,,
5,8385332968680517729805644468005968679470991855...,2862142013587629757574505891453542162181560643...,,,,
6,2862142013587629757574505891453542162181560643...,2897733162772679702778317671979167074821905176...,,,,
7,1103882088551807418117675905119429741273074924...,1103882088551807418117675905119429741273074924...,,,,
8,6212011344629679795230035246755687194777942179...,6212011344629679795230035246755687194777942179...,,,,
9,2897733162772679702778317671979167074821905176...,8980623472061033068078176213550735202130364549...,,,,


### Files' status after step 2 (value of variables in file - clients' variables encrypted with Fernet)

In [27]:
# fetch both clients' data again so the frames reflect what step 2 produced
# (get_my_data() is defined outside this excerpt)
df_gojek, df_grab = gojek.get_my_data(), grab.get_my_data()
print("gojek's file:")
# single-column frame, restricted to the rows where the value is present
df_gojek[["my_self_encrypted_sum_of_hashes"]].dropna()

gojek's file:


Unnamed: 0,my_self_encrypted_sum_of_hashes
0,b'gAAAAABhoxbJddWb6ALU8wumrq_o0tbqSFKq2CDUc2N1...


In [28]:
print("grab's file:")
# single-column frame; rows that are entirely NaN are dropped
df_grab[["my_self_encrypted_sum_of_hashes"]].dropna(how="all")

grab's file:


Unnamed: 0,my_self_encrypted_sum_of_hashes
0,b'gAAAAABhoxbJYMoAaC8mDlImRIA2GPBJEElMraVN2sgo...


## Step 3: Encrypt other party's self-encrypted sum of common hashed values with own private key

In [29]:
# Second encryption pass: each client encrypts the other party's self-encrypted
# sum of common hashed values with its own private key.
# In the recorded output every call prints one "Length of read data: 1" line.
gojek.encrypt_sum_of_common_hashed_values(True) # is_other_party=True: operate on the other party's
                                                # self-encrypted sum of common hashed values
grab.encrypt_sum_of_common_hashed_values(True)

Length of read data: 1
Length of read data: 1


### Clients' status after step 3 (value of variables in client)
Note: The actual value is not to be shared yet; therefore, the files' status after step 3 is not shown.

In [30]:
# client-side view once each party also holds its "actual value"
d = {
    "gojek common hashed values": gojek.my_common_hashes,
    "grab common hashed values": grab.my_common_hashes,
    "gojek sum of common hashed values": gojek.my_sum_of_hashes,
    "grab sum of common hashed values": grab.my_sum_of_hashes,
    "gojek self-encrypted sum of common hashed values": gojek.my_self_encrypted_sum_of_hashes,
    "grab self-encrypted sum of common hashed values": grab.my_self_encrypted_sum_of_hashes,
    "gojek actual value": gojek.my_actual_value,
    "grab actual value": grab.my_actual_value,
}
# one pd.Series per column, so columns of different lengths fit in a single frame
df = pd.DataFrame({label: pd.Series(values) for label, values in d.items()})
df

Unnamed: 0,gojek common hashed values,grab common hashed values,gojek sum of common hashed values,grab sum of common hashed values,gojek self-encrypted sum of common hashed values,grab self-encrypted sum of common hashed values,gojek actual value,grab actual value
0,4320682999770379726688099447684191745405506617...,4320682999770379726688099447684191745405506617...,5621241956667821988738408602452784812336941927...,5621241956667821988738408602452784812336941927...,4568627047073890180917101018355802971313330978...,9307148916099770032512052294295423470897901917...,1414036638766539567122929259269586339093892299...,1414036638766539567122929259269586339093892299...
1,8470801019739050656518087317414089766837623775...,1281724450698210917073112527506677771741219407...,,,,,,
2,1281724450698210917073112527506677771741219407...,8470801019739050656518087317414089766837623775...,,,,,,
3,1014864025876262450241425463322343998244080336...,1014864025876262450241425463322343998244080336...,,,,,,
4,8980623472061033068078176213550735202130364549...,8385332968680517729805644468005968679470991855...,,,,,,
5,8385332968680517729805644468005968679470991855...,2862142013587629757574505891453542162181560643...,,,,,,
6,2862142013587629757574505891453542162181560643...,2897733162772679702778317671979167074821905176...,,,,,,
7,1103882088551807418117675905119429741273074924...,1103882088551807418117675905119429741273074924...,,,,,,
8,6212011344629679795230035246755687194777942179...,6212011344629679795230035246755687194777942179...,,,,,,
9,2897733162772679702778317671979167074821905176...,8980623472061033068078176213550735202130364549...,,,,,,


## Step 4: Commit to a value 

In [31]:
# Commit step: each client hashes the other party's doubly-encrypted sum of hashes,
#   (other_party_sum_of_hashes ^ (client_secret * other_party_secret)) mod p,
# and sends h(...) of that value to the other party, i.e. writes it to file.
gojek.commit_to_hash_value() 
grab.commit_to_hash_value()

### Clients' status after step 4 (value of variables in client)


In [32]:
# client-side view after both parties have committed to a hash value
d = {
    "gojek common hashed values": gojek.my_common_hashes,
    "grab common hashed values": grab.my_common_hashes,
    "gojek sum of common hashed values": gojek.my_sum_of_hashes,
    "grab sum of common hashed values": grab.my_sum_of_hashes,
    "gojek self-encrypted sum of common hashed values": gojek.my_self_encrypted_sum_of_hashes,
    "grab self-encrypted sum of common hashed values": grab.my_self_encrypted_sum_of_hashes,
    "gojek actual value": gojek.my_actual_value,
    "grab actual value": grab.my_actual_value,
    "gojek committed value": gojek.my_committed_value,
    "grab committed value": grab.my_committed_value,
}
# one pd.Series per column, so columns of different lengths fit in a single frame
df = pd.DataFrame({label: pd.Series(values) for label, values in d.items()})
df

Unnamed: 0,gojek common hashed values,grab common hashed values,gojek sum of common hashed values,grab sum of common hashed values,gojek self-encrypted sum of common hashed values,grab self-encrypted sum of common hashed values,gojek actual value,grab actual value,gojek committed value,grab committed value
0,4320682999770379726688099447684191745405506617...,4320682999770379726688099447684191745405506617...,5621241956667821988738408602452784812336941927...,5621241956667821988738408602452784812336941927...,4568627047073890180917101018355802971313330978...,9307148916099770032512052294295423470897901917...,1414036638766539567122929259269586339093892299...,1414036638766539567122929259269586339093892299...,9203771951702457147715724390768691153158397930...,9203771951702457147715724390768691153158397930...
1,8470801019739050656518087317414089766837623775...,1281724450698210917073112527506677771741219407...,,,,,,,,
2,1281724450698210917073112527506677771741219407...,8470801019739050656518087317414089766837623775...,,,,,,,,
3,1014864025876262450241425463322343998244080336...,1014864025876262450241425463322343998244080336...,,,,,,,,
4,8980623472061033068078176213550735202130364549...,8385332968680517729805644468005968679470991855...,,,,,,,,
5,8385332968680517729805644468005968679470991855...,2862142013587629757574505891453542162181560643...,,,,,,,,
6,2862142013587629757574505891453542162181560643...,2897733162772679702778317671979167074821905176...,,,,,,,,
7,1103882088551807418117675905119429741273074924...,1103882088551807418117675905119429741273074924...,,,,,,,,
8,6212011344629679795230035246755687194777942179...,6212011344629679795230035246755687194777942179...,,,,,,,,
9,2897733162772679702778317671979167074821905176...,8980623472061033068078176213550735202130364549...,,,,,,,,


### Files' status after step 4 (value of variables in file - clients' variables encrypted with Fernet)


In [33]:
# fetch both clients' data again so the frames include the committed values
# (get_my_data() is defined outside this excerpt)
df_gojek, df_grab = gojek.get_my_data(), grab.get_my_data()
print("gojek's file:")
# select the two columns of interest, then drop every row with a missing value
df_gojek.loc[:, ["my_self_encrypted_sum_of_hashes", "my_committed_value"]].dropna()

gojek's file:


Unnamed: 0,my_self_encrypted_sum_of_hashes,my_committed_value
0,b'gAAAAABhoxbJddWb6ALU8wumrq_o0tbqSFKq2CDUc2N1...,b'gAAAAABhoxbKDvkD-KK7rCdPcBEyNaM8rh_knMixkoWC...


In [34]:
print("grab's file:")
# select the two columns of interest, then drop every row with a missing value
df_grab.loc[:, ["my_self_encrypted_sum_of_hashes", "my_committed_value"]].dropna()

grab's file:


Unnamed: 0,my_self_encrypted_sum_of_hashes,my_committed_value
0,b'gAAAAABhoxbJYMoAaC8mDlImRIA2GPBJEElMraVN2sgo...,b'gAAAAABhoxbKO70xqsHtXDEMbxN4r0KOH7NRiYPQZgJL...


## Step 5: Reveal actual value

In [35]:
# Reveal step: each client sends its actual value,
#   (other_party_sum_of_hashes ^ (client_secret * other_party_secret)) mod p,
# i.e. writes it to file.
grab.reveal_actual_value() # order matters: grab has to reveal its actual value before gojek, since gojek was the first to commit to a value
gojek.reveal_actual_value()


### Clients' status after step 5 (value of variables in client)


In [36]:
# client-side view after both parties have revealed their actual values
d = {
    "gojek common hashed values": gojek.my_common_hashes,
    "grab common hashed values": grab.my_common_hashes,
    "gojek sum of common hashed values": gojek.my_sum_of_hashes,
    "grab sum of common hashed values": grab.my_sum_of_hashes,
    "gojek self-encrypted sum of common hashed values": gojek.my_self_encrypted_sum_of_hashes,
    "grab self-encrypted sum of common hashed values": grab.my_self_encrypted_sum_of_hashes,
    "gojek actual value": gojek.my_actual_value,
    "grab actual value": grab.my_actual_value,
    "gojek committed value": gojek.my_committed_value,
    "grab committed value": grab.my_committed_value,
}
# one pd.Series per column, so columns of different lengths fit in a single frame
df = pd.DataFrame({label: pd.Series(values) for label, values in d.items()})
df


Unnamed: 0,gojek common hashed values,grab common hashed values,gojek sum of common hashed values,grab sum of common hashed values,gojek self-encrypted sum of common hashed values,grab self-encrypted sum of common hashed values,gojek actual value,grab actual value,gojek committed value,grab committed value
0,4320682999770379726688099447684191745405506617...,4320682999770379726688099447684191745405506617...,5621241956667821988738408602452784812336941927...,5621241956667821988738408602452784812336941927...,4568627047073890180917101018355802971313330978...,9307148916099770032512052294295423470897901917...,1414036638766539567122929259269586339093892299...,1414036638766539567122929259269586339093892299...,9203771951702457147715724390768691153158397930...,9203771951702457147715724390768691153158397930...
1,8470801019739050656518087317414089766837623775...,1281724450698210917073112527506677771741219407...,,,,,,,,
2,1281724450698210917073112527506677771741219407...,8470801019739050656518087317414089766837623775...,,,,,,,,
3,1014864025876262450241425463322343998244080336...,1014864025876262450241425463322343998244080336...,,,,,,,,
4,8980623472061033068078176213550735202130364549...,8385332968680517729805644468005968679470991855...,,,,,,,,
5,8385332968680517729805644468005968679470991855...,2862142013587629757574505891453542162181560643...,,,,,,,,
6,2862142013587629757574505891453542162181560643...,2897733162772679702778317671979167074821905176...,,,,,,,,
7,1103882088551807418117675905119429741273074924...,1103882088551807418117675905119429741273074924...,,,,,,,,
8,6212011344629679795230035246755687194777942179...,6212011344629679795230035246755687194777942179...,,,,,,,,
9,2897733162772679702778317671979167074821905176...,8980623472061033068078176213550735202130364549...,,,,,,,,


Note: No change from step 4

### Files' status after step 5 (value of variables in file - clients' variables encrypted with Fernet)


In [37]:
# fetch both clients' data again so the frames include the revealed actual values
# (get_my_data() is defined outside this excerpt)
df_gojek, df_grab = gojek.get_my_data(), grab.get_my_data()
print("gojek's file:")
# select the three columns of interest, then drop every row with a missing value
df_gojek.loc[:, ["my_self_encrypted_sum_of_hashes", "my_committed_value", "my_actual_value"]].dropna()

gojek's file:


Unnamed: 0,my_self_encrypted_sum_of_hashes,my_committed_value,my_actual_value
0,b'gAAAAABhoxbJddWb6ALU8wumrq_o0tbqSFKq2CDUc2N1...,b'gAAAAABhoxbKDvkD-KK7rCdPcBEyNaM8rh_knMixkoWC...,b'gAAAAABhoxbKPeSu9rRAvsL-_bffpMT29PG79JFqJZ2_...


In [38]:
print("grab's file:")
# select the three columns of interest, then drop every row with a missing value
df_grab.loc[:, ["my_self_encrypted_sum_of_hashes", "my_committed_value", "my_actual_value"]].dropna()

grab's file:


Unnamed: 0,my_self_encrypted_sum_of_hashes,my_committed_value,my_actual_value
0,b'gAAAAABhoxbJYMoAaC8mDlImRIA2GPBJEElMraVN2sgo...,b'gAAAAABhoxbKO70xqsHtXDEMbxN4r0KOH7NRiYPQZgJL...,b'gAAAAABhoxbKrwJMtzu5YAwhq3A0SxNfWq637FphW_vd...


## Step 6: Verify exchanged values to verify no shuffling of encrypted set

In [39]:
# Check 1: is the other party's committed value the hash of the actual value it revealed?
# Each label is printed without a newline so the client's own output follows it.
print("gojek: ", end="")
gojek_match = gojek.check_if_actual_and_committed_values_match()
print("grab: ", end="")
grab_match = grab.check_if_actual_and_committed_values_match()

# Check 2: is the other party's actual value the same as this client's own?
# The call is the left operand of `and`, so it always runs, even when check 1 failed.
print("gojek: ", end="")
gojek_match = gojek.check_if_actual_values_match() and gojek_match
print("grab: ", end="")
grab_match = grab.check_if_actual_values_match() and grab_match

# verdict: both clients must have passed both checks
print("No cheating occurred!" if grab_match and gojek_match else "Someone cheated!")

gojek: Length of read data: 1
Length of read data: 1
committed and actual values match!
grab: Length of read data: 1
Length of read data: 1
committed and actual values match!
gojek: Length of read data: 1
actual values match!
grab: Length of read data: 1
actual values match!
No cheating occurred!
