In [1]:
# import libraries (non-cryptogaphic)
import random # to generate phone numbers
import pandas as pd
from functools import reduce
import os

# import libraries (cryptographic)
import cryptography.hazmat.primitives.asymmetric.dh as dh
from cryptography.fernet import Fernet
import hashlib
import sympy
import secrets

# Generate phone numbers and store them in phone_numbers.csv file

In [2]:
random.seed(10) # to ensure same phone numbers generated every time

In [3]:
# class to generate phone numbers for grab and gojek
class PhoneNumberGenerator:
    def __call__(self, count):
        phone_numbers = random.sample(range(80000000,100000000), count)
        return phone_numbers 

class FakePhoneNumberGenerator:
    def __call__(self, count):
        numbers = random.sample(range(10000000,80000000), count) # invalid phone numbers generated
        return numbers
            
    
# class to store numbers to csv
class PhoneNumberStorageManager:
    def __init__(self):
        self.filename = "phone_numbers_padded.csv"
    def __call__(self, gojek_phone_numbers, grab_phone_numbers):
        d = {"gojek": gojek_phone_numbers, 
            "grab": grab_phone_numbers}
        df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()])) # create dataframe
        df.to_csv(self.filename, index = False)

In [4]:
set_size = 100 # both parties are to exchange sets of a pre-determined size

# generate and store phone numbers
gojek_phone_number_count = 61 # inclusive of phone numbers in common with grab
grab_phone_number_count = 91 # inclusive of phone numbers in common with gojek
common_phone_number_count = 10

# instantiate required classes
phone_number_generator = PhoneNumberGenerator()
fake_phone_number_generator = FakePhoneNumberGenerator()
phone_number_storage_manager = PhoneNumberStorageManager()

# generate phone numbers
phone_numbers = phone_number_generator(gojek_phone_number_count+grab_phone_number_count-common_phone_number_count)
common_phone_numbers = phone_numbers[0:common_phone_number_count]
gojek_phone_numbers = phone_numbers[0:gojek_phone_number_count]
grab_phone_numbers = common_phone_numbers + phone_numbers[gojek_phone_number_count:] 

# generate fake phone numbers
gojek_fake_phone_numbers = fake_phone_number_generator(set_size-gojek_phone_number_count)
grab_fake_phone_numbers = fake_phone_number_generator(set_size-grab_phone_number_count)

# add fake phone numbers to phone numbers
gojek_padded_set = gojek_phone_numbers + gojek_fake_phone_numbers
grab_padded_set = grab_phone_numbers + grab_fake_phone_numbers

# shuffle phone number lists
random.shuffle(gojek_padded_set)
random.shuffle(grab_padded_set)

# write phone numbers (with fake ones) to csv file
phone_number_storage_manager(gojek_padded_set, grab_padded_set)

# Define classes for the necessary for the algorithm

In [5]:
# class to generate clients' private secret
class NumberGenerator:
    
    def generate_public_parameters(self, size):
        p = self.generate_safe_prime(size)
        print(f"p is prime: {sympy.ntheory.isprime(p)}")
        length_of_p = len(bin(p)[2:]) # should be 1024
        print(f"Length of prime modulus, p: {length_of_p}.\nNote: Should be {size}.")
        q = (p-1)//2
        print(f"q is prime: {sympy.ntheory.isprime(q)}")
        factors_pminus1 = [1, 2, q]
        
        return p, factors_pminus1

    def generate_safe_prime(self, size):
        candidate = dh.generate_parameters(2, size).parameter_numbers().p # generate 1024-bit prime number
        while True:
            is_safe_prime = sympy.ntheory.isprime((candidate-1)//2) # if safe prime, (candidate-1)/2 is prime
            if (is_safe_prime):
                break
            else:
                candidate = dh.generate_parameters(2, size).parameter_numbers().p
                print(candidate)
 
        return candidate
    
    def generate_random_number(self, size):
        return secrets.randbits(size)
            
# class to inspect values          
class NumberInspector:
    
    def check_is_primitive_generator(self, candidate, factors_divisorminus1, divisor): # note: factors should be the factors of divisor-1
        
        # apply lagrange theorem
        for possible_order in factors_divisorminus1: # check congruence for all factors (factors is exclusive of p-1 itself)
            result = pow(candidate, possible_order, divisor) # fast modular exponentiation
            if (result == 1):
                return False
            
        return True 

    
class StorageManager:
    
    def store_data(self, filename, data):
        df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in data.items()])) # create dataframe
        df.to_csv(filename, index=False)

# Create psi client class

In [6]:
# client class (both grab and gojek are clients communicating directly with each other)
class Client:
    def __init__(self, name, other_party_name, private_key_size, phone_numbers, p, factors_pminus1, fernet_key):
        
        self.number_inspector = NumberInspector()
        self.number_generator = NumberGenerator()
        
        # for asymmetric encryption
        self.private_key = self.number_generator.generate_random_number(private_key_size)
        self.my_set = phone_numbers
        self.p = p
        self.factors_pminus1 = factors_pminus1
        
        # for symmetric encryption
        self.fernet_key = fernet_key
        self.f = Fernet(fernet_key)
        
        self.my_hashed_set = None
        self.my_self_encrypted_set = None
        self.my_encrypted_set = None
        self.other_party_encrypted_set = None
        self.common_values = None
        
        # create datafile for communication with another party
        # only need to share self_encrypted_values and other_party_encrypted_values
        self.my_dict = {
            'my_self_encrypted_set': None, 
            'other_party_encrypted_set': None,
            'common_values': None
        }
        
        self.name = name
        self.filename = name + "_data_v1.2.csv"
        self.other_party_name = other_party_name
        self.other_party_filename = other_party_name + "_data_v1.2.csv"
        
        self.storage_manager = StorageManager()
        self.storage_manager.store_data(self.filename, self.my_dict)

    def hash_to_primitive_root_modulo_p(self, element):     
        endian = "big"
        element = element.to_bytes(4, endian)
        hash_hex = hashlib.sha256(element).hexdigest() #sha3_256
        hash_int = int(hash_hex, 16)
        while True:
            is_primitive_generator = self.number_inspector.check_is_primitive_generator(
                hash_int, self.factors_pminus1, self.p
            )
            if (is_primitive_generator):
                break
            else:
                hash_int = hash_int.to_bytes(32, endian)
                hash_hex = hashlib.sha256(hash_int).hexdigest()
                hash_int = int(hash_hex, 16)
        return hash_int
    
    def modular_exponentation(self, element):
        return pow(element, self.private_key, self.p)
    
    def hash_set(self):
        
        self.my_hashed_set = []
        
        for element in self.my_set:
            hashed_value = self.hash_to_primitive_root_modulo_p(element)
            self.my_hashed_set.append(hashed_value)
            
    def encrypt_set(self, is_other_party):
        
        if (is_other_party):
            decrypted_other_party_set = self.receive_data("my_self_encrypted_set")
            other_party_set_int = []
            for element_string in decrypted_other_party_set:
                other_party_set_int.append(int(element_string))
            set_to_encrypt = other_party_set_int
        else:
            set_to_encrypt = self.my_hashed_set
        
        encrypted_values = []
        for element in set_to_encrypt:
            encrypted_value = self.modular_exponentation(element)
            encrypted_values.append(encrypted_value)
            
        if (is_other_party):
            self.other_party_encrypted_set = encrypted_values
            self.send_data(encrypted_values, "other_party_encrypted_set")
        else:
            self.my_self_encrypted_set = encrypted_values
            self.send_data(encrypted_values, "my_self_encrypted_set")
                    
        
    def get_intersection(self):
        
        my_encrypted_set = self.receive_data("other_party_encrypted_set") # encrypted refers to asymmetric_crypto

        my_encrypted_set_int = []
        for element in my_encrypted_set:
            my_encrypted_set_int.append(int(element))
        self.my_encrypted_set = my_encrypted_set_int
        encrypted_common_values = set(self.my_encrypted_set).intersection(self.other_party_encrypted_set)
        index_of_common_values = []
        
        for element in encrypted_common_values:
            index_of_common_values.append(self.my_encrypted_set.index(element))
            
        self.common_values = []
        
        for index in index_of_common_values:
            potential_phone_number = self.my_set[index]
            if ((potential_phone_number-79999999)>0): # remove any potential fake numbers that intersect
                self.common_values.append(potential_phone_number)
        self.my_dict["common_values"] = self.common_values
        self.send_data(self.common_values, "common_values")
    
    def encrypt_data(self, plaintext):
        endian = "big"
        element = plaintext.to_bytes(128, endian) # 1024 bits == 128 bytes
        cipher_text = self.f.encrypt(element)
        return cipher_text
    
    def decrypt_data(self, ciphertext):
        ciphertext_bytes = ciphertext.encode('utf-8')[2:-1] # convert from string back to bytes
        endian = "big"
        element_in_bytes = self.f.decrypt(ciphertext_bytes)
        plaintext = int.from_bytes(element_in_bytes, endian)
        return plaintext

    def send_data(self, data_to_send, column_name):
        # send data means writing to file
        encrypted_data_to_send = []
        for element in data_to_send:
            encrypted_element = self.encrypt_data(element)
            encrypted_data_to_send.append(encrypted_element)
        self.my_dict[column_name] = encrypted_data_to_send
        self.storage_manager.store_data(self.filename, self.my_dict)
        
        
    def receive_data(self, column_name):
        # receive data means reading from file
        encrypted_data = self.get_other_party_data()[column_name].to_list()
        decrypted_data = []
        for element in encrypted_data:
            if type(element) is float:
                continue
            decrypted_element = self.decrypt_data(element)
            decrypted_data.append(decrypted_element)
        print(f"number of elements in {self.other_party_name};{column_name}: {len(decrypted_data)}.")
        return decrypted_data
            
    def get_my_data(self):
        return pd.read_csv(self.filename)
    
    def get_other_party_data(self):
        return pd.read_csv(self.other_party_filename)
    
    


# Initialize context

In [7]:
# assign pre-determined variables for psi
key_size = 1024 # both private keys and large prime

# create key for symmetric key cryptography
fernet_key = Fernet.generate_key()

# create public parameters required for psi
number_generator = NumberGenerator()
p, factors_pminus1 = number_generator.generate_public_parameters(key_size)

# create clients
grab = Client("grab", "gojek", key_size, grab_padded_set, p, factors_pminus1, fernet_key)
gojek = Client("gojek", "grab", key_size, gojek_padded_set, p, factors_pminus1, fernet_key)



p is prime: True
Length of prime modulus, p: 1024.
Note: Should be 1024.
q is prime: True


  df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in data.items()])) # create dataframe


# Get intersection

## Step 1: Hash phone numbers

In [8]:
# clients hash their own set
grab.hash_set()
gojek.hash_set()

### Clients' status after step 1
Note: hashed set is not stored in the file as it is not meant to be shared with the other party hence, files' status after step 1 is not shown

In [9]:
d = {"gojek hashed set": gojek.my_hashed_set,
    "grab hashed set": grab.my_hashed_set}
df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()]))
df

Unnamed: 0,gojek hashed set,grab hashed set
0,3444554169003369415400187425444370377909224933...,1014864025876262450241425463322343998244080336...
1,3134205932310820582994345236779128274482638005...,6555446118383117843414628061923602991736146568...
2,5773315736316327673762720174221743416131441931...,2408490635946548862425293650149893980450258320...
3,5203358287300148779441149003132562463607163612...,4314342032682825508775434884617987666037471300...
4,5239384073106902936597081070089405716452234004...,4320682999770379726688099447684191745405506617...
...,...,...
95,8092639001204638790178908996283896242788771916...,2064846653753480132804612906272453669387169259...
96,7960204644377876874342383407634690809702051347...,8866985291088494051041055141151271041321507008...
97,1050996311389914722852387111588332931098320412...,7426436912110168070067487716578277968813643062...
98,9196222794520213004035745078489116002064573260...,3348291576564155323004778324399929055136385733...


## Step 2: encrypt hashed set with own private key

In [10]:
# clients self encrypt hashed set
grab.encrypt_set(False) # set is_other_party to false to encrypt own hashed set
gojek.encrypt_set(False)

  df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in data.items()])) # create dataframe


### Clients' status after step 2 (value of variables in client)

In [11]:
d = {"gojek hashed set": gojek.my_hashed_set,
    "grab hashed set": grab.my_hashed_set,
    "gojek self-encrypted set": gojek.my_self_encrypted_set,
    "grab self-encrypted set": grab.my_self_encrypted_set}
df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()]))
df

Unnamed: 0,gojek hashed set,grab hashed set,gojek self-encrypted set,grab self-encrypted set
0,3444554169003369415400187425444370377909224933...,1014864025876262450241425463322343998244080336...,4501146644921474793844938285551005771669082211...,7211205826182732818936148899799083665290210405...
1,3134205932310820582994345236779128274482638005...,6555446118383117843414628061923602991736146568...,9662592724395124702843977854843742158144303561...,4371685597636754583145143962653086369666919718...
2,5773315736316327673762720174221743416131441931...,2408490635946548862425293650149893980450258320...,8710978190008393812185385108090280738926867877...,4124638598073210844414549040486460318719740825...
3,5203358287300148779441149003132562463607163612...,4314342032682825508775434884617987666037471300...,8478037679887378835596707930335362068192787865...,7641695233119428146978739041051583067412923330...
4,5239384073106902936597081070089405716452234004...,4320682999770379726688099447684191745405506617...,1273026878062007769408924689428915731965704708...,7709358120393206342316577661277739249828350295...
...,...,...,...,...
95,8092639001204638790178908996283896242788771916...,2064846653753480132804612906272453669387169259...,2361977100082053641583857150449954552588327657...,1461371820530994469233786981164505064416917514...
96,7960204644377876874342383407634690809702051347...,8866985291088494051041055141151271041321507008...,2647671168101668367188098623322506224272465319...,9628878055862029581360682335514413206711290033...
97,1050996311389914722852387111588332931098320412...,7426436912110168070067487716578277968813643062...,4936613208925839838136059642089657487283003646...,5364834539000174421608105695669462772666723133...
98,9196222794520213004035745078489116002064573260...,3348291576564155323004778324399929055136385733...,3278602762142776414496509947755485200528529716...,1192449054345711265953824003196944617369875252...


### Files' status after step 2 (value of variables in file - clients' variables encrypted with Fernet)

In [12]:
df_gojek = gojek.get_my_data()
df_grab = grab.get_my_data()
print("gojek's file:")
df_gojek


gojek's file:


Unnamed: 0,my_self_encrypted_set,other_party_encrypted_set,common_values
0,b'gAAAAABhoT6FZZjU9b8uLqyWfAr7wnW9QlGeVd5to5Vc...,,
1,b'gAAAAABhoT6FHlC00ozGUQ3jAtVwX26amLNrNVKFRd5X...,,
2,b'gAAAAABhoT6FjeSSjkKKNJTxFKptIjpGvrfWyA2d-nEF...,,
3,b'gAAAAABhoT6FMCBKE5nq5sgjoEJ4mWlTEZ_VQnfhraWP...,,
4,b'gAAAAABhoT6FFFzFzqVDK7k-9HrIDwD58-VcyRoW7tg9...,,
...,...,...,...
95,b'gAAAAABhoT6FMxVFKXS0BauxeIQQtG-e01UphHsRzmv8...,,
96,b'gAAAAABhoT6FKCfw39Vx14qNCdh-gL5qSj_UotPiGoP9...,,
97,b'gAAAAABhoT6FTYnw6TpuoiXfA7daTQbmw-2KWpd5pbt7...,,
98,b'gAAAAABhoT6FAZ-HxxZvnKpd88qyZSUKiUk7Q33NmhAG...,,


In [13]:
print("grab's file:")
df_grab

grab's file:


Unnamed: 0,my_self_encrypted_set,other_party_encrypted_set,common_values
0,b'gAAAAABhoT6F3Lh9Ae0YEO_0F6bNg7pn27_VHmU_3qR0...,,
1,b'gAAAAABhoT6Fw4iNkKuwLZH7YJTuvH5h5saTxtDOHNEJ...,,
2,b'gAAAAABhoT6FsIa-98eZKBfGSLZgMfOvYYjfHlxZBfw5...,,
3,b'gAAAAABhoT6FeCPhYLUpbKA-WKco8fuz_-XGApUuPCQe...,,
4,b'gAAAAABhoT6F-ejpbZ4HKK_QDsCsIZ9r_vy9j0z6JDhI...,,
...,...,...,...
95,b'gAAAAABhoT6FPuX93RsQ_uuqXJDm9g2xHC9KTlD8TztE...,,
96,b'gAAAAABhoT6FHZaWx1Fbz14OvJ5KOOUc010DLTEcj-H3...,,
97,b'gAAAAABhoT6F5ASRqMNSInfrqz0iGDt3PxnCjGw4nkZx...,,
98,b'gAAAAABhoT6FoZngyLiCOSUsqvZ51lb74uR8PXhxSwX8...,,


## Step 3: encrypt other party's self-encrypted set with own private key


In [14]:
# clients encrypt other party's self encrypted set
grab.encrypt_set(True) # set is_other_party to true
gojek.encrypt_set(True)

number of elements in gojek;my_self_encrypted_set: 100.


  df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in data.items()])) # create dataframe


number of elements in grab;my_self_encrypted_set: 100.


### Clients' status after step 3

In [15]:
d = {"gojek hashed set": gojek.my_hashed_set,
    "grab hashed set": grab.my_hashed_set,
    "gojek self-encrypted set": gojek.my_self_encrypted_set,
    "grab self-encrypted set": grab.my_self_encrypted_set,
    "gojek encrypted set": grab.other_party_encrypted_set,
    "grab encrypted set": gojek.other_party_encrypted_set}
df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()]))
df

Unnamed: 0,gojek hashed set,grab hashed set,gojek self-encrypted set,grab self-encrypted set,gojek encrypted set,grab encrypted set
0,3444554169003369415400187425444370377909224933...,1014864025876262450241425463322343998244080336...,4501146644921474793844938285551005771669082211...,7211205826182732818936148899799083665290210405...,8756458742145467870207520050170870461641378674...,7086956033284197776852838851267786520807609473...
1,3134205932310820582994345236779128274482638005...,6555446118383117843414628061923602991736146568...,9662592724395124702843977854843742158144303561...,4371685597636754583145143962653086369666919718...,3480127876432675239397977138844652204804456649...,4228870427192147187998575320630049264837012054...
2,5773315736316327673762720174221743416131441931...,2408490635946548862425293650149893980450258320...,8710978190008393812185385108090280738926867877...,4124638598073210844414549040486460318719740825...,1341290949923304629290024170610107968811063790...,4884027567161449016496728986968256288717114677...
3,5203358287300148779441149003132562463607163612...,4314342032682825508775434884617987666037471300...,8478037679887378835596707930335362068192787865...,7641695233119428146978739041051583067412923330...,8700841606126646095954136145627922419926836944...,8002435343006517467581888717776221549550727064...
4,5239384073106902936597081070089405716452234004...,4320682999770379726688099447684191745405506617...,1273026878062007769408924689428915731965704708...,7709358120393206342316577661277739249828350295...,1088970797436968243071025130771792888486810409...,1254911382606588753441645237293255575304577880...
...,...,...,...,...,...,...
95,8092639001204638790178908996283896242788771916...,2064846653753480132804612906272453669387169259...,2361977100082053641583857150449954552588327657...,1461371820530994469233786981164505064416917514...,5824415790668950262895534225699738343902901381...,1026879721346563555648662960544156151124383661...
96,7960204644377876874342383407634690809702051347...,8866985291088494051041055141151271041321507008...,2647671168101668367188098623322506224272465319...,9628878055862029581360682335514413206711290033...,5055315469325893522786099382983587926726654314...,9825223028438531193701601102863148397094987176...
97,1050996311389914722852387111588332931098320412...,7426436912110168070067487716578277968813643062...,4936613208925839838136059642089657487283003646...,5364834539000174421608105695669462772666723133...,7673300956259079119071194876897082801362698813...,8914936012490974460631866458986392879645366317...
98,9196222794520213004035745078489116002064573260...,3348291576564155323004778324399929055136385733...,3278602762142776414496509947755485200528529716...,1192449054345711265953824003196944617369875252...,1489264762302454620851533113588539320755786804...,6407456483292052575639243498572173877946715130...


### Files' status after step 3

In [16]:
df_gojek = gojek.get_my_data()
df_grab = grab.get_my_data()
print("gojek's file:")
df_gojek



gojek's file:


Unnamed: 0,my_self_encrypted_set,other_party_encrypted_set,common_values
0,b'gAAAAABhoT6FZZjU9b8uLqyWfAr7wnW9QlGeVd5to5Vc...,b'gAAAAABhoT6HuDkK_1z3SP3BduQLoi8GDnli20QjDPNT...,
1,b'gAAAAABhoT6FHlC00ozGUQ3jAtVwX26amLNrNVKFRd5X...,b'gAAAAABhoT6HjX61VUPu_JfO8XkT8u47cWcatJZEj7iC...,
2,b'gAAAAABhoT6FjeSSjkKKNJTxFKptIjpGvrfWyA2d-nEF...,b'gAAAAABhoT6HkCYsx2x5q9qMMThIpho2rL_DN-kjFqi0...,
3,b'gAAAAABhoT6FMCBKE5nq5sgjoEJ4mWlTEZ_VQnfhraWP...,b'gAAAAABhoT6HxiU8ALgyxHChZa2pDSW3eY8KPYFDhkHx...,
4,b'gAAAAABhoT6FFFzFzqVDK7k-9HrIDwD58-VcyRoW7tg9...,b'gAAAAABhoT6HYT18yHowIRphcZrLvm3iiboMf1wmeCZc...,
...,...,...,...
95,b'gAAAAABhoT6FMxVFKXS0BauxeIQQtG-e01UphHsRzmv8...,b'gAAAAABhoT6Ht0i3rc4QMq6XAfOk6BlC14vDR4G8pA_R...,
96,b'gAAAAABhoT6FKCfw39Vx14qNCdh-gL5qSj_UotPiGoP9...,b'gAAAAABhoT6HIR8lZfp4P1RMy5S7eIv2cuCXWoApK4P-...,
97,b'gAAAAABhoT6FTYnw6TpuoiXfA7daTQbmw-2KWpd5pbt7...,b'gAAAAABhoT6HR4ZGbw72A56ZqRNMezc8AZC8ulGNFR6r...,
98,b'gAAAAABhoT6FAZ-HxxZvnKpd88qyZSUKiUk7Q33NmhAG...,b'gAAAAABhoT6HZhPCQpgFDXkh6so5ItKeOmdHbFjPpxiv...,


In [17]:
print("grab's file:")
df_grab

grab's file:


Unnamed: 0,my_self_encrypted_set,other_party_encrypted_set,common_values
0,b'gAAAAABhoT6F3Lh9Ae0YEO_0F6bNg7pn27_VHmU_3qR0...,b'gAAAAABhoT6GIrijnesmPUA4GkKBz-y9HPadhjnfI1NO...,
1,b'gAAAAABhoT6Fw4iNkKuwLZH7YJTuvH5h5saTxtDOHNEJ...,b'gAAAAABhoT6Goj7i7wolZVbZzF7ZuPimeMtotqQY0n0j...,
2,b'gAAAAABhoT6FsIa-98eZKBfGSLZgMfOvYYjfHlxZBfw5...,b'gAAAAABhoT6GUNz71ymIJtQFuVjlG1rVdca53sqj0lKj...,
3,b'gAAAAABhoT6FeCPhYLUpbKA-WKco8fuz_-XGApUuPCQe...,b'gAAAAABhoT6GluFq4wSGkX7vAXrAZ2GTSiTq54aGdwKk...,
4,b'gAAAAABhoT6F-ejpbZ4HKK_QDsCsIZ9r_vy9j0z6JDhI...,b'gAAAAABhoT6GbeRD-GUa-a3zbktmL1J4cRLkdgmjKXUY...,
...,...,...,...
95,b'gAAAAABhoT6FPuX93RsQ_uuqXJDm9g2xHC9KTlD8TztE...,b'gAAAAABhoT6G6QFBWAcQwl1b_jBUMG28JiMNtsfy746R...,
96,b'gAAAAABhoT6FHZaWx1Fbz14OvJ5KOOUc010DLTEcj-H3...,b'gAAAAABhoT6GwZz8c27IJm3cjieuRJrC7oeu7OD6RHb1...,
97,b'gAAAAABhoT6F5ASRqMNSInfrqz0iGDt3PxnCjGw4nkZx...,b'gAAAAABhoT6G4fikDEN8ILPnNoA51rqTB-vd0UdWOPjA...,
98,b'gAAAAABhoT6FoZngyLiCOSUsqvZ51lb74uR8PXhxSwX8...,b'gAAAAABhoT6GEwlX6h-DAPCXC9FGyo0Xj96ZHUrGUJEW...,


## Step 4: find intersection


In [18]:
# clients find intersection
grab.get_intersection()
gojek.get_intersection()

number of elements in gojek;other_party_encrypted_set: 100.
number of elements in grab;other_party_encrypted_set: 100.


### Clients' status after step 4:

In [19]:
d = {"gojek hashed set": gojek.my_hashed_set,
    "grab hashed set": grab.my_hashed_set,
    "gojek self-encrypted set": gojek.my_self_encrypted_set,
    "grab self-encrypted set": grab.my_self_encrypted_set,
    "gojek encrypted set": grab.other_party_encrypted_set,
    "grab encrypted set": gojek.other_party_encrypted_set,
    "gojek found intersection": gojek.common_values,
    "grab found intersection:": grab.common_values}
df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()]))
df

Unnamed: 0,gojek hashed set,grab hashed set,gojek self-encrypted set,grab self-encrypted set,gojek encrypted set,grab encrypted set,gojek found intersection,grab found intersection:
0,3444554169003369415400187425444370377909224933...,1014864025876262450241425463322343998244080336...,4501146644921474793844938285551005771669082211...,7211205826182732818936148899799083665290210405...,8756458742145467870207520050170870461641378674...,7086956033284197776852838851267786520807609473...,86915509.0,86915509.0
1,3134205932310820582994345236779128274482638005...,6555446118383117843414628061923602991736146568...,9662592724395124702843977854843742158144303561...,4371685597636754583145143962653086369666919718...,3480127876432675239397977138844652204804456649...,4228870427192147187998575320630049264837012054...,80497694.0,80497694.0
2,5773315736316327673762720174221743416131441931...,2408490635946548862425293650149893980450258320...,8710978190008393812185385108090280738926867877...,4124638598073210844414549040486460318719740825...,1341290949923304629290024170610107968811063790...,4884027567161449016496728986968256288717114677...,99397525.0,99397525.0
3,5203358287300148779441149003132562463607163612...,4314342032682825508775434884617987666037471300...,8478037679887378835596707930335362068192787865...,7641695233119428146978739041051583067412923330...,8700841606126646095954136145627922419926836944...,8002435343006517467581888717776221549550727064...,99173089.0,99173089.0
4,5239384073106902936597081070089405716452234004...,4320682999770379726688099447684191745405506617...,1273026878062007769408924689428915731965704708...,7709358120393206342316577661277739249828350295...,1088970797436968243071025130771792888486810409...,1254911382606588753441645237293255575304577880...,81093373.0,81093373.0
...,...,...,...,...,...,...,...,...
95,8092639001204638790178908996283896242788771916...,2064846653753480132804612906272453669387169259...,2361977100082053641583857150449954552588327657...,1461371820530994469233786981164505064416917514...,5824415790668950262895534225699738343902901381...,1026879721346563555648662960544156151124383661...,,
96,7960204644377876874342383407634690809702051347...,8866985291088494051041055141151271041321507008...,2647671168101668367188098623322506224272465319...,9628878055862029581360682335514413206711290033...,5055315469325893522786099382983587926726654314...,9825223028438531193701601102863148397094987176...,,
97,1050996311389914722852387111588332931098320412...,7426436912110168070067487716578277968813643062...,4936613208925839838136059642089657487283003646...,5364834539000174421608105695669462772666723133...,7673300956259079119071194876897082801362698813...,8914936012490974460631866458986392879645366317...,,
98,9196222794520213004035745078489116002064573260...,3348291576564155323004778324399929055136385733...,3278602762142776414496509947755485200528529716...,1192449054345711265953824003196944617369875252...,1489264762302454620851533113588539320755786804...,6407456483292052575639243498572173877946715130...,,


Note: Last 2 columns, unlike the rest of the columns, do not have a one-to-one mapping with other values belonging to the same row i.e. values in the last 2 columns do not have any relation to the other values in the same row as it.

### Files' status after step 4

In [20]:
df_gojek = gojek.get_my_data()
df_grab = grab.get_my_data()
print("gojek's file:")
df_gojek


gojek's file:


Unnamed: 0,my_self_encrypted_set,other_party_encrypted_set,common_values
0,b'gAAAAABhoT6FZZjU9b8uLqyWfAr7wnW9QlGeVd5to5Vc...,b'gAAAAABhoT6HuDkK_1z3SP3BduQLoi8GDnli20QjDPNT...,b'gAAAAABhoT6HqY9UOVy1Cugjq3OE-TgtK9p1QvooRDEw...
1,b'gAAAAABhoT6FHlC00ozGUQ3jAtVwX26amLNrNVKFRd5X...,b'gAAAAABhoT6HjX61VUPu_JfO8XkT8u47cWcatJZEj7iC...,b'gAAAAABhoT6HScVUFGXnL8_GOyeEsQWsBpV3Eq7mBu34...
2,b'gAAAAABhoT6FjeSSjkKKNJTxFKptIjpGvrfWyA2d-nEF...,b'gAAAAABhoT6HkCYsx2x5q9qMMThIpho2rL_DN-kjFqi0...,b'gAAAAABhoT6HPOQ3k_FStqSfvWAPrTnKduJA41HnWXPB...
3,b'gAAAAABhoT6FMCBKE5nq5sgjoEJ4mWlTEZ_VQnfhraWP...,b'gAAAAABhoT6HxiU8ALgyxHChZa2pDSW3eY8KPYFDhkHx...,b'gAAAAABhoT6Hp7riWskTgr91PUcReWwcVXZPhsLmxkHd...
4,b'gAAAAABhoT6FFFzFzqVDK7k-9HrIDwD58-VcyRoW7tg9...,b'gAAAAABhoT6HYT18yHowIRphcZrLvm3iiboMf1wmeCZc...,b'gAAAAABhoT6H037Xi_Dd4ecMsD6V29VQsAmSwb02apkw...
...,...,...,...
95,b'gAAAAABhoT6FMxVFKXS0BauxeIQQtG-e01UphHsRzmv8...,b'gAAAAABhoT6Ht0i3rc4QMq6XAfOk6BlC14vDR4G8pA_R...,
96,b'gAAAAABhoT6FKCfw39Vx14qNCdh-gL5qSj_UotPiGoP9...,b'gAAAAABhoT6HIR8lZfp4P1RMy5S7eIv2cuCXWoApK4P-...,
97,b'gAAAAABhoT6FTYnw6TpuoiXfA7daTQbmw-2KWpd5pbt7...,b'gAAAAABhoT6HR4ZGbw72A56ZqRNMezc8AZC8ulGNFR6r...,
98,b'gAAAAABhoT6FAZ-HxxZvnKpd88qyZSUKiUk7Q33NmhAG...,b'gAAAAABhoT6HZhPCQpgFDXkh6so5ItKeOmdHbFjPpxiv...,


In [21]:
print("grab's file:")
df_grab

grab's file:


Unnamed: 0,my_self_encrypted_set,other_party_encrypted_set,common_values
0,b'gAAAAABhoT6F3Lh9Ae0YEO_0F6bNg7pn27_VHmU_3qR0...,b'gAAAAABhoT6GIrijnesmPUA4GkKBz-y9HPadhjnfI1NO...,b'gAAAAABhoT6H9TVa4t6LMoLXnfhZeU4wZ5nsWpq38d0_...
1,b'gAAAAABhoT6Fw4iNkKuwLZH7YJTuvH5h5saTxtDOHNEJ...,b'gAAAAABhoT6Goj7i7wolZVbZzF7ZuPimeMtotqQY0n0j...,b'gAAAAABhoT6Hr0MRVqbjVIJviuBD_kcvR59E_pR2eSJ3...
2,b'gAAAAABhoT6FsIa-98eZKBfGSLZgMfOvYYjfHlxZBfw5...,b'gAAAAABhoT6GUNz71ymIJtQFuVjlG1rVdca53sqj0lKj...,b'gAAAAABhoT6HNLLzlesOyuL7loh4jk8c4FPQiBZ4F0_f...
3,b'gAAAAABhoT6FeCPhYLUpbKA-WKco8fuz_-XGApUuPCQe...,b'gAAAAABhoT6GluFq4wSGkX7vAXrAZ2GTSiTq54aGdwKk...,b'gAAAAABhoT6HkaKHlGiised5zerYBPcWjyoVKIqH9ml6...
4,b'gAAAAABhoT6F-ejpbZ4HKK_QDsCsIZ9r_vy9j0z6JDhI...,b'gAAAAABhoT6GbeRD-GUa-a3zbktmL1J4cRLkdgmjKXUY...,b'gAAAAABhoT6HogCn75-zX01vLqumYXpVFprNgVQ8-JSY...
...,...,...,...
95,b'gAAAAABhoT6FPuX93RsQ_uuqXJDm9g2xHC9KTlD8TztE...,b'gAAAAABhoT6G6QFBWAcQwl1b_jBUMG28JiMNtsfy746R...,
96,b'gAAAAABhoT6FHZaWx1Fbz14OvJ5KOOUc010DLTEcj-H3...,b'gAAAAABhoT6GwZz8c27IJm3cjieuRJrC7oeu7OD6RHb1...,
97,b'gAAAAABhoT6F5ASRqMNSInfrqz0iGDt3PxnCjGw4nkZx...,b'gAAAAABhoT6G4fikDEN8ILPnNoA51rqTB-vd0UdWOPjA...,
98,b'gAAAAABhoT6FoZngyLiCOSUsqvZ51lb74uR8PXhxSwX8...,b'gAAAAABhoT6GEwlX6h-DAPCXC9FGyo0Xj96ZHUrGUJEW...,


# Check results

In [22]:
# get intersection found by the two parties
gojek_found_intersection = gojek.common_values
grab_found_intersection = grab.common_values


# sort numbers for easier comparison
gojek_found_intersection.sort()
grab_found_intersection.sort()
common_phone_numbers.sort()

# summarize them in a dataframe
d = {"actual": common_phone_numbers,
    "gojek": gojek_found_intersection,
    "grab": grab_found_intersection}
df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()]))
df

Unnamed: 0,actual,gojek,grab
0,80497694,80497694,80497694
1,81093373,81093373,81093373
2,86915509,86915509,86915509
3,89312048,89312048,89312048
4,94391128,94391128,94391128
5,95521626,95521626,95521626
6,96192082,96192082,96192082
7,96485172,96485172,96485172
8,99173089,99173089,99173089
9,99397525,99397525,99397525
