# About
## v1.1 - Prevent man-in-the-middle attacks with symmetric key encryption!
This version improves on `v1.0 - The basic algorithm` by preventing man-in-the-middle attacks.  
This is done by encrypting all messages with a symmetric-key encryption algorithm, in our case, `AES128` in `CBC` mode (via `Fernet` from the `cryptography` library).  
Detailed explanations about this version can be found in the README and the video.

In [1]:
# import libraries (non-cryptographic)
import random # to generate phone numbers
import pandas as pd


# import libraries (cryptographic)
import cryptography.hazmat.primitives.asymmetric.dh as dh
from cryptography.fernet import Fernet
import hashlib
import sympy
import secrets

# Generate phone numbers and store them in "phone_numbers.csv"

In [2]:
# Seed Python's global `random` module; the sampling and shuffling cells below
# draw from this same generator, so re-running the notebook top to bottom
# reproduces the same phone numbers.
random.seed(10) # to ensure same phone numbers generated every time

In [3]:
class PhoneNumberGenerator:
    """Callable that draws distinct phone numbers for grab and gojek."""

    def __call__(self, count):
        """Return a list of `count` unique integers from [80000000, 100000000).

        Sampling goes through the module-level `random` generator, so the
        result is reproducible after a call to `random.seed(...)`.
        """
        candidate_pool = range(80000000, 100000000)
        return random.sample(candidate_pool, count)

    
class PhoneNumberStorageManager:
    """Callable that persists both parties' phone numbers to a single CSV file."""

    def __init__(self):
        # fixed output name; the file is written relative to the working directory
        self.filename = "phone_numbers.csv"

    def __call__(self, gojek_phone_numbers, grab_phone_numbers):
        """Write the two lists as the columns "gojek" and "grab" (no index column).

        Each list is wrapped in its own string-typed Series first, so lists of
        different lengths can share one DataFrame; the shorter column is padded
        with missing values.
        """
        columns = {
            "gojek": pd.Series(gojek_phone_numbers, dtype="str"),
            "grab": pd.Series(grab_phone_numbers, dtype="str"),
        }
        frame = pd.DataFrame(columns)
        frame.to_csv(self.filename, index=False)

### Edit cell below to change phone number count

In [4]:
# set phone number count
# The generation cell draws (gojek + grab - common) distinct numbers and slices
# them into the two lists, so the requested sizes are only obtained when
# common_phone_number_count does not exceed either of the other two counts.
gojek_phone_number_count = 61 # inclusive of phone numbers in common with grab
grab_phone_number_count = 91 # inclusive of phone numbers in common with gojek
common_phone_number_count = 10

In [5]:
# instantiate required classes
phone_number_generator = PhoneNumberGenerator()
phone_number_storage_manager = PhoneNumberStorageManager()

# generate phone numbers
# One pool of distinct numbers is drawn and then sliced:
#   - the first `common_phone_number_count` entries are shared by both parties
#   - the first `gojek_phone_number_count` entries (shared ones included) go to gojek
#   - grab gets the shared entries plus everything after gojek's slice
phone_numbers = phone_number_generator(gojek_phone_number_count+grab_phone_number_count-common_phone_number_count)
common_phone_numbers = phone_numbers[0:common_phone_number_count]
gojek_phone_numbers = phone_numbers[0:gojek_phone_number_count]
grab_phone_numbers = common_phone_numbers + phone_numbers[gojek_phone_number_count:] 

#shuffle phone number lists
# Both lists are new list objects (a slice and a concatenation), so shuffling
# them in place leaves `common_phone_numbers` and `phone_numbers` untouched.
random.shuffle(gojek_phone_numbers)
random.shuffle(grab_phone_numbers)

# write phone numbers to csv file
phone_number_storage_manager(gojek_phone_numbers, grab_phone_numbers)

# Define the classes necessary for the algorithm

In [6]:
class NumberGenerator:
    """Produces the numbers the PSI protocol needs: the prime modulus and random secrets."""

    def generate_public_parameters(self, size):
        """Build the shared modulus and the proper divisors of p-1.

        `size` is the requested bit length of the modulus. Three diagnostic
        lines are printed (primality of p, its bit length, primality of q).

        Returns a tuple `(p, [1, 2, q])` with `q = (p-1)//2`.
        """
        p = self.generate_safe_prime(size)
        print(f"p is prime: {sympy.ntheory.isprime(p)}")

        length_of_p = p.bit_length()  # number of binary digits of p
        print(f"Length of prime modulus, p: {length_of_p}.\nNote: Should be {size}.")

        q = (p - 1) // 2
        print(f"q is prime: {sympy.ntheory.isprime(q)}")

        # p-1 == 2q with q prime, so 1, 2 and q are all of its divisors apart
        # from p-1 itself; they are the candidate element orders that the
        # primitive-generator check has to rule out (Lagrange's theorem)
        return p, [1, 2, q]

    def generate_safe_prime(self, size):
        """Return a prime p of `size` bits for which (p-1)//2 is also prime.

        The prime is taken from freshly generated Diffie-Hellman parameters
        (generator 2). The original author notes that the library already
        yields safe primes, so the loop is only a double check; every
        replacement candidate that had to be drawn is printed.
        """
        candidate = self._draw_dh_prime(size)
        while not sympy.ntheory.isprime((candidate - 1) // 2):
            candidate = self._draw_dh_prime(size)
            print(candidate)
        return candidate

    def _draw_dh_prime(self, size):
        """Generate new DH parameters and return their prime modulus."""
        return dh.generate_parameters(2, size).parameter_numbers().p

    def generate_random_number(self, size):
        """Return a random non-negative integer of at most `size` bits (the client's secret)."""
        secret = secrets.randbits(size)
        return secret
            
class NumberInspector:
    """Number-theoretic checks on values used by the PSI protocol."""

    def check_is_primitive_generator(self, candidate, factors_divisorminus1, divisor):
        """Return True when `candidate` generates the whole multiplicative group modulo `divisor`.

        Parameters:
            candidate: integer to test.
            factors_divisorminus1: the divisors of `divisor - 1`, excluding
                `divisor - 1` itself (for a safe prime p = 2q + 1: [1, 2, q]).
            divisor: the modulus; the check is only meaningful for a prime modulus.

        By Lagrange's theorem the order of a unit divides `divisor - 1`. If
        `candidate` raised to every proper divisor is different from 1, its
        order can only be `divisor - 1`, i.e. it is a primitive generator.
        """
        # A multiple of the modulus is not a unit at all: every power of it is
        # 0, which would slip through the "!= 1" tests below. Reject it
        # explicitly. pow(..., 1, divisor) is used for the reduction so that an
        # invalid modulus raises the same error as the exponentiations below.
        if pow(candidate, 1, divisor) == 0:
            return False

        # pow(x, y, z) performs fast modular exponentiation; Python integers
        # have arbitrary precision, so no overflow can occur
        return all(
            pow(candidate, possible_order, divisor) != 1
            for possible_order in factors_divisorminus1
        )

class StorageManager:
    """Writes the data a client shares with the other party to a CSV file."""

    def store_data(self, filename, data):
        """Write `data` to `filename` as CSV, one column per dictionary key.

        Parameters:
            filename: path of the CSV file to create or overwrite.
            data: mapping of column name -> list of values, or None for a
                column that has no values yet.

        Columns may have different lengths; shorter ones are padded with
        empty cells. No index column is written.
        """
        # dtype is given explicitly for two reasons:
        #  - pandas warns when an empty Series (value None) is created without one
        #  - with dtype=object the values are written exactly as given, instead
        #    of integers turning into floats once a column is padded with NaN
        columns = {
            name: pd.Series(values, dtype="object")
            for name, values in data.items()
        }
        pd.DataFrame(columns).to_csv(filename, index=False)

                


    
        

# Create psi client class

In [7]:
class Client:
    """One party (grab or gojek) in the two-party private-set-intersection protocol.

    Each client hashes its phone numbers to primitive roots modulo p, raises
    them to its private exponent, and exchanges the results with the other
    party through CSV files. "Sending" means writing to this client's own
    file; "receiving" means reading the other party's file. Every value
    written to a file is first encrypted with Fernet under a key both
    clients are constructed with.
    """

    # Values are padded to at least this many bytes before Fernet encryption
    # (1024 bits == 128 bytes); larger values get as many bytes as they need.
    _MIN_PAYLOAD_BYTES = 128

    # Phone numbers are encoded on at least this many bytes before hashing.
    _MIN_HASH_INPUT_BYTES = 4

    def __init__(self, name, other_party_name, private_key_size, phone_numbers, p, factors_pminus1, fernet_key):
        """Set up keys and protocol state, and create this client's (still empty) data file.

        Parameters:
            name: this party's name; its file is "<name>_data_v1.1.csv".
            other_party_name: the peer's name; its file is read when receiving.
            private_key_size: bit length passed to the random secret generator.
            phone_numbers: this party's set, as a list of integers.
            p: the shared prime modulus.
            factors_pminus1: divisors of p-1 (excluding p-1), used to test
                whether a hash value is a primitive generator.
            fernet_key: symmetric key used to encrypt everything written to file.
        """
        self.number_inspector = NumberInspector()
        self.number_generator = NumberGenerator()

        # for asymmetric encryption
        self.private_key = self.number_generator.generate_random_number(private_key_size)
        self.my_set = phone_numbers
        self.p = p  # prime modulus
        self.factors_pminus1 = factors_pminus1  # to calculate order of hashed phone numbers,
                                                # since algorithm requires them to be primitive generators

        # for symmetric encryption
        self.fernet_key = fernet_key
        self.f = Fernet(fernet_key)

        # values to track for psi algorithm
        self.my_hashed_set = None  # h(x)
        self.my_self_encrypted_set = None  # (h(x)^(my_secret))modp
        self.my_encrypted_set = None  # (h(x)^(my_secret)(other_party_secret))modp
        self.other_party_encrypted_set = None  # (h(y)^(my_secret)(other_party_secret))modp
        self.common_values = None  # common phone numbers

        # content of the file used for communication with the other party:
        # only self_encrypted_values and other_party_encrypted_values need to be
        # shared; common_values lets both sides confirm they found the same intersection
        self.my_dict = {
            'my_self_encrypted_set': None,
            'other_party_encrypted_set': None,
            'common_values': None
        }

        # filenames
        self.name = name
        self.filename = name + "_data_v1.1.csv"
        self.other_party_name = other_party_name
        self.other_party_filename = other_party_name + "_data_v1.1.csv"

        # create file
        self.storage_manager = StorageManager()
        self.storage_manager.store_data(self.filename, self.my_dict)

    @staticmethod
    def _int_to_bytes(value, minimum_length):
        """Big-endian bytes of a non-negative int, zero-padded to at least `minimum_length` bytes."""
        needed = (value.bit_length() + 7) // 8
        return value.to_bytes(max(minimum_length, needed), "big")

    @staticmethod
    def _token_to_bytes(ciphertext):
        """Turn a Fernet token read back from CSV into bytes.

        Tokens are written to CSV as the text form of a bytes object, i.e.
        wrapped as b'...'. The wrapper is removed only when it is actually
        present, so bare tokens and bytes objects are accepted as well.
        """
        if isinstance(ciphertext, bytes):
            return ciphertext
        wrapped = (
            len(ciphertext) >= 3
            and ciphertext[0] == "b"
            and ciphertext[1] in ("'", '"')
            and ciphertext[-1] == ciphertext[1]
        )
        if wrapped:
            ciphertext = ciphertext[2:-1]
        return ciphertext.encode('utf-8')

    @staticmethod
    def _match_common(my_set, my_encrypted_set, other_party_encrypted_set):
        """Return the elements of `my_set` whose doubly-encrypted value also occurs in the other set.

        `my_encrypted_set[i]` must be the encrypted form of `my_set[i]`.
        Each matching encrypted value contributes the element at its first
        position, which is what repeated `list.index` calls would return,
        but a single pass builds the position table instead of one scan per match.
        """
        encrypted_common_values = set(my_encrypted_set).intersection(other_party_encrypted_set)
        first_position = {}
        for position, value in enumerate(my_encrypted_set):
            first_position.setdefault(value, position)
        return [my_set[first_position[value]] for value in encrypted_common_values]

    def hash_to_primitive_root_modulo_p(self, element):
        """Hash a phone number to a primitive root modulo p (a primitive generator).

        The number is encoded big-endian (4 bytes, more only if it does not
        fit) and hashed with SHA-256; the digest is re-hashed until its
        integer value passes the primitive-generator check.
        """
        payload = self._int_to_bytes(element, self._MIN_HASH_INPUT_BYTES)
        digest = hashlib.sha256(payload).digest()
        hash_int = int.from_bytes(digest, "big")

        # repeatedly hash until primitive root modulo p is obtained
        while not self.number_inspector.check_is_primitive_generator(
            hash_int, self.factors_pminus1, self.p
        ):
            digest = hashlib.sha256(digest).digest()
            hash_int = int.from_bytes(digest, "big")

        return hash_int

    def modular_exponentation(self, element):
        """Return (element^(private_key)) mod p."""
        return pow(element, self.private_key, self.p)

    def hash_set(self):
        """Hash every phone number in my set to a primitive root modulo p, keeping the order."""
        self.my_hashed_set = [
            self.hash_to_primitive_root_modulo_p(element) for element in self.my_set
        ]

    def encrypt_set(self, is_other_party):
        """Raise every element of a set to my private key, then publish the result.

        is_other_party True: the input is the other party's self-encrypted
            set, read from their file; the result is stored in
            `other_party_encrypted_set` and written to that column of my file.
        is_other_party False: the input is my own hashed set (`hash_set`
            must have run); the result is stored in `my_self_encrypted_set`
            and written to that column of my file.
        """
        if is_other_party:
            # receive_data already returns decrypted integers
            set_to_encrypt = self.receive_data("my_self_encrypted_set")
        else:
            set_to_encrypt = self.my_hashed_set

        # encrypt values in given set, one by one
        encrypted_values = [self.modular_exponentation(element) for element in set_to_encrypt]

        # assign the encrypted set to the correct variable
        # update csv file used for communication
        if is_other_party:
            self.other_party_encrypted_set = encrypted_values
            self.send_data(encrypted_values, "other_party_encrypted_set")
        else:
            self.my_self_encrypted_set = encrypted_values
            self.send_data(encrypted_values, "my_self_encrypted_set")

    def get_intersection(self):
        """Compute the common phone numbers and publish them.

        My doubly-encrypted set is read from the "other_party_encrypted_set"
        column of the other party's file and compared with the set I
        encrypted for them. The matching entries of `my_set` are stored in
        `common_values` and written to the "common_values" column of my file.
        """
        # read my encrypted set from the other party's file
        self.my_encrypted_set = self.receive_data("other_party_encrypted_set")

        self.common_values = self._match_common(
            self.my_set, self.my_encrypted_set, self.other_party_encrypted_set
        )

        # update csv file for communication
        self.send_data(self.common_values, "common_values")

    def encrypt_data(self, plaintext):
        """Encrypt a non-negative integer with Fernet and return the token (bytes).

        The integer is encoded big-endian on 128 bytes; values that need
        more (moduli larger than 1024 bits) are given as many bytes as they
        require instead of failing with OverflowError.
        """
        element = self._int_to_bytes(plaintext, self._MIN_PAYLOAD_BYTES)
        return self.f.encrypt(element)

    def decrypt_data(self, ciphertext):
        """Decrypt a Fernet token (as read back from CSV) and return the integer it carries."""
        ciphertext_bytes = self._token_to_bytes(ciphertext)
        element_in_bytes = self.f.decrypt(ciphertext_bytes)
        return int.from_bytes(element_in_bytes, "big")

    def send_data(self, data_to_send, column_name):
        """Send data, i.e. encrypt each element with Fernet and rewrite my file with the new column."""
        self.my_dict[column_name] = [self.encrypt_data(element) for element in data_to_send]
        self.storage_manager.store_data(self.filename, self.my_dict)

    def receive_data(self, column_name):
        """Receive data, i.e. read `column_name` from the other party's file and decrypt it.

        Returns a list of integers. Empty cells come back from pandas as
        float NaN (the padding of shorter columns) and are skipped.
        """
        encrypted_data = self.get_other_party_data()[column_name].to_list()
        return [
            self.decrypt_data(element)
            for element in encrypted_data
            if not isinstance(element, float)  # remove NaN
        ]

    def get_my_data(self):
        """Read my own file as a dataframe."""
        return pd.read_csv(self.filename)

    def get_other_party_data(self):
        """Read the other party's file as a dataframe."""
        return pd.read_csv(self.other_party_filename)
    


# Initialize context

### Edit cell below to change key size

In [8]:
# assign pre-determined variables for psi
# The same value is passed below as the bit length of the prime modulus and as
# the bit length of each client's private key.
key_size = 1024 # both private keys and large prime

In [9]:
# create key for symmetric key cryptography
# (a single key is generated here and handed to both clients below)
fernet_key = Fernet.generate_key()

# create public parameters required for psi
# p is the prime modulus, factors_pminus1 the list [1, 2, (p-1)//2]; the call
# also prints the diagnostic lines shown in this cell's output
number_generator = NumberGenerator()
p, factors_pminus1 = number_generator.generate_public_parameters(key_size)

# create clients
# Each constructor draws its own private key and writes an empty
# "<name>_data_v1.1.csv" file; the second argument names the peer whose file
# the client will read from.
grab = Client("grab", "gojek", key_size, grab_phone_numbers, p, factors_pminus1, fernet_key)
gojek = Client("gojek", "grab", key_size, gojek_phone_numbers, p, factors_pminus1, fernet_key)



p is prime: True
Length of prime modulus, p: 1024.
Note: Should be 1024.
q is prime: True


  df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in data.items()])) # create dataframe


# Get intersection

## Step 1: Hash phone numbers

In [10]:
# clients hash their own set
# (fills each client's `my_hashed_set`; nothing is written to file in this step)
grab.hash_set()
gojek.hash_set()

### Clients' status after step 1 (value of variables in client)
Note: the hashed set is not stored in the file because it is not meant to be shared with the other party; hence, the files' status after step 1 is not shown.

In [11]:
# Side-by-side view of both clients' hashed sets. Each list is wrapped in its
# own Series so that lists of different lengths fit into one DataFrame; the
# shorter column is padded with NaN.
d = {"gojek hashed set": gojek.my_hashed_set,
    "grab hashed set": grab.my_hashed_set}
df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()]))
df

Unnamed: 0,gojek hashed set,grab hashed set
0,8665856184731841630588915710571595248439800538...,1751659343934249496930093781941606327015039027...
1,8697318885625964372170362596957150767783631535...,5930688136427591374346485570270456146366904370...
2,2953623139145187618071647113313426330528576167...,7322471674394226755197759817505338232518241311...
3,4271190063387899149690355220761209149002412790...,1897687944268595422506089474931262713576137973...
4,1959933681547377774404303441869569823311439691...,3864964095203405700528472210556810964594978309...
...,...,...
86,,3922283845005472934707328127251028806214862373...
87,,3683371775181761031500661295222456997304332645...
88,,8119100922771943066177300284568368128847410924...
89,,5011913842513548422134267800545138946266586949...


## Step 2: encrypt hashed set with own private key

In [12]:
# clients self encrypt hashed set
# (each result is kept in `my_self_encrypted_set` and written, Fernet-encrypted,
# to the "my_self_encrypted_set" column of that client's own file)
grab.encrypt_set(False) # set is_other_party to false to encrypt own hashed set
gojek.encrypt_set(False)

  df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in data.items()])) # create dataframe


### Clients' status after step 2 (value of variables in client)

In [13]:
# Same overview as after step 1, extended with each client's self-encrypted set.
d = {"gojek hashed set": gojek.my_hashed_set,
    "grab hashed set": grab.my_hashed_set,
    "gojek self-encrypted set": gojek.my_self_encrypted_set,
    "grab self-encrypted set": grab.my_self_encrypted_set}
df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()]))
df

Unnamed: 0,gojek hashed set,grab hashed set,gojek self-encrypted set,grab self-encrypted set
0,8665856184731841630588915710571595248439800538...,1751659343934249496930093781941606327015039027...,9430417840080229131610584124787600846661578681...,1660745803563415734462883672985162265724479666...
1,8697318885625964372170362596957150767783631535...,5930688136427591374346485570270456146366904370...,5278345570226336233982504918895393604653046903...,1131950721362527330440462087243803862023032407...
2,2953623139145187618071647113313426330528576167...,7322471674394226755197759817505338232518241311...,1215557842416546977123216899248745112672222639...,8477747267499227848721965693182096833656046277...
3,4271190063387899149690355220761209149002412790...,1897687944268595422506089474931262713576137973...,7684351709009417683475515136141119645563879708...,3430479753360146141779817567954495825470013574...
4,1959933681547377774404303441869569823311439691...,3864964095203405700528472210556810964594978309...,5846310158850103598690659668764873194912007708...,6715784285028477685205249148914204136131340009...
...,...,...,...,...
86,,3922283845005472934707328127251028806214862373...,,8897379927365696007558261241776404213264536216...
87,,3683371775181761031500661295222456997304332645...,,1802928496823922694951216659334133583687236885...
88,,8119100922771943066177300284568368128847410924...,,3775726870587539746775391177885702286716963804...
89,,5011913842513548422134267800545138946266586949...,,4337120390621199879262345053965529778388011229...


### Files' status after step 2 (value of variables in file - clients' variables encrypted with Fernet)

In [14]:
# Re-read both clients' CSV files from disk to show what has been "sent" so far;
# only gojek's is displayed here, grab's is displayed by the next cell.
df_gojek = gojek.get_my_data()
df_grab = grab.get_my_data()
print("gojek's file:")
df_gojek


gojek's file:


Unnamed: 0,my_self_encrypted_set,other_party_encrypted_set,common_values
0,b'gAAAAABhoyRtZjXyT1d9JOb_hYn5V2Lc5LHLHB0FrMfA...,,
1,b'gAAAAABhoyRttMoRH-YDnWmm0BkiG_ptrHBeGlzkMLMF...,,
2,b'gAAAAABhoyRtYBoCAsKgDgUB2NqCc-_gRGBol33AEuCW...,,
3,b'gAAAAABhoyRtZN9f8-oOZ8cyNek5W4999Tzi3ptMZo0B...,,
4,b'gAAAAABhoyRtmJa5X8nt0fdqWGHg9I9WxTuPNdY-Sgfb...,,
...,...,...,...
56,b'gAAAAABhoyRtC905j_P7rLgxQoQAp8QV_9E9PQ25HKlb...,,
57,b'gAAAAABhoyRtmzKXR1MsXvHcSA7qMCrxkS0trGq0bXlj...,,
58,b'gAAAAABhoyRtv67vuQCIUadu1izZqM6FA736gpgveZUW...,,
59,b'gAAAAABhoyRt77YLGI1kcW2wi7gMdY4Mbjk9VkNnnZ0r...,,


In [15]:
# Display grab's file as loaded into `df_grab` by the previous cell.
print("grab's file:")
df_grab

grab's file:


Unnamed: 0,my_self_encrypted_set,other_party_encrypted_set,common_values
0,b'gAAAAABhoyRtQjSn2xUuHAjrXeJ7UZKRXCn_enW_ZGgB...,,
1,b'gAAAAABhoyRtpq-E39Fn3VxbXYKY7C9zYRhxOueA3exQ...,,
2,b'gAAAAABhoyRtsPKY6y7WZ-Bstc6kko-994DbFM0wsjIm...,,
3,b'gAAAAABhoyRtNGXIRwi7A-Qc5xiihDIavTDxkm0gWyUa...,,
4,b'gAAAAABhoyRt8Ir3Y7lomRhPWcChLjuzfyyPujOaN3a_...,,
...,...,...,...
86,b'gAAAAABhoyRtFQjSWAtGXyQR2gAcUHZKUrYs3_HEDRt0...,,
87,b'gAAAAABhoyRtlAEqD8SRwJumtV-VE41gSh_LlgTVTG9d...,,
88,b'gAAAAABhoyRtMUqiT1cUE1-oov5hUAn887MSFxiEpWO0...,,
89,b'gAAAAABhoyRtJtUobAyaf5rZbh2-DgjUV-68o2qAX2oI...,,


## Step 3: encrypt other party's self-encrypted set with own private key


In [16]:
# clients encrypt other party's self encrypted set
# Each call reads the "my_self_encrypted_set" column of the OTHER party's file,
# raises every value to the caller's private key, and writes the result to the
# "other_party_encrypted_set" column of the caller's own file. Both step-2 calls
# must therefore have completed before this cell runs.
grab.encrypt_set(True) # set is_other_party to true
gojek.encrypt_set(True)

  df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in data.items()])) # create dataframe


### Clients' status after step 3 (value of variables in client)

In [17]:
# Overview after step 3. Note the crossed attributes in the last two entries:
# gojek's fully encrypted set is the one grab produced (held in
# grab.other_party_encrypted_set), and vice versa.
d = {"gojek hashed set": gojek.my_hashed_set,
    "grab hashed set": grab.my_hashed_set,
    "gojek self-encrypted set": gojek.my_self_encrypted_set,
    "grab self-encrypted set": grab.my_self_encrypted_set,
    "gojek encrypted set": grab.other_party_encrypted_set,
    "grab encrypted set": gojek.other_party_encrypted_set}
df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()]))
df

Unnamed: 0,gojek hashed set,grab hashed set,gojek self-encrypted set,grab self-encrypted set,gojek encrypted set,grab encrypted set
0,8665856184731841630588915710571595248439800538...,1751659343934249496930093781941606327015039027...,9430417840080229131610584124787600846661578681...,1660745803563415734462883672985162265724479666...,2744925491455708882216533955759768084309261738...,9910524849779838725831103341577364550267196752...
1,8697318885625964372170362596957150767783631535...,5930688136427591374346485570270456146366904370...,5278345570226336233982504918895393604653046903...,1131950721362527330440462087243803862023032407...,7935144737355830234572790122213012595126395170...,1044862300392754760490154320724386891539806691...
2,2953623139145187618071647113313426330528576167...,7322471674394226755197759817505338232518241311...,1215557842416546977123216899248745112672222639...,8477747267499227848721965693182096833656046277...,1597729557127998307872545677113890556259978353...,3799349634205893516775695892043953919859201052...
3,4271190063387899149690355220761209149002412790...,1897687944268595422506089474931262713576137973...,7684351709009417683475515136141119645563879708...,3430479753360146141779817567954495825470013574...,1273244004336780956479326979418650316268881497...,1611709868662819149684782837197467167338889088...
4,1959933681547377774404303441869569823311439691...,3864964095203405700528472210556810964594978309...,5846310158850103598690659668764873194912007708...,6715784285028477685205249148914204136131340009...,1231972516638591295662108568610265525934292785...,1539202207485739645358873837105694098207722703...
...,...,...,...,...,...,...
86,,3922283845005472934707328127251028806214862373...,,8897379927365696007558261241776404213264536216...,,2456036763609746487964099303634549458300674188...
87,,3683371775181761031500661295222456997304332645...,,1802928496823922694951216659334133583687236885...,,9382323634754329472517264310342000578586262764...
88,,8119100922771943066177300284568368128847410924...,,3775726870587539746775391177885702286716963804...,,1567083070196050550442151864040475083967611056...
89,,5011913842513548422134267800545138946266586949...,,4337120390621199879262345053965529778388011229...,,3517686568580457818110216651233365803425271987...


### Files' status after step 3 (value of variables in file - clients' variables encrypted with Fernet)

In [18]:
# Re-read both files; the "other_party_encrypted_set" column is now filled in.
# Only gojek's file is displayed here, grab's by the next cell.
df_gojek = gojek.get_my_data()
df_grab = grab.get_my_data()
print("gojek's file:")
df_gojek



gojek's file:


Unnamed: 0,my_self_encrypted_set,other_party_encrypted_set,common_values
0,b'gAAAAABhoyRtZjXyT1d9JOb_hYn5V2Lc5LHLHB0FrMfA...,b'gAAAAABhoyRuHXVzkaFhxIAozpLcf3tGiX_Y3Qv9qij4...,
1,b'gAAAAABhoyRttMoRH-YDnWmm0BkiG_ptrHBeGlzkMLMF...,b'gAAAAABhoyRuzJHG_3T8LCdVMB1T8zrq7uhXYOnSjcLY...,
2,b'gAAAAABhoyRtYBoCAsKgDgUB2NqCc-_gRGBol33AEuCW...,b'gAAAAABhoyRuUbp0TjXsYHNCzRvM3WgT4cVpTZdR-bD6...,
3,b'gAAAAABhoyRtZN9f8-oOZ8cyNek5W4999Tzi3ptMZo0B...,b'gAAAAABhoyRusO_hll-qeQS5i4UNQrFN7HArRO_ZGtzu...,
4,b'gAAAAABhoyRtmJa5X8nt0fdqWGHg9I9WxTuPNdY-Sgfb...,b'gAAAAABhoyRuyf9AL3JPxj4qZDP596uK_tYWUtNh_SWG...,
...,...,...,...
86,,b'gAAAAABhoyRur6aZCJyP6tJXpcMczAcQEtCeryJrXl_y...,
87,,b'gAAAAABhoyRuYGyVxaBF6za_PATDQP1uBxAt0YKnUlZz...,
88,,b'gAAAAABhoyRu87a4PiVUtHyVQ397GS4CKNiPW1OdznDX...,
89,,b'gAAAAABhoyRub0JL28szNeR2IK7p1w4dFgC1QYetBB0z...,


In [19]:
# Display grab's file as loaded into `df_grab` by the previous cell.
print("grab's file:")
df_grab

grab's file:


Unnamed: 0,my_self_encrypted_set,other_party_encrypted_set,common_values
0,b'gAAAAABhoyRtQjSn2xUuHAjrXeJ7UZKRXCn_enW_ZGgB...,b'gAAAAABhoyRuzmVAG_IdWi8QxYvz5FPIi3ZUSaL51bZA...,
1,b'gAAAAABhoyRtpq-E39Fn3VxbXYKY7C9zYRhxOueA3exQ...,b'gAAAAABhoyRuax3yzZyzsqNEdO_k--mG6YaWyAB5pqkG...,
2,b'gAAAAABhoyRtsPKY6y7WZ-Bstc6kko-994DbFM0wsjIm...,b'gAAAAABhoyRuoHucbkJ3i1As4MjH5nzDsh-SfXSd3edI...,
3,b'gAAAAABhoyRtNGXIRwi7A-Qc5xiihDIavTDxkm0gWyUa...,b'gAAAAABhoyRuGnli0RqhNR_nZzFiTKiXW_xiJ9NpixjZ...,
4,b'gAAAAABhoyRt8Ir3Y7lomRhPWcChLjuzfyyPujOaN3a_...,b'gAAAAABhoyRuiTdwoaQ0nS3pABEdIwC58-auGp4ZJngU...,
...,...,...,...
86,b'gAAAAABhoyRtFQjSWAtGXyQR2gAcUHZKUrYs3_HEDRt0...,,
87,b'gAAAAABhoyRtlAEqD8SRwJumtV-VE41gSh_LlgTVTG9d...,,
88,b'gAAAAABhoyRtMUqiT1cUE1-oov5hUAn887MSFxiEpWO0...,,
89,b'gAAAAABhoyRtJtUobAyaf5rZbh2-DgjUV-68o2qAX2oI...,,


## Step 4: find intersection


In [20]:
# clients find intersection
# Each client reads its own doubly-encrypted set back from the other party's
# file, intersects it with the set it encrypted for the other party, maps the
# matches back to its own phone numbers (`common_values`) and writes them to
# the "common_values" column of its file.
grab.get_intersection()
gojek.get_intersection()

### Clients' status after step 4 (value of variables in client)

In [21]:
# Overview after step 4, extended with the intersection each client found.
# The two intersection columns are shorter than the others and are shown as
# floats because the padding value NaN forces a float column.
d = {"gojek hashed set": gojek.my_hashed_set,
    "grab hashed set": grab.my_hashed_set,
    "gojek self-encrypted set": gojek.my_self_encrypted_set,
    "grab self-encrypted set": grab.my_self_encrypted_set,
    "gojek encrypted set": grab.other_party_encrypted_set,
    "grab encrypted set": gojek.other_party_encrypted_set,
    "gojek found intersection": gojek.common_values,
    "grab found intersection:": grab.common_values}
df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()]))
df

Unnamed: 0,gojek hashed set,grab hashed set,gojek self-encrypted set,grab self-encrypted set,gojek encrypted set,grab encrypted set,gojek found intersection,grab found intersection:
0,8665856184731841630588915710571595248439800538...,1751659343934249496930093781941606327015039027...,9430417840080229131610584124787600846661578681...,1660745803563415734462883672985162265724479666...,2744925491455708882216533955759768084309261738...,9910524849779838725831103341577364550267196752...,81093373.0,95521626.0
1,8697318885625964372170362596957150767783631535...,5930688136427591374346485570270456146366904370...,5278345570226336233982504918895393604653046903...,1131950721362527330440462087243803862023032407...,7935144737355830234572790122213012595126395170...,1044862300392754760490154320724386891539806691...,95521626.0,81093373.0
2,2953623139145187618071647113313426330528576167...,7322471674394226755197759817505338232518241311...,1215557842416546977123216899248745112672222639...,8477747267499227848721965693182096833656046277...,1597729557127998307872545677113890556259978353...,3799349634205893516775695892043953919859201052...,86915509.0,86915509.0
3,4271190063387899149690355220761209149002412790...,1897687944268595422506089474931262713576137973...,7684351709009417683475515136141119645563879708...,3430479753360146141779817567954495825470013574...,1273244004336780956479326979418650316268881497...,1611709868662819149684782837197467167338889088...,96192082.0,96192082.0
4,1959933681547377774404303441869569823311439691...,3864964095203405700528472210556810964594978309...,5846310158850103598690659668764873194912007708...,6715784285028477685205249148914204136131340009...,1231972516638591295662108568610265525934292785...,1539202207485739645358873837105694098207722703...,99173089.0,99173089.0
...,...,...,...,...,...,...,...,...
86,,3922283845005472934707328127251028806214862373...,,8897379927365696007558261241776404213264536216...,,2456036763609746487964099303634549458300674188...,,
87,,3683371775181761031500661295222456997304332645...,,1802928496823922694951216659334133583687236885...,,9382323634754329472517264310342000578586262764...,,
88,,8119100922771943066177300284568368128847410924...,,3775726870587539746775391177885702286716963804...,,1567083070196050550442151864040475083967611056...,,
89,,5011913842513548422134267800545138946266586949...,,4337120390621199879262345053965529778388011229...,,3517686568580457818110216651233365803425271987...,,


Note: Unlike the other columns, the last 2 columns do not have a one-to-one mapping with the other values in the same row, i.e. a value in the last 2 columns has no relation to the other values in its row.

### Files' status after step 4  (value of variables in file - clients' variables encrypted with Fernet)

In [22]:
# Re-read both files; the "common_values" column is now filled in as well.
# Only gojek's file is displayed here, grab's by the next cell.
df_gojek = gojek.get_my_data()
df_grab = grab.get_my_data()
print("gojek's file:")
df_gojek


gojek's file:


Unnamed: 0,my_self_encrypted_set,other_party_encrypted_set,common_values
0,b'gAAAAABhoyRtZjXyT1d9JOb_hYn5V2Lc5LHLHB0FrMfA...,b'gAAAAABhoyRuHXVzkaFhxIAozpLcf3tGiX_Y3Qv9qij4...,b'gAAAAABhoyRvEPGTiSIgyBQhdijCptReQxCL8HOnj-oj...
1,b'gAAAAABhoyRttMoRH-YDnWmm0BkiG_ptrHBeGlzkMLMF...,b'gAAAAABhoyRuzJHG_3T8LCdVMB1T8zrq7uhXYOnSjcLY...,b'gAAAAABhoyRvy8lOjSTT1z1LYXmcBOQR8H4XHDw2wYsQ...
2,b'gAAAAABhoyRtYBoCAsKgDgUB2NqCc-_gRGBol33AEuCW...,b'gAAAAABhoyRuUbp0TjXsYHNCzRvM3WgT4cVpTZdR-bD6...,b'gAAAAABhoyRvH8RKnVOGN7zzXPs-kprbGQOZOfFkngQu...
3,b'gAAAAABhoyRtZN9f8-oOZ8cyNek5W4999Tzi3ptMZo0B...,b'gAAAAABhoyRusO_hll-qeQS5i4UNQrFN7HArRO_ZGtzu...,b'gAAAAABhoyRvdL9ktgk34XPk4cwLcF_qgToiT_9zTJqb...
4,b'gAAAAABhoyRtmJa5X8nt0fdqWGHg9I9WxTuPNdY-Sgfb...,b'gAAAAABhoyRuyf9AL3JPxj4qZDP596uK_tYWUtNh_SWG...,b'gAAAAABhoyRv-Ff1_qoETUkxe1yPxmd_K8T5UFh2HDk1...
...,...,...,...
86,,b'gAAAAABhoyRur6aZCJyP6tJXpcMczAcQEtCeryJrXl_y...,
87,,b'gAAAAABhoyRuYGyVxaBF6za_PATDQP1uBxAt0YKnUlZz...,
88,,b'gAAAAABhoyRu87a4PiVUtHyVQ397GS4CKNiPW1OdznDX...,
89,,b'gAAAAABhoyRub0JL28szNeR2IK7p1w4dFgC1QYetBB0z...,


In [23]:
# Display grab's file as loaded into `df_grab` by the previous cell.
print("grab's file:")
df_grab

grab's file:


Unnamed: 0,my_self_encrypted_set,other_party_encrypted_set,common_values
0,b'gAAAAABhoyRtQjSn2xUuHAjrXeJ7UZKRXCn_enW_ZGgB...,b'gAAAAABhoyRuzmVAG_IdWi8QxYvz5FPIi3ZUSaL51bZA...,b'gAAAAABhoyRvyhxJ2oplIU__SyXdc6loa-yYsP63UdD3...
1,b'gAAAAABhoyRtpq-E39Fn3VxbXYKY7C9zYRhxOueA3exQ...,b'gAAAAABhoyRuax3yzZyzsqNEdO_k--mG6YaWyAB5pqkG...,b'gAAAAABhoyRvjEWAOrC5j29S1xhYuqu5HFI18vQF1RZL...
2,b'gAAAAABhoyRtsPKY6y7WZ-Bstc6kko-994DbFM0wsjIm...,b'gAAAAABhoyRuoHucbkJ3i1As4MjH5nzDsh-SfXSd3edI...,b'gAAAAABhoyRv582dL6V0Dg-5qGHzlTD1O0Pb4txQhZJ7...
3,b'gAAAAABhoyRtNGXIRwi7A-Qc5xiihDIavTDxkm0gWyUa...,b'gAAAAABhoyRuGnli0RqhNR_nZzFiTKiXW_xiJ9NpixjZ...,b'gAAAAABhoyRvrJoDv4Pdr4CYD5GVzSHsFaJjsSXR2f8q...
4,b'gAAAAABhoyRt8Ir3Y7lomRhPWcChLjuzfyyPujOaN3a_...,b'gAAAAABhoyRuiTdwoaQ0nS3pABEdIwC58-auGp4ZJngU...,b'gAAAAABhoyRvyX9FZtA6l4VjrqzFeyfXlX_wY5t_QJX9...
...,...,...,...
86,b'gAAAAABhoyRtFQjSWAtGXyQR2gAcUHZKUrYs3_HEDRt0...,,
87,b'gAAAAABhoyRtlAEqD8SRwJumtV-VE41gSh_LlgTVTG9d...,,
88,b'gAAAAABhoyRtMUqiT1cUE1-oov5hUAn887MSFxiEpWO0...,,
89,b'gAAAAABhoyRtJtUobAyaf5rZbh2-DgjUV-68o2qAX2oI...,,


# Check results

In [24]:
# get intersection found by the two parties
# (these names are references to the clients' own lists, not copies)
gojek_found_intersection = gojek.common_values
grab_found_intersection = grab.common_values


# sort numbers for easier comparison
# NOTE: list.sort() works in place, so this also reorders gojek.common_values,
# grab.common_values and the notebook-level common_phone_numbers list.
gojek_found_intersection.sort()
grab_found_intersection.sort()
common_phone_numbers.sort()

# summarize them in a dataframe
# The protocol worked if all three columns are identical row by row.
d = {"actual": common_phone_numbers,
    "gojek": gojek_found_intersection,
    "grab": grab_found_intersection}
df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in d.items()]))
df

Unnamed: 0,actual,gojek,grab
0,80497694,80497694,80497694
1,81093373,81093373,81093373
2,86915509,86915509,86915509
3,89312048,89312048,89312048
4,94391128,94391128,94391128
5,95521626,95521626,95521626
6,96192082,96192082,96192082
7,96485172,96485172,96485172
8,99173089,99173089,99173089
9,99397525,99397525,99397525
