In [3]:
# import libraries (non-cryptographic)
import random # to generate phone numbers
import pandas as pd
from functools import reduce
import os

# import libraries (cryptographic)
import cryptography
import hashlib
#import Crypto
import sympy
import secrets

# Generate phone numbers and store them in phone_numbers.csv file

In [3]:
# Seed Python's `random` module, which the phone-number sampling and shuffling
# in the cells below draw from. The numbers only repeat when the cells are run
# in order on a fresh kernel.
random.seed(10) # to ensure same phone numbers generated every time

In [19]:
# class to generate phone numbers for grab and gojek
class PhoneNumberGenerator:
    """Callable that draws distinct random phone numbers from a half-open range.

    Parameters
    ----------
    lower : int, optional
        Smallest number that may be drawn (inclusive). Defaults to 80000000.
    upper : int, optional
        End of the range (exclusive). Defaults to 100000000.

    With the defaults, every generated number has 8 digits and starts with 8 or 9.
    """

    def __init__(self, lower=80000000, upper=100000000):
        self.lower = lower
        self.upper = upper

    def __call__(self, count):
        """Return a list of `count` distinct numbers drawn from [lower, upper).

        Uses the global `random` module state, so seeding `random` beforehand
        makes the result reproducible.

        Raises
        ------
        ValueError
            If `count` is negative or larger than the number of values in the range
            (raised by `random.sample`).
        """
        return random.sample(range(self.lower, self.upper), count)

    
# class to store numbers to csv
class PhoneNumberStorageManager:
    """Callable that writes the gojek and grab phone-number lists to one CSV file.

    Parameters
    ----------
    filename : str, optional
        Path of the CSV file to write. Defaults to "phone_numbers.csv".
    """

    def __init__(self, filename="phone_numbers.csv"):
        self.filename = filename

    def __call__(self, gojek_phone_numbers, grab_phone_numbers):
        """Write both lists as the columns "gojek" and "grab" (no index column).

        The lists may have different lengths; the shorter column is padded with
        empty cells.
        """
        # dtype=object keeps the integers as integers. With the default integer
        # dtype, padding the shorter column with NaN converts it to float64 and
        # the numbers are written as e.g. "81234567.0".
        columns = {
            "gojek": pd.Series(gojek_phone_numbers, dtype=object),
            "grab": pd.Series(grab_phone_numbers, dtype=object),
        }
        pd.DataFrame(columns).to_csv(self.filename, index=False)

In [20]:
# generate and store phone numbers
gojek_phone_number_count = 61 # inclusive of phone numbers in common with grab
grab_phone_number_count = 91 # inclusive of phone numbers in common with gojek
common_phone_number_count = 10

# instantiate required classes
phone_number_generator = PhoneNumberGenerator()
phone_number_storage_manager = PhoneNumberStorageManager()

# generate phone numbers
# The shared numbers are counted in both totals, so subtract them once to get
# the number of distinct phone numbers needed.
# (The original referenced an undefined name, `common_numbers`, here.)
unique_phone_number_count = (
    gojek_phone_number_count + grab_phone_number_count - common_phone_number_count
)
phone_numbers = phone_number_generator(unique_phone_number_count)

# The first `common_phone_number_count` numbers are shared: they are part of the
# gojek slice and are appended to the grab slice.
common_phone_numbers = phone_numbers[:common_phone_number_count]
gojek_phone_numbers = phone_numbers[:gojek_phone_number_count]
grab_phone_numbers = phone_numbers[gojek_phone_number_count:] + common_phone_numbers

# sanity checks on the generated sets
assert len(gojek_phone_numbers) == gojek_phone_number_count
assert len(grab_phone_numbers) == grab_phone_number_count
assert set(gojek_phone_numbers) & set(grab_phone_numbers) == set(common_phone_numbers)

#shuffle phone number lists (so the shared numbers are not at a predictable position)
random.shuffle(gojek_phone_numbers)
random.shuffle(grab_phone_numbers)

# write phone numbers to csv file
phone_number_storage_manager(gojek_phone_numbers, grab_phone_numbers)

# Define the classes necessary for the algorithm

In [None]:
# class to generate the shared prime modulus and the clients' private secrets
class NumberGenerator:
    """Source of the random numbers used by the protocol."""

    def generate_safe_prime(self, size):
        """Return a random safe prime with exactly `size` bits.

        A safe prime is a prime p for which q = (p - 1) // 2 is also prime.
        Candidates are built from a random odd q using `secrets`, and both
        q and p = 2 * q + 1 are checked with `sympy.ntheory.isprime`.

        Parameters
        ----------
        size : int
            Bit length of the prime to generate; must be at least 3.

        Raises
        ------
        ValueError
            If `size` is smaller than 3.

        NOTE(review): this is a pure-Python search and is expected to be slow for
        large sizes; the original author suggested generating Diffie-Hellman
        parameters with the `cryptography` package instead.
        """
        if size < 3:
            raise ValueError("size must be at least 3 bits")

        top_bit = 1 << (size - 2)
        while True:
            # Force the top bit so that p = 2q + 1 has exactly `size` bits, and
            # the lowest bit so that q is odd.
            q = secrets.randbits(size - 1) | top_bit | 1
            p = 2 * q + 1
            # Integer arithmetic only: the original used `(candidate-1)/2`, a float
            # division that cannot represent large primes exactly.
            if sympy.ntheory.isprime(q) and sympy.ntheory.isprime(p):
                return p

    def generate_random_number(self, size):
        """Return a cryptographically strong random integer with `size` random bits."""
        return secrets.randbits(size)
            
# class to inspect values
class NumberInspector:
    """Number-theoretic helper checks used when hashing into the group modulo p."""

    def check_is_primitive_generator(self, candidate, factors_pminus1, divisor):
        """Return True if `candidate` is a primitive root modulo `divisor`.

        Parameters
        ----------
        candidate : int
            Value to test.
        factors_pminus1 : iterable of int
            Divisors of `divisor - 1`. The value `divisor - 1` itself may be
            included; it is ignored.
        divisor : int
            The modulus (expected to be prime).

        By Lagrange's theorem the order of `candidate` divides `divisor - 1`, so
        it is a primitive root exactly when no proper divisor d of `divisor - 1`
        satisfies candidate**d == 1 (mod divisor).
        """
        # A multiple of the modulus is not in the multiplicative group at all.
        if candidate % divisor == 0:
            return False

        full_order = divisor - 1
        for possible_order in factors_pminus1:
            # candidate**(p-1) == 1 holds for every group element (Fermat), so
            # testing the full order would reject every candidate.
            if possible_order == full_order:
                continue
            if pow(candidate, possible_order, divisor) == 1:  # fast modular exponentiation
                return False
        return True

    def get_factors(self, n):
        """Return the set of all positive divisors of `n`, including 1 and `n`.

        Uses trial division up to the square root of `n` with integer arithmetic
        only, so it is exact for arbitrarily large `n` but only practical when
        the square root of `n` is small.

        Raises
        ------
        ValueError
            If `n` is smaller than 1.
        """
        if n < 1:
            raise ValueError("n must be a positive integer")

        factors = set()
        i = 1
        while i * i <= n:
            if n % i == 0:
                factors.add(i)
                factors.add(n // i)
            i += 1
        return factors
    
class StorageManager:
    """Writes a client's shareable data to a CSV file."""

    def store_data(self, filename, data):
        """Write `data` to `filename` as CSV, without an index column.

        Parameters
        ----------
        filename : str
            Path of the CSV file to (over)write.
        data : dict or pandas.DataFrame
            For a dict, each key becomes a column. Values may be lists of
            different lengths or None; shorter or missing columns are padded
            with empty cells.
        """
        if isinstance(data, pd.DataFrame):
            frame = data
        else:
            # dtype=object keeps arbitrarily large Python integers exact and stops
            # padded columns from being converted to float.
            frame = pd.DataFrame(
                {key: pd.Series(values, dtype=object) for key, values in data.items()}
            )
        frame.to_csv(filename, index=False)

                


    
        

# Create the PSI (private set intersection) client class

In [None]:
# set global parameters
# set_size = 1000 # pre-agreed set size to exchange
# padding should not be in this version

In [None]:
# client class (both grab and gojek are clients communicating directly with each other)
class Client:
    """One party in the two-party private set intersection exchange.

    Each client hashes its phone numbers to primitive roots modulo the shared
    prime `p`, raises them to its private key, raises the other party's values
    to the same key, and compares the doubly-encrypted values.

    Parameters
    ----------
    name : str
        Client name; shared data is written to "<name>_data.csv".
    private_key_size : int
        Number of random bits in the private exponent.
    phone_numbers : list of int
        This client's own set.
    p : int
        Shared prime modulus.
    factors_pminus1 : iterable of int
        Divisors of `p - 1`, passed to the primitive-root check.
    """

    def __init__(self, name, private_key_size, phone_numbers, p, factors_pminus1):

        self.number_inspector = NumberInspector()
        # The original called an unqualified generate_random_number(), which is
        # a method of NumberGenerator and not a module-level function.
        self.private_key = NumberGenerator().generate_random_number(private_key_size)
        self.my_set = phone_numbers
        self.p = p
        self.factors_pminus1 = factors_pminus1

        self.my_hashed_set = None
        self.my_self_encrypted_set = None
        self.my_encrypted_set = None
        self.other_party_encrypted_set = None
        self.common_values = None

        self.name = name
        self.filename = name + "_data.csv"

        # create datafile for communication with another party
        # only need to share self_encrypted_values and other_party_encrypted_values
        self.storage_manager = StorageManager()
        self.my_dict = {}
        self._store_shared_data()

    def _store_shared_data(self):
        """Rebuild `my_dict` from the current attributes and write it to the data file."""
        # The dict has to be rebuilt on every save: a dict created once in
        # __init__ keeps pointing at the initial None values.
        self.my_dict = {
            'my_self_encrypted_set': self.my_self_encrypted_set,
            'other_party_encrypted_set': self.other_party_encrypted_set,
            'common_values': self.common_values
        }
        self.storage_manager.store_data(self.filename, self.my_dict)

    def _sha256_as_int(self, value, width):
        """Return SHA-256 of `value` (big-endian, at least `width` bytes) as an integer."""
        value = int(value)
        # hashlib only accepts bytes; widen beyond `width` only if the value needs it.
        byte_count = max(width, (value.bit_length() + 7) // 8)
        digest = hashlib.sha256(value.to_bytes(byte_count, "big")).digest()
        return int.from_bytes(digest, "big")

    def hash_to_primitive_root_modulo_p(self, element):
        """Hash `element` repeatedly until the digest is a primitive root modulo `p`.

        The element is encoded as a 4-byte big-endian integer for the first hash;
        each rejected digest is re-hashed as a 32-byte big-endian integer. The
        result depends only on `element`, `p` and `factors_pminus1`, so both
        parties map equal phone numbers to the same value.
        """
        hash_int = self._sha256_as_int(element, 4)
        while not self.number_inspector.check_is_primitive_generator(
            hash_int, self.factors_pminus1, self.p
        ):
            hash_int = self._sha256_as_int(hash_int, 32)
        return hash_int

    def modular_exponentation(self, element):
        """Return element ** private_key mod p."""
        return pow(element, self.private_key, self.p)

    def hash_set(self):
        """Hash every element of `my_set` into `my_hashed_set`, preserving order."""
        self.my_hashed_set = [
            self.hash_to_primitive_root_modulo_p(element) for element in self.my_set
        ]

    def encrypt_set(self, is_other_party, other_party_set = None):
        """Raise a set of values to this client's private key and store the result.

        Parameters
        ----------
        is_other_party : bool
            False encrypts this client's own hashed set (result goes to
            `my_self_encrypted_set`). True encrypts `other_party_set` (result
            goes to `other_party_encrypted_set`).
        other_party_set : iterable, optional
            Values received from the other party; required when
            `is_other_party` is True. Items are converted with `int()`, so
            decimal strings read back from CSV are accepted.

        Raises
        ------
        ValueError
            If the set to encrypt is not available yet.
        """
        if is_other_party:
            if other_party_set is None:
                raise ValueError("other_party_set is required when is_other_party is True")
            set_to_encrypt = other_party_set
        else:
            if self.my_hashed_set is None:
                raise ValueError("hash_set() must be called before encrypting the own set")
            set_to_encrypt = self.my_hashed_set

        encrypted_values = [
            self.modular_exponentation(int(element)) for element in set_to_encrypt
        ]

        if is_other_party:
            self.other_party_encrypted_set = encrypted_values
        else:
            self.my_self_encrypted_set = encrypted_values

        self._store_shared_data()

    def get_intersection(self, my_encrypted_set):
        """Compute `common_values` from the doubly-encrypted sets and store them.

        Parameters
        ----------
        my_encrypted_set : iterable
            This client's own set after encryption by both parties, in the same
            order as `my_set`.

        Raises
        ------
        ValueError
            If the other party's set has not been encrypted yet, or if
            `my_encrypted_set` does not have one entry per element of `my_set`.
        """
        if self.other_party_encrypted_set is None:
            raise ValueError("encrypt_set(True, ...) must be called before get_intersection()")

        self.my_encrypted_set = [int(value) for value in my_encrypted_set]
        if len(self.my_encrypted_set) != len(self.my_set):
            raise ValueError("my_encrypted_set must have one entry per element of my_set")

        other_party_values = set(self.other_party_encrypted_set)
        # Positions line up with my_set, so a matching ciphertext at index i
        # identifies my_set[i] as a shared value. Results follow my_set order.
        self.common_values = [
            self.my_set[index]
            for index, value in enumerate(self.my_encrypted_set)
            if value in other_party_values
        ]

        self._store_shared_data()

    def get_stored_data(self):
        """Read this client's data file back as a DataFrame of Python integers.

        Every column has object dtype. Cells that are empty in the CSV (padding
        of shorter columns) are returned as None, so call `.dropna()` on a column
        before using its values.
        """
        # Read as text first: integers too large for int64 would otherwise not
        # come back as Python ints.
        raw = pd.read_csv(self.filename, dtype=str)
        parsed = {
            column: pd.Series(
                [int(cell) if isinstance(cell, str) else None for cell in raw[column]],
                dtype=object,
            )
            for column in raw.columns
        }
        return pd.DataFrame(parsed)

# Initialize context

In [None]:
# assign pre-determined variables for psi
key_size = 1024 # both private keys and large prime

# NOTE(review): generating a safe prime of this size may take a while.
number_generator = NumberGenerator()
p = number_generator.generate_safe_prime(key_size)

number_inspector = NumberInspector()

# p is a safe prime, so p - 1 = 2 * q with q prime and the divisors of p - 1 are
# exactly 1, 2, q and p - 1. They are written down directly because trial
# division of a number this large would not finish. p - 1 itself is left out:
# every group element raised to p - 1 is 1, so it cannot be used to reject a
# candidate generator.
q = (p - 1) // 2
factors_pminus1 = {1, 2, q}

# create clients
grab = Client("grab", key_size, grab_phone_numbers, p, factors_pminus1)
gojek = Client("gojek", key_size, gojek_phone_numbers, p, factors_pminus1)



# Get intersection

In [None]:
# clients hash their own set
grab.hash_set()
gojek.hash_set()

# clients self encrypt hashed set
grab.encrypt_set(False) # set is_other_party to false to encrypt own hashed set
gojek.encrypt_set(False)

# read both clients self encrypted set
grab_data = grab.get_stored_data()
gojek_data = gojek.get_stored_data()

# Columns in a client's file can have different lengths, so padding cells are
# dropped, and every value is converted to a Python int because large integers
# do not survive the CSV round-trip as numbers.
grab_self_encrypted_set = [int(value) for value in grab_data["my_self_encrypted_set"].dropna()]
gojek_self_encrypted_set = [int(value) for value in gojek_data["my_self_encrypted_set"].dropna()]

# clients encrypt other party's self encrypted set (data exchange occurs here)
grab.encrypt_set(True, gojek_self_encrypted_set) # set is_other_party to true to encrypt the set passed into function
gojek.encrypt_set(True, grab_self_encrypted_set)

# read both clients encrypted set
grab_data = grab.get_stored_data()
gojek_data = gojek.get_stored_data()

# grab's file holds gojek's set encrypted by both parties, and vice versa
gojek_encrypted_set = [int(value) for value in grab_data["other_party_encrypted_set"].dropna()]
grab_encrypted_set = [int(value) for value in gojek_data["other_party_encrypted_set"].dropna()]

# clients find intersection (data exchange occurs here)
# (the original passed an undefined `self` as an extra first argument)
grab.get_intersection(grab_encrypted_set)
gojek.get_intersection(gojek_encrypted_set)

# both parties must recover exactly the phone numbers planted in both sets
assert set(grab.common_values) == set(gojek.common_values) == set(common_phone_numbers)