installing tenseal to use in Part B


In [None]:
#!pip install tenseal

Collecting tenseal
  Downloading tenseal-0.3.15-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (8.2 kB)
Downloading tenseal-0.3.15-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (4.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.8/4.8 MB[0m [31m32.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tenseal
Successfully installed tenseal-0.3.15


In [4]:
import numpy as np
import tenseal as ts
import time
import statistics

# Generate a large set of synthetic data to simulate biometric embeddings
num_vectors = 13233  # Total number of vectors
vector_dim = 2048  # Number of features per vector

# Set up the encryption context for CKKS, which allows secure computations on encrypted data
context = ts.context(
    ts.SCHEME_TYPE.CKKS,
    poly_modulus_degree=8192,
    coeff_mod_bit_sizes=[60, 40, 40, 60]
)
context.global_scale = 2**40  # Controls precision
context.generate_galois_keys()  # Enables advanced encrypted operations
ts.global_context = context
print("Encryption context created successfully!")

# Define a function to compute Euclidean Distance securely
def homomorphic_euclidean_distance(enc_vec1, enc_vec2):
    diff = enc_vec1 - enc_vec2
    squared_diff = diff * diff
    sum_squared_diff = squared_diff.sum()
    decrypted_sum = sum_squared_diff.decrypt()[0]
    return decrypted_sum ** 0.5

# Define a function to compute Cosine Similarity securely
def homomorphic_cosine_similarity(enc_vec1, enc_vec2):
    dot_product = (enc_vec1 * enc_vec2).sum()
    norm1 = (enc_vec1 * enc_vec1).sum()
    norm2 = (enc_vec2 * enc_vec2).sum()
    decrypted_dot_product = dot_product.decrypt()[0]
    decrypted_norm1 = norm1.decrypt()[0]
    decrypted_norm2 = norm2.decrypt()[0]
    return decrypted_dot_product / (
        (decrypted_norm1 ** 0.5) * (decrypted_norm2 ** 0.5)
    )

# Store accuracy and runtime results for analysis
accuracy_results = {"Euclidean": [], "Cosine": []}
runtime_results = {"Generation": [], "Encryption": [], "Computation": [], "Decryption": []}

# Run the tests multiple times to get reliable statistics
repetitions = 100
for _ in range(repetitions):
    start_time = time.time()

    # Create two random vectors to simulate data
    vector1 = np.random.uniform(0.0, 1.0, size=vector_dim)
    vector2 = np.random.uniform(0.0, 1.0, size=vector_dim)
    runtime_results["Generation"].append(time.time() - start_time)

    # Encrypt both vectors
    start_time = time.time()
    enc_vec1 = ts.ckks_vector(context, vector1)
    enc_vec2 = ts.ckks_vector(context, vector2)
    runtime_results["Encryption"].append(time.time() - start_time)

    # Compute similarity metrics on encrypted data
    start_time = time.time()
    euclidean_result = homomorphic_euclidean_distance(enc_vec1, enc_vec2)
    cosine_result = homomorphic_cosine_similarity(enc_vec1, enc_vec2)
    runtime_results["Computation"].append(time.time() - start_time)

    # Decrypt the results for validation
    start_time = time.time()
    runtime_results["Decryption"].append(time.time() - start_time)

    # Compare encrypted results with cleartext results to measure accuracy
    cleartext_euclidean = np.sqrt(np.sum((vector1 - vector2) ** 2))
    cleartext_cosine = np.dot(vector1, vector2) / (np.linalg.norm(vector1) * np.linalg.norm(vector2))
    accuracy_results["Euclidean"].append(abs(euclidean_result - cleartext_euclidean))
    accuracy_results["Cosine"].append(abs(cosine_result - cleartext_cosine))

# Print summary of accuracy statistics
print("\nAccuracy Results:")
for metric, values in accuracy_results.items():
    print(f"{metric} Accuracy: Avg={statistics.mean(values)}, Std={statistics.stdev(values)}, Max={max(values)}")

# Print summary of runtime statistics
print("\nRuntime Results:")
for step, times in runtime_results.items():
    print(f"{step} Runtime: Avg={statistics.mean(times):.4f}s, Std={statistics.stdev(times):.4f}s, Max={max(times):.4f}s")

# Generate and encrypt two new vectors, then validate cleartext computations
cleartext_vector1 = np.random.uniform(0.0, 1.0, size=vector_dim)
cleartext_vector2 = np.random.uniform(0.0, 1.0, size=vector_dim)
cleartext_euclidean = np.sqrt(np.sum((cleartext_vector1 - cleartext_vector2) ** 2))
cleartext_cosine = np.dot(cleartext_vector1, cleartext_vector2) / (
    np.linalg.norm(cleartext_vector1) * np.linalg.norm(cleartext_vector2)
)
print("\nCleartext Results:")
print(f"Euclidean Distance (Cleartext): {cleartext_euclidean}")
print(f"Cosine Similarity (Cleartext): {cleartext_cosine}")


Encryption context created successfully!

Accuracy Results:
Euclidean Accuracy: Avg=1.3057538464522622e-06, Std=1.4616033841050174e-08, Max=1.3658752635592464e-06
Cosine Accuracy: Avg=9.407328938060288e-10, Std=3.6275047284677577e-10, Max=1.6695959059731535e-09

Runtime Results:
Generation Runtime: Avg=0.0001s, Std=0.0002s, Max=0.0010s
Encryption Runtime: Avg=0.0107s, Std=0.0014s, Max=0.0170s
Computation Runtime: Avg=0.0934s, Std=0.0105s, Max=0.1384s
Decryption Runtime: Avg=0.0000s, Std=0.0000s, Max=0.0000s

Cleartext Results:
Euclidean Distance (Cleartext): 18.356666544935315
Cosine Similarity (Cleartext): 0.7561546739385002
