In [1]:
import time
import statistics
import numpy as np

# -------------------------------
# 1. Install & Import TenSEAL
# -------------------------------
#  pip install tenseal  (optionally pin a released version, e.g. tenseal==0.3.14)
#  or see https://github.com/OpenMined/TenSEAL
import tenseal as ts

# -------------------------------
# Parameters (matching Part A)
# -------------------------------
# Length of each random test vector generated by the benchmark below.
# Assumed to match the embedding dimension used in Part A (512) — Part A is
# not part of this file, so confirm the value there.
VECTOR_DIM = 512
# NOTE: the vectors below are generated at full float precision. If Part A
# truncated values to 4 decimal places, apply the same truncation to the
# random data generation if the inputs should be comparable.

# Number of benchmark iterations. Each iteration runs steps 1–4 once and
# contributes one sample to every accuracy and runtime statistic.
REPETITIONS = 100

# -------------------------------
# 2. Create CKKS Context & Keys
# -------------------------------
# CKKS (HEaaN) scheme is used for approximate arithmetic on floating-point
# data. We configure poly_modulus_degree and coeff_mod_bit_sizes (the modulus
# chain) to balance performance, noise budget, and precision.
#
# Below is a common test configuration. For large-scale real usage, you may
# need bigger parameters.
# Build the shared TenSEAL context; every ciphertext created further down is
# constructed from it via ts.ckks_vector(context, ...).
context = ts.context(
    ts.SCHEME_TYPE.CKKS,
    poly_modulus_degree=8192,              # or 16384 for more complex operations
    coeff_mod_bit_sizes=[60, 40, 40, 60]   # Typical "default" chain
)
context.generate_galois_keys()  # Needed for .sum() or vector rotations
# Optional (but often recommended) if you do repeated multiplications:
# context.generate_relin_keys()
# NOTE(review): the similarity functions in this file multiply two
# ciphertexts. Confirm that the installed TenSEAL version creates
# relinearization keys by default; otherwise enable the call above.

# Set the global scale (precision). This is a trade-off between numerical
# accuracy and available noise budget.
# The exponent (40) equals the bit size of the two middle entries of
# coeff_mod_bit_sizes above — presumably chosen so the scale stays stable
# after a multiplication; confirm before changing either value.
context.global_scale = 2 ** 40

# -------------------------------
# 3. Define Homomorphic Operations
# -------------------------------
# (Euclidean distance & Cosine similarity)
# We'll show them as separate functions. We return the *decrypted* final
# scalar so that we can compare to the cleartext. If you want to keep it
# fully encrypted, just return the encrypted ciphertext.

def homomorphic_euclidean_distance(enc_vec1, enc_vec2):
    """
    Homomorphically computes Euclidean distance between enc_vec1 and enc_vec2:
        dist = sqrt(sum( (vec1 - vec2)^2 ))

    The subtraction, squaring and summation are performed on the encrypted
    operands; only the final sum is decrypted, and the square root is taken
    on the decrypted value.

    Args:
        enc_vec1: Encrypted vector supporting ``-``, ``*``, ``.sum()`` and
            ``.decrypt()`` (e.g. a TenSEAL CKKS vector).
        enc_vec2: Encrypted vector of the same length as enc_vec1.

    Returns the final scalar as float (decrypted). A slightly negative
    decrypted sum (approximation noise) is treated as 0, so the result is
    never NaN merely because the two inputs are (nearly) identical.
    """
    diff = enc_vec1 - enc_vec2               # ciphertext of (vec1 - vec2)
    squared_diff = diff * diff               # ciphertext of (vec1 - vec2)^2
    sum_squared_diff = squared_diff.sum()    # ciphertext of sum( (vec1 - vec2)^2 )
    decrypted_sum = sum_squared_diff.decrypt()[0]  # single float in slot 0
    # CKKS arithmetic is approximate: a true sum of 0 can decrypt to a tiny
    # negative number, for which np.sqrt would return NaN. Clamp at zero.
    return np.sqrt(max(decrypted_sum, 0.0))

def homomorphic_cosine_similarity(enc_vec1, enc_vec2):
    """
    Cosine similarity of two encrypted vectors:
        cos_sim = (vec1 · vec2) / (||vec1|| * ||vec2||)

    The three inner products (vec1·vec2, vec1·vec1, vec2·vec2) are computed
    on the encrypted operands. Each is decrypted, and the square roots and
    the division are carried out on the decrypted scalars.

    Returns the final scalar as float (decrypted).
    """
    def _decrypted_dot(lhs, rhs):
        # Element-wise product, summed across slots; the total is in slot 0.
        return (lhs * rhs).sum().decrypt()[0]

    numerator = _decrypted_dot(enc_vec1, enc_vec2)
    squared_norm1 = _decrypted_dot(enc_vec1, enc_vec1)
    squared_norm2 = _decrypted_dot(enc_vec2, enc_vec2)

    denominator = np.sqrt(squared_norm1) * np.sqrt(squared_norm2)
    return numerator / denominator

# -------------------------------
# 4. Repeat Steps 1–4 & Measure Performance
# -------------------------------
# Absolute |encrypted - cleartext| difference per repetition, one list per
# similarity metric.
accuracy_results = {metric: [] for metric in ("Euclidean", "Cosine")}

# Elapsed seconds per repetition, one list per step:
# (1) Vector Generation, (2) Encryption, (3) Encrypted Computation,
# (4) Decryption / Comparison. The "Decryption" entry is kept for clarity,
# though the similarity functions above decrypt in-line.
runtime_results = {
    step: []
    for step in ("Generation", "Encryption", "Computation", "Decryption")
}

# Benchmark loop: each repetition generates two random vectors, encrypts
# them, computes both similarity scores on the encrypted vectors and records
# the absolute difference to the cleartext result.
#
# All timings use time.perf_counter(): it is monotonic and uses the
# highest-resolution clock available, whereas time.time() can be too coarse
# for sub-millisecond steps and may jump if the system clock is adjusted.
for _ in range(REPETITIONS):
    # Step 1: Generate two random vectors
    t0 = time.perf_counter()
    vector1 = np.random.uniform(0.0, 1.0, size=VECTOR_DIM)
    vector2 = np.random.uniform(0.0, 1.0, size=VECTOR_DIM)
    runtime_results["Generation"].append(time.perf_counter() - t0)

    # Step 2: Encrypt the vectors
    t0 = time.perf_counter()
    enc_vec1 = ts.ckks_vector(context, vector1)
    enc_vec2 = ts.ckks_vector(context, vector2)
    runtime_results["Encryption"].append(time.perf_counter() - t0)

    # Step 3: Homomorphically compute the similarity scores.
    # NOTE: both helpers decrypt their result internally, so this timing
    # covers the encrypted arithmetic AND the decryption.
    t0 = time.perf_counter()
    # (A) Euclidean Distance
    encrypted_euclidean = homomorphic_euclidean_distance(enc_vec1, enc_vec2)
    # (B) Cosine Similarity
    encrypted_cosine = homomorphic_cosine_similarity(enc_vec1, enc_vec2)
    runtime_results["Computation"].append(time.perf_counter() - t0)

    # Step 4: Cleartext reference and accuracy comparison.
    # NOTE: no ciphertext is decrypted here (that happened in step 3); the
    # "Decryption" bucket therefore measures only the cleartext computation
    # and the comparison below.
    t0 = time.perf_counter()
    cleartext_euclidean = np.linalg.norm(vector1 - vector2)
    cleartext_cosine = np.dot(vector1, vector2) / (
        np.linalg.norm(vector1) * np.linalg.norm(vector2)
    )
    # Compare encrypted vs. cleartext
    accuracy_results["Euclidean"].append(abs(encrypted_euclidean - cleartext_euclidean))
    accuracy_results["Cosine"].append(abs(encrypted_cosine - cleartext_cosine))
    runtime_results["Decryption"].append(time.perf_counter() - t0)

# -------------------------------
# Print Summary of Accuracy Stats
# -------------------------------
import math
import statistics

# Report mean, population standard deviation and maximum of the absolute
# encrypted-vs-cleartext differences for each metric.
print("\n=== Accuracy Results (Absolute Differences) ===")
for metric in accuracy_results:
    samples = accuracy_results[metric]
    avg_diff = statistics.mean(samples)
    # With fewer than two samples the spread is reported as 0.0.
    if len(samples) > 1:
        std_diff = statistics.pstdev(samples)
    else:
        std_diff = 0.0
    max_diff = max(samples)
    print(f"{metric}: avg={avg_diff:.6e}, std={std_diff:.6e}, max={max_diff:.6e}")

# -------------------------------
# Print Summary of Runtime Stats
# -------------------------------
# Report mean, population standard deviation and maximum of the recorded
# durations for each benchmark step.
print("\n=== Runtime Results (seconds) ===")
for step in runtime_results:
    durations = runtime_results[step]
    avg_t = statistics.mean(durations)
    # With fewer than two samples the spread is reported as 0.0.
    if len(durations) > 1:
        std_t = statistics.pstdev(durations)
    else:
        std_t = 0.0
    max_t = max(durations)
    print(f"{step}: avg={avg_t:.4f}s, std={std_t:.4f}s, max={max_t:.4f}s")

# -------------------------------
# (Optional) Demonstration of final "cleartext vs. encrypted" check
# -------------------------------
print("\n=== Final Demo ===")
# One-off side-by-side check on a fresh pair of random vectors.
demo_vec1 = np.random.uniform(0.0, 1.0, size=VECTOR_DIM)
demo_vec2 = np.random.uniform(0.0, 1.0, size=VECTOR_DIM)

# Cleartext reference values
clear_euc_demo = np.linalg.norm(demo_vec1 - demo_vec2)
demo_norm_product = np.linalg.norm(demo_vec1) * np.linalg.norm(demo_vec2)
clear_cos_demo = np.dot(demo_vec1, demo_vec2) / demo_norm_product

# Encrypt both vectors (first, then second) with the shared context
ct_demo_vec1, ct_demo_vec2 = (
    ts.ckks_vector(context, plain_vec) for plain_vec in (demo_vec1, demo_vec2)
)

# Same metrics computed on the encrypted vectors (results come back decrypted)
encrypted_euc_demo = homomorphic_euclidean_distance(ct_demo_vec1, ct_demo_vec2)
encrypted_cos_demo = homomorphic_cosine_similarity(ct_demo_vec1, ct_demo_vec2)

print(f"Euclidean distance (encrypted) = {encrypted_euc_demo:.6f} vs. cleartext = {clear_euc_demo:.6f}")
print(f"Cosine similarity (encrypted) = {encrypted_cos_demo:.6f} vs. cleartext = {clear_cos_demo:.6f}")



=== Accuracy Results (Absolute Differences) ===
Euclidean: avg=7.261438e-07, std=1.368150e-08, max=7.614550e-07
Cosine: avg=2.957902e-09, std=8.402525e-10, max=4.819929e-09

=== Runtime Results (seconds) ===
Generation: avg=0.0000s, std=0.0001s, max=0.0010s
Encryption: avg=0.0091s, std=0.0005s, max=0.0110s
Computation: avg=0.0710s, std=0.0040s, max=0.0933s
Decryption: avg=0.0000s, std=0.0002s, max=0.0015s

=== Final Demo ===
Euclidean distance (encrypted) = 9.206356 vs. cleartext = 9.206355
Cosine similarity (encrypted) = 0.750358 vs. cleartext = 0.750358
