In [1]:
import os
import time
import math

import pandas as pd
import tenseal as ts

In [2]:
def cleanup():
    """Delete the CKKS artifact files left behind by a previous run.

    Looks for ``enc_ckks.dat`` and ``res_ckks_stats.dat`` relative to the
    current working directory and removes whichever of them exist; files
    that are absent are skipped.
    """
    artifact_names = ("enc_ckks.dat", "res_ckks_stats.dat")
    # filter() is lazy, so each existence check runs right before its removal.
    for stale_path in filter(os.path.exists, artifact_names):
        os.remove(stale_path)

In [3]:
def setup_ckks(
    poly_modulus_degree=16384,
    coeff_mod_bit_sizes=(60, 40, 40, 40, 60),
    global_scale_bits=40,
):
    """Create a TenSEAL CKKS context with Galois keys generated.

    Called without arguments it builds exactly the configuration this
    notebook was written for; the keyword arguments let other parameter
    sets be tried without editing the function.

    Args:
        poly_modulus_degree: Forwarded to ``ts.context``.
        coeff_mod_bit_sizes: Iterable of bit sizes; forwarded to
            ``ts.context`` as a list.
        global_scale_bits: The context's ``global_scale`` is set to
            ``2 ** global_scale_bits``.

    Returns:
        The configured context object.
    """
    context = ts.context(
        ts.SCHEME_TYPE.CKKS,
        poly_modulus_degree=poly_modulus_degree,
        # Copy into a fresh list so the shared default is never handed out.
        coeff_mod_bit_sizes=list(coeff_mod_bit_sizes),
    )
    context.global_scale = 2**global_scale_bits
    # NOTE(review): presumably required by the rotations behind the `.sum()`
    # reductions in analyzer_process_ckks — confirm against the TenSEAL docs.
    context.generate_galois_keys()
    return context


def holder_encrypt_ckks(context, salary, bonus, filename):
    """Encrypt the salary and bonus vectors and store both in one file.

    Layout written to ``filename``:
      * 4 bytes  - big-endian length of the serialized salary ciphertext
      * N bytes  - serialized salary ciphertext
      * the rest - serialized bonus ciphertext (no length prefix; it simply
                   runs to the end of the file)

    Args:
        context: Context passed to ``ts.ckks_vector`` for both vectors.
        salary: Values encrypted as the first vector.
        bonus: Values encrypted as the second vector.
        filename: Path of the file to (over)write in binary mode.
    """
    encrypted_salary = ts.ckks_vector(context, salary)
    encrypted_bonus = ts.ckks_vector(context, bonus)

    salary_blob = encrypted_salary.serialize()
    bonus_blob = encrypted_bonus.serialize()

    with open(filename, "wb") as sink:
        # Only the first blob needs a length prefix: the reader takes
        # everything after it as the second blob.
        length_prefix = len(salary_blob).to_bytes(4, "big")
        for chunk in (length_prefix, salary_blob, bonus_blob):
            sink.write(chunk)


def analyzer_process_ckks(context, in_file, out_file):
    """Compute statistics on the encrypted salary/bonus vectors.

    Reads two serialized vectors from ``in_file`` (4-byte big-endian length,
    the salary blob, then the bonus blob up to end of file), performs all
    arithmetic on the deserialized vector objects without decrypting
    anything, and writes five results to ``out_file``. Each result is stored
    as a 4-byte big-endian length followed by the serialized vector, in this
    order: salary mean, salary variance, bonus mean, bonus variance, total.

    Args:
        context: Context passed to ``ts.ckks_vector_from`` when loading.
        in_file: Path of the file holding the two input ciphertexts.
        out_file: Path of the result file to (over)write in binary mode.
    """

    def mean_and_variance(enc_values):
        """Return the encrypted pair (E[x], E[x²] − (E[x])²) for one vector."""
        inv_count = 1.0 / enc_values.size()
        enc_sum = enc_values.sum()  # Σx
        enc_sum_of_squares = (enc_values * enc_values).sum()  # Σx²
        enc_mean = enc_sum.mul(inv_count)
        enc_variance = enc_sum_of_squares.mul(inv_count) - (enc_mean * enc_mean)
        return enc_mean, enc_variance

    with open(in_file, "rb") as source:
        salary_len = int.from_bytes(source.read(4), "big")
        salary_blob = source.read(salary_len)
        bonus_blob = source.read()  # unprefixed: everything that is left

    enc_salary = ts.ckks_vector_from(context, salary_blob)
    enc_bonus = ts.ckks_vector_from(context, bonus_blob)

    enc_salary_mean, enc_salary_variance = mean_and_variance(enc_salary)
    enc_bonus_mean, enc_bonus_variance = mean_and_variance(enc_bonus)

    # Σ (salary + 0.1 · bonus) · 1.05
    enc_weighted = enc_salary + enc_bonus.mul(0.1)
    enc_total = enc_weighted.mul(1.05).sum()

    # The reader relies on this exact order.
    ordered_results = (
        enc_salary_mean,
        enc_salary_variance,
        enc_bonus_mean,
        enc_bonus_variance,
        enc_total,
    )

    with open(out_file, "wb") as sink:
        # Serialize everything first, then emit length-prefixed records.
        blobs = [enc_result.serialize() for enc_result in ordered_results]
        for blob in blobs:
            sink.write(len(blob).to_bytes(4, "big"))
            sink.write(blob)


def holder_decrypt_ckks(context, filename):
    """Load the five encrypted results from ``filename`` and decrypt them.

    The file is read as five consecutive records, each a 4-byte big-endian
    length followed by that many bytes of serialized vector. The records are
    taken, in order, as salary mean, salary variance, bonus mean, bonus
    variance and total result.

    Args:
        context: Context passed to ``ts.ckks_vector_from`` for every record.
        filename: Path of the result file to read.

    Returns:
        dict mapping ``salary_mean``, ``salary_variance``, ``bonus_mean``,
        ``bonus_variance`` and ``total_result`` to element 0 of the
        corresponding decrypted vector.
    """
    field_order = (
        "salary_mean",
        "salary_variance",
        "bonus_mean",
        "bonus_variance",
        "total_result",
    )

    encrypted_fields = []
    with open(filename, "rb") as source:
        for _ in field_order:
            record_len = int.from_bytes(source.read(4), "big")
            encrypted_fields.append(
                ts.ckks_vector_from(context, source.read(record_len))
            )

    # Decrypt only after the file is closed; each result lives in slot 0.
    return {
        field: encrypted.decrypt()[0]
        for field, encrypted in zip(field_order, encrypted_fields)
    }

In [4]:
# Load the dataset and pull the two columns of interest out as plain lists
df = pd.read_csv("datasets/dataset.csv")
salaries_list = df["salary_cents"].tolist()
bonus_list = df["bonus_cents"].tolist()

print(f"Dataset Size: {len(salaries_list)} rows")

# Plaintext reference values ("ground truth") that the CKKS results are
# compared against. Variances are population variances (divide by n).
n_salaries = len(salaries_list)
n_bonuses = len(bonus_list)

# Salary: mean, variance, standard deviation
salary_mean_gt = sum(salaries_list) / n_salaries
salary_sq_devs = ((value - salary_mean_gt) ** 2 for value in salaries_list)
salary_var_gt = sum(salary_sq_devs) / n_salaries
salary_std_gt = math.sqrt(salary_var_gt)

# Bonus: mean, variance, standard deviation
bonus_mean_gt = sum(bonus_list) / n_bonuses
bonus_sq_devs = ((value - bonus_mean_gt) ** 2 for value in bonus_list)
bonus_var_gt = sum(bonus_sq_devs) / n_bonuses
bonus_std_gt = math.sqrt(bonus_var_gt)

# Per-row z-scores: (x - mean) / std
z_score_salary_gt = [
    (value - salary_mean_gt) / salary_std_gt for value in salaries_list
]
z_score_bonus_gt = [(value - bonus_mean_gt) / bonus_std_gt for value in bonus_list]

# Σ (salary + 0.1 · bonus) · 1.05, paired row by row
total_result_gt = sum(
    (salary + 0.1 * bonus) * 1.05 for salary, bonus in zip(salaries_list, bonus_list)
)

Dataset Size: 8192 rows


In [5]:
def _print_scalar_check(label, ground_truth, ckks_value):
    """Print one comparison block: label, both values and the absolute error."""
    print(f"\n{label}:")
    print(f"  Ground Truth: {ground_truth:.2f}")
    print(f"  CKKS Result:  {ckks_value:.2f}")
    print(f"  Error:       {abs(ground_truth - ckks_value):.4f}")


print("=" * 80)
print("CKKS - STATISTICAL ANALYSIS")
print("=" * 80)

# Remove artifacts of a previous run so the pipeline starts from a clean state
cleanup()

# NOTE(review): the same context object is used for encryption, evaluation and
# decryption below, so the "analyzer" step has access to whatever key material
# the holder has. In a real two-party setup the analyzer would presumably get
# a public-only copy of the context — see TenSEAL's context serialization.
ctx_ckks = setup_ckks()

# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() follows the wall clock and can jump when it is adjusted.
start = time.perf_counter()
holder_encrypt_ckks(ctx_ckks, salaries_list, bonus_list, "enc_ckks.dat")
t_enc = time.perf_counter() - start

start_proc = time.perf_counter()
analyzer_process_ckks(ctx_ckks, "enc_ckks.dat", "res_ckks_stats.dat")
t_proc = time.perf_counter() - start_proc

start_dec = time.perf_counter()
ckks_stats = holder_decrypt_ckks(ctx_ckks, "res_ckks_stats.dat")
t_dec = time.perf_counter() - start_dec

print("\n--- Timing ---")
print(f"Encryption:  {t_enc:.4f}s")
print(f"Processing:  {t_proc:.4f}s")
print(f"Decryption:  {t_dec:.4f}s")
print(f"Total Time:  {t_enc + t_proc + t_dec:.4f}s")

# Standard deviations are derived in plaintext from the decrypted variances.
# math.sqrt raises ValueError for a negative argument, which could happen if
# approximation error pushes a near-zero variance below zero.
salary_std = math.sqrt(ckks_stats["salary_variance"])
bonus_std = math.sqrt(ckks_stats["bonus_variance"])

# Z-scores are likewise computed in plaintext from the decrypted mean and std
z_score_salary = [(x - ckks_stats["salary_mean"]) / salary_std for x in salaries_list]
z_score_bonus = [(x - ckks_stats["bonus_mean"]) / bonus_std for x in bonus_list]

# ==============================================================================
# VERIFICATION
# ==============================================================================

print("\n--- Statistics Verification ---")

scalar_checks = [
    ("Salary Mean", salary_mean_gt, ckks_stats["salary_mean"]),
    ("Salary Variance", salary_var_gt, ckks_stats["salary_variance"]),
    ("Salary Standard Deviation", salary_std_gt, salary_std),
    ("Bonus Mean", bonus_mean_gt, ckks_stats["bonus_mean"]),
    ("Bonus Variance", bonus_var_gt, ckks_stats["bonus_variance"]),
    ("Bonus Standard Deviation", bonus_std_gt, bonus_std),
]
for label, ground_truth, ckks_value in scalar_checks:
    _print_scalar_check(label, ground_truth, ckks_value)

print("\nZ-Score Salary (first 5 values):")
# Slicing (instead of indexing 0..4) keeps this working for datasets with
# fewer than five rows.
for gt_z, ckks_z in zip(z_score_salary_gt[:5], z_score_salary[:5]):
    print(
        f"  Ground Truth: {gt_z:.10f} | CKKS Result: {ckks_z:.10f} | Error: {abs(gt_z - ckks_z):.10f}"
    )

_print_scalar_check("Total Result", total_result_gt, ckks_stats["total_result"])

CKKS - STATISTICAL ANALYSIS

--- Timing ---
Encryption:  0.0484s
Processing:  0.4537s
Decryption:  0.0432s
Total Time:  0.5453s

--- Statistics Verification ---

Salary Mean:
  Ground Truth: 273972.64
  CKKS Result:  273973.03
  Error:       0.3919

Salary Variance:
  Ground Truth: 17064149642.34
  CKKS Result:  17064121091.83
  Error:       28550.5082

Salary Standard Deviation:
  Ground Truth: 130629.82
  CKKS Result:  130629.71
  Error:       0.1093

Bonus Mean:
  Ground Truth: 49623.73
  CKKS Result:  49623.81
  Error:       0.0710

Bonus Variance:
  Ground Truth: 839570175.92
  CKKS Result:  839570531.54
  Error:       355.6145

Bonus Standard Deviation:
  Ground Truth: 28975.34
  CKKS Result:  28975.34
  Error:       0.0061

Z-Score Salary (first 5 values):
  Ground Truth: 0.7125812627 | CKKS Result: 0.7125788586 | Error: 0.0000024041
  Ground Truth: 0.9088917236 | CKKS Result: 0.9088894837 | Error: 0.0000022399
  Ground Truth: -0.4776140622 | CKKS Result: -0.4776174620 | Error: 