In [96]:
import time
import numpy as np
from collections import Counter
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_openml
from pyope.ope import OPE, ValueRange
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.backends import default_backend
import os

In [97]:
start_total_time = time.perf_counter()  # Start total execution time
# ✅ AES Encryption Setup
aes_key = os.urandom(32)  # Use this key for encryption/decryption

# ✅ AES Encrypt Function (Fast Mode - ECB)
def aes_encrypt_fast(data, key):
    cipher = Cipher(algorithms.AES(key), modes.ECB(), backend=default_backend())
    encryptor = cipher.encryptor()
    padded_data = data.ljust(16)
    ciphertext = encryptor.update(padded_data.encode()) + encryptor.finalize()
    return ciphertext

# ✅ AES Decrypt Function (Fast Mode - ECB)
def aes_decrypt_fast(ciphertext, key):
    cipher = Cipher(algorithms.AES(key), modes.ECB(), backend=default_backend())
    decryptor = cipher.decryptor()
    return decryptor.update(ciphertext).decode().strip()

In [98]:
# ✅ Load MNIST dataset
start_mnist_load_time = time.perf_counter()  # Start MNIST load time
mnist = fetch_openml("mnist_784", version=1, as_frame=False)
X, y = mnist.data, mnist.target.astype(int)
end_mnist_load_time = time.perf_counter()  # End MNIST load time
mnist_load_time = end_mnist_load_time - start_mnist_load_time
print(f"MNIST Load Time: {mnist_load_time:.4f} seconds")


MNIST Load Time: 7.7987 seconds


In [99]:
# ✅ Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Define OPE Encryption Scheme
ope_key = b'some_secure_key'
ope = OPE(ope_key, ValueRange(0, 255), ValueRange(0, 2**32))

def encrypt_dataset_with_ope(X):
    """Encrypt dataset using OPE for each feature."""
    return np.array([[ope.encrypt(int(val)) for val in sample] for sample in X])

num_samples = 100

# ✅ Encrypt Training & Test Data Using OPE
start_ope_encryption_time = time.perf_counter()  # Start OPE encryption time
X_train_encrypted = encrypt_dataset_with_ope(X_train[:num_samples])
X_test_encrypted = encrypt_dataset_with_ope(X_test[:num_samples])
end_ope_encryption_time = time.perf_counter()  # End OPE encryption time
ope_encryption_time = end_ope_encryption_time - start_ope_encryption_time
print(f"OPE Encryption Time: {ope_encryption_time:.4f} seconds")

print("OPE Encryption Done!")
print("X_train_encrypted shape:", X_train_encrypted.shape)
print("X_test_encrypted shape:", X_test_encrypted.shape)


OPE Encryption Time: 403.5344 seconds
OPE Encryption Done!
X_train_encrypted shape: (100, 784)
X_test_encrypted shape: (100, 784)


In [100]:
# ✅ Normalize and Clip pixel values (Ensure they are integers for OPE)
scale_factor = 255  # Convert [0,1] range to [0,255]
X_train_scaled = np.clip((X_train * scale_factor), 0, 255).astype(int)
X_test_scaled = np.clip((X_test * scale_factor), 0, 255).astype(int)

In [101]:

num_estimators = 100

# ✅ Train RF Model on OPE-Encrypted Data
clf_ope = RandomForestClassifier(n_estimators=num_estimators, random_state=42)
clf_ope.fit(X_train_encrypted[:num_samples], y_train[:num_samples])  # Train on encrypted dataset

# ✅ Make Predictions on Encrypted Test Set
y_pred_encrypted = clf_ope.predict(X_test_encrypted)

# ✅ Compute Accuracy
secure_accuracy = accuracy_score(y_test[:num_samples], y_pred_encrypted)
print(f"Secure Random Forest Accuracy (OPE Encrypted Data): {secure_accuracy:.4f}")


Secure Random Forest Accuracy (OPE Encrypted Data): 0.7500


In [102]:
# ✅ Encrypt Labels using Faster AES-ECB
encrypted_labels = {label: aes_encrypt_fast(str(label), aes_key) for label in np.unique(y_train)}

In [103]:
# ✅ Extract & Encrypt Thresholds using OPE
thresholds = []
for tree in clf_ope.estimators_:
    tree_thresholds = tree.tree_.threshold
    valid_thresholds = tree_thresholds[tree_thresholds != -2]
    thresholds.extend(valid_thresholds)

thresholds = np.array(thresholds)
scale_factor = 1e6

In [104]:
# ✅ Compute min/max for scaled pixel values
scaled_pixels = np.clip(np.nan_to_num(X_train * scale_factor, nan=0, posinf=scale_factor, neginf=0).astype(int), -1e9, 1e9)
scaled_thresholds = np.clip(np.nan_to_num(thresholds * scale_factor, nan=0, posinf=scale_factor, neginf=0).astype(int), -1e9, 1e9)

min_value = int(min(scaled_pixels.min(), scaled_thresholds.min()) * 0.99)
max_value = int(max(scaled_pixels.max(), scaled_thresholds.max()) * 1.01)

In [105]:
# ✅ Define OPE Ranges
DEFAULT_OUT_RANGE_START, DEFAULT_OUT_RANGE_END = 0, 2**31 - 1
in_range = ValueRange(min_value, max_value)
out_range = ValueRange(DEFAULT_OUT_RANGE_START, DEFAULT_OUT_RANGE_END)

In [106]:
# ✅ Initialize OPE for thresholds and features
ope_key = OPE(b'some_secure_key', in_range, out_range)

In [107]:
# ✅ Encrypt Thresholds with OPE
start_threshold_encryption_time = time.perf_counter()  # Start threshold encryption time
encrypted_thresholds = np.array([ope_key.encrypt(int(th)) for th in scaled_thresholds])
end_threshold_encryption_time = time.perf_counter()  # End threshold encryption time

In [108]:
# ✅ Encrypt Decision Tree Leaf Values using AES-ECB
encrypted_leaf_values = {
    node: aes_encrypt_fast(str(tree.tree_.value[node].argmax()), aes_key)
    for tree in clf_ope.estimators_
    for node in range(tree.tree_.node_count) if tree.tree_.feature[node] == -2
}

In [109]:
# ✅ Optimized Function to Encrypt an Image with OPE
def encrypt_image(image, ope_key, scale_factor=1e6):
    """Encrypt an image using OPE, ensuring valid integer input."""
    start_total = time.time()

    # ✅ Step 1: Scaling
    start_scaling = time.time()
    scaled_image = np.clip(np.nan_to_num(image * scale_factor, nan=0, posinf=scale_factor, neginf=0).astype(int), min_value, max_value)
    scaling_time = time.time() - start_scaling

    # ✅ Step 2: OPE Encryption
    start_encryption = time.time()
    encrypted_image = [ope_key.encrypt(int(pixel)) for pixel in scaled_image]
    encryption_time = time.time() - start_encryption

    total_time = time.time() - start_total

    # ✅ Debugging: Print Time Taken
    print(f"Image Encryption Time: {total_time:.4f} sec | Scaling: {scaling_time:.4f} sec | OPE: {encryption_time:.4f} sec")

    return encrypted_image

In [110]:
# ✅ Optimized Function to Encrypt the Entire Dataset
def encrypt_dataset(X, ope_key, scale_factor=1e6):
    scaled_X = np.clip(np.nan_to_num(X * scale_factor, nan=0, posinf=scale_factor, neginf=0).astype(int), min_value, max_value)
    encrypted_X = [encrypt_image(image, ope_key) for image in scaled_X]
    return np.array(encrypted_X)

In [111]:
start_dataset_encryption_time = time.perf_counter()
print("Encrypting Test Dataset...")
X_test_encrypted = encrypt_dataset(X_test[:num_samples], ope_key)
end_dataset_encryption_time = time.perf_counter()  # Use the same function
dataset_encryption_time = end_dataset_encryption_time - start_dataset_encryption_time
print(f"Dataset Encryption Time: {dataset_encryption_time:.4f} seconds")

Encrypting Test Dataset...
Image Encryption Time: 5.3880 sec | Scaling: 0.0000 sec | OPE: 5.3880 sec
Image Encryption Time: 5.2631 sec | Scaling: 0.0000 sec | OPE: 5.2631 sec
Image Encryption Time: 5.3461 sec | Scaling: 0.0008 sec | OPE: 5.3454 sec
Image Encryption Time: 5.3241 sec | Scaling: 0.0000 sec | OPE: 5.3241 sec
Image Encryption Time: 5.2973 sec | Scaling: 0.0000 sec | OPE: 5.2973 sec
Image Encryption Time: 5.3547 sec | Scaling: 0.0000 sec | OPE: 5.3547 sec
Image Encryption Time: 5.3354 sec | Scaling: 0.0011 sec | OPE: 5.3343 sec
Image Encryption Time: 5.3901 sec | Scaling: 0.0000 sec | OPE: 5.3901 sec
Image Encryption Time: 5.2606 sec | Scaling: 0.0010 sec | OPE: 5.2596 sec
Image Encryption Time: 5.4308 sec | Scaling: 0.0000 sec | OPE: 5.4308 sec
Image Encryption Time: 5.4160 sec | Scaling: 0.0000 sec | OPE: 5.4160 sec
Image Encryption Time: 5.3239 sec | Scaling: 0.0000 sec | OPE: 5.3239 sec
Image Encryption Time: 5.4202 sec | Scaling: 0.0000 sec | OPE: 5.4202 sec
Image Encry

In [112]:
# ✅ Function to Perform Secure Classification
def secure_classify(model, encrypted_X, encrypted_thresholds, encrypted_leaf_values, aes_key):
    votes = []
    for tree in model.estimators_:
        node = 0

        while tree.tree_.feature[node] != -2:
            feature_idx = tree.tree_.feature[node]
            encrypted_threshold = encrypted_thresholds[node]

            if encrypted_X[feature_idx] < encrypted_threshold:
                node = tree.tree_.children_left[node]
            else:
                node = tree.tree_.children_right[node]

        encrypted_value = encrypted_leaf_values[node]
        decrypted_value = aes_decrypt_fast(encrypted_value, aes_key)
        votes.append(int(decrypted_value))

    return Counter(votes).most_common(1)[0][0]

In [113]:
# ✅ Function to Perform Secure Classification on All Test Images
def secure_classify_dataset(model, X_encrypted, encrypted_thresholds, encrypted_leaf_values, aes_key):
    return np.array([secure_classify(model, encrypted_X, encrypted_thresholds, encrypted_leaf_values, aes_key) for encrypted_X in X_encrypted])


In [114]:
# ✅ Measure time taken for classification
start_time = time.time()
print("Performing Secure Classification...")
y_pred_encrypted = secure_classify_dataset(clf_ope, X_test_encrypted, encrypted_thresholds, encrypted_leaf_values, aes_key)
classification_time = time.time() - start_time
print(f"Secure Classification Time: {classification_time:.4f} seconds")


Performing Secure Classification...
Secure Classification Time: 0.6075 seconds


In [115]:
# ✅ Compute Accuracy of Secure Inference
secure_accuracy = accuracy_score(y_test [:num_samples], y_pred_encrypted)

# ✅ Print Secure Classification Results
print(f"Secure Random Forest Accuracy on Encrypted MNIST: {secure_accuracy:.4f}")

Secure Random Forest Accuracy on Encrypted MNIST: 0.1800


In [116]:
# Measure time taken for classifying the first image
start_time = time.time()
print("Classifying the first image...")

# Encrypt the first image
first_image_encrypted = encrypt_image(X_test[0], ope_key)

# Perform secure classification
first_image_prediction = secure_classify(clf_ope, first_image_encrypted, encrypted_thresholds, encrypted_leaf_values, aes_key)

classification_time = time.time() - start_time
print(f"Prediction for the first image: {first_image_prediction}")
print(f"Time taken for classification: {classification_time:.4f} seconds")

Classifying the first image...
Image Encryption Time: 5.3590 sec | Scaling: 0.0000 sec | OPE: 5.3590 sec
Prediction for the first image: 1
Time taken for classification: 5.3672 seconds


In [117]:

# Ensure all timer variables are defined to avoid NameError
if 'start_threshold_encryption_time' not in globals():
    start_threshold_encryption_time = end_threshold_encryption_time = time.perf_counter()

if 'start_label_encryption_time' not in globals():
    start_label_encryption_time = end_label_encryption_time = time.perf_counter()

if 'start_dataset_encryption_time' not in globals():
    start_dataset_encryption_time = end_dataset_encryption_time = time.perf_counter()

if 'start_rf_training_time' not in globals():
    start_rf_training_time = end_rf_training_time = time.perf_counter()

end_total_time = time.perf_counter()  # End total execution time

print("\n===== Execution Time Summary =====")
total_time = end_total_time - start_total_time
dataset_encryption_time = end_dataset_encryption_time - start_dataset_encryption_time
rf_training_time = end_rf_training_time - start_rf_training_time
threshold_encryption_time = end_threshold_encryption_time - start_threshold_encryption_time

total_percentage = 100
dataset_encryption_percentage = (dataset_encryption_time / total_time) * 100
rf_training_percentage = (rf_training_time / total_time) * 100
threshold_encryption_percentage = (threshold_encryption_time / total_time) * 100
classification_percentage = (classification_time / total_time) * 100
ope_encryption_percentage = (ope_encryption_time / total_time) * 100
mnist_load_percentage = (mnist_load_time / total_time) * 100

print(f"📌 Total Execution Time: {total_time:.4f} seconds")
print(f"📌 Dataset Encryption Time: {end_dataset_encryption_time - start_dataset_encryption_time:.4f} seconds ({dataset_encryption_percentage:.2f}%)")
print(f"📌 Random Forest Training Time: {end_rf_training_time - start_rf_training_time:.4f} seconds ({rf_training_percentage:.2f}%)")
print(f"📌 Threshold Encryption Time: {end_threshold_encryption_time - start_threshold_encryption_time:.4f} seconds ({threshold_encryption_percentage:.2f}%)")
print(f"📌 Secure Classification Time: {classification_time:.4f} seconds ({classification_percentage:.2f}%)")
print(f"📌 OPE Encryption Time: {ope_encryption_time:.4f} seconds ({ope_encryption_percentage:.2f}%)")
print(f"📌 MNIST Load Time: {mnist_load_time:.4f} seconds ({mnist_load_percentage:.2f}%)")
print(f"📌 Secure Random Forest Accuracy on Encrypted MNIST: {secure_accuracy:.4f}")
print(f"📌 Number of Decision Trees (num_samples): {num_estimators}")
print(f"📌 Number of Images Used for Training: {num_samples}")




===== Execution Time Summary =====
📌 Total Execution Time: 977.0845 seconds
📌 Dataset Encryption Time: 535.7405 seconds (54.83%)
📌 Random Forest Training Time: 0.8580 seconds (0.09%)
📌 Threshold Encryption Time: 19.5499 seconds (2.00%)
📌 Secure Classification Time: 5.3672 seconds (0.55%)
📌 OPE Encryption Time: 403.5344 seconds (41.30%)
📌 MNIST Load Time: 7.7987 seconds (0.80%)
📌 Secure Random Forest Accuracy on Encrypted MNIST: 0.1800
📌 Number of Decision Trees (num_samples): 100
📌 Number of Images Used for Training: 100
