In [1]:
import os
import time
from pathlib import Path

import joblib
import psutil
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score,
                             classification_report, confusion_matrix)
from sklearn.preprocessing import LabelEncoder
from concrete.ml.sklearn import LogisticRegression as ConcreteLogisticRegression

In [2]:
# Read the raw table from disk.
df = pd.read_csv('../data/raw/dataset_1/newdataset.csv')

# The 'diseases' column is the target; every remaining column is kept as a
# feature (the symptoms) and cast to float32.
y = df['diseases']
X = df.drop(columns='diseases').astype(np.float32)

# Persist the feature matrix and the target
joblib.dump(X, '../data/raw/raw_concrete_X.pkl')
joblib.dump(y, '../data/raw/raw_concrete_y.pkl')

['../data/raw/raw_concrete_y.pkl']

In [3]:
# Turn the target values into integer class labels; `le` keeps the fitted mapping.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, **split_options)

# Persist the training features
joblib.dump(X_train, '../data/raw/X_train_concrete.pkl')

['../data/raw/X_train_concrete.pkl']

In [4]:
# Time how long it takes to fit the Concrete ML logistic regression
# (constructed with no arguments, i.e. its defaults) on the training split.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; unlike time.time() it is not affected by system clock adjustments.
start_train = time.perf_counter()
model = ConcreteLogisticRegression()
model.fit(X_train, y_train)
training_time = time.perf_counter() - start_train  # elapsed seconds

# Snapshot the resident set size (RSS) of this Python process after training.
# NOTE: this is the footprint of the whole process at this point (the loaded
# dataset included), not the memory attributable to training alone.
process = psutil.Process(os.getpid())
memory_usage = process.memory_info().rss / 1024 ** 2  # bytes -> MiB (reported as "MB")

In [5]:
# Time inference on the test split with predict() called without the `fhe`
# argument; the notebook treats this run as the plaintext baseline.
# perf_counter() is used instead of time.time() because it is monotonic and
# intended for measuring elapsed intervals.
start_pred = time.perf_counter()
y_pred = model.predict(X_test)
prediction_time = time.perf_counter() - start_pred  # elapsed seconds

In [6]:
# Score the plaintext predictions against the held-out labels.
# Precision, recall and F1 are support-weighted averages over the classes;
# zero_division=0 scores classes with no predictions as 0 instead of warning.
weighted = {'average': 'weighted', 'zero_division': 0}
accuracy_plain = accuracy_score(y_test, y_pred)
precision_plain = precision_score(y_test, y_pred, **weighted)
recall_plain = recall_score(y_test, y_pred, **weighted)
f1_plain = f1_score(y_test, y_pred, **weighted)
class_report_plain = classification_report(y_test, y_pred, zero_division=0)
conf_matrix_plain = confusion_matrix(y_test, y_pred)

# Assemble the full report first, then write it to disk in one call.
output_file = "../results/lr_encrypted_plain.txt"
report_lines = [
    "Plaintext Evaluation Metrics for LR Encrypted Model:\n",
    "-----------------------------------------------\n",
    f"Training Time      : {training_time:.4f} seconds\n",
    f"Prediction Time    : {prediction_time:.4f} seconds\n",
    f"Memory Usage       : {memory_usage:.2f} MB\n",
    f"Accuracy           : {accuracy_plain:.4f}\n",
    f"Precision          : {precision_plain:.4f}\n",
    f"Recall             : {recall_plain:.4f}\n",
    f"F1 Score           : {f1_plain:.4f}\n",
    "\nClassification Report:\n",
    class_report_plain + "\n",
    "Confusion Matrix:\n",
    str(conf_matrix_plain) + "\n",
]
with open(output_file, "w") as f:
    f.writelines(report_lines)

print(f"Plaintext evaluation results saved to {output_file}")

Plaintext evaluation results saved to ../results/lr_encrypted_plain.txt


In [7]:
# Compile the trained model for FHE inference and time the step.
# X_train is handed to compile(); presumably it serves as the representative
# input set for the compiler -- confirm against the Concrete ML documentation.
# perf_counter() is used instead of time.time() because it is monotonic, which
# matters for a step that runs long enough to span a system clock adjustment.
print("Compiling model for FHE inference...")
start_compile = time.perf_counter()
model.compile(X_train)
compile_time = time.perf_counter() - start_compile  # elapsed seconds
print(f"Compilation Time: {compile_time:.4f} seconds")

Compiling model for FHE inference...
Compilation Time: 237.2385 seconds


In [8]:
# Write the model to disk through its dump() method.
# `Path` is imported in the notebook's import cell at the top so that every
# dependency is declared in one place rather than mid-notebook.
# NOTE(review): the file name suggests a compiled artifact, but only
# model.dump() is called here -- confirm in the Concrete ML docs whether the
# compiled FHE circuit is part of this serialisation or must be rebuilt on load.
fhe_model_path = Path('../models/compiled_lr_model.json')
with fhe_model_path.open('w') as f:
    model.dump(f)


In [9]:
# Time inference on the test split with fhe="execute"; this is the run that is
# evaluated as the encrypted (FHE) result.
# perf_counter() is used instead of time.time() because it is monotonic and
# intended for measuring elapsed intervals.
# NOTE: `start_pred` and `prediction_time` are the same names the plaintext
# timing cell assigns, so from here on `prediction_time` holds the FHE timing.
# Re-running the plaintext report cell after this one would record this value.
start_pred = time.perf_counter()
y_pred_fhe = model.predict(X_test, fhe="execute")
prediction_time = time.perf_counter() - start_pred  # elapsed seconds

In [11]:
# Score the FHE predictions against the held-out labels.
# Precision, recall and F1 are support-weighted averages over the classes;
# zero_division=0 scores classes with no predictions as 0 instead of warning.
accuracy_plain_fhe = accuracy_score(y_test, y_pred_fhe)
precision_plain_fhe = precision_score(y_test, y_pred_fhe, average='weighted', zero_division=0)
recall_plain_fhe = recall_score(y_test, y_pred_fhe, average='weighted', zero_division=0)
f1_plain_fhe = f1_score(y_test, y_pred_fhe, average='weighted', zero_division=0)
class_report_plain_fhe = classification_report(y_test, y_pred_fhe, zero_division=0)
conf_matrix_plain_fhe = confusion_matrix(y_test, y_pred_fhe)

# Label/value pairs for the summary section of the report.
# `prediction_time` is the FHE timing here: the FHE prediction cell rebinds it.
summary_rows = [
    ("Compile Time", f"{compile_time:.4f} seconds"),
    ("Prediction Time", f"{prediction_time:.4f} seconds"),
    ("Accuracy", f"{accuracy_plain_fhe:.4f}"),
    ("Precision", f"{precision_plain_fhe:.4f}"),
    ("Recall", f"{recall_plain_fhe:.4f}"),
    ("F1 Score", f"{f1_plain_fhe:.4f}"),
]

output_file = "../results/lr_encrypted.txt"
with open(output_file, "w") as f:
    f.write("FHE LR Evaluation Metrics for Encrypted Model:\n")
    f.write("-----------------------------------------------\n")
    # Pad every label to the same width so the colons line up; the hand-typed
    # "Compile Time" label used to be one column narrower than the others.
    for label, value in summary_rows:
        f.write(f"{label:<19}: {value}\n")
    f.write("\nClassification Report:\n")
    f.write(class_report_plain_fhe + "\n")
    f.write("Confusion Matrix:\n")
    f.write(str(conf_matrix_plain_fhe) + "\n")

print(f"FHE evaluation results saved to {output_file}")

FHE evaluation results saved to ../results/lr_encrypted.txt
