In [1]:
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline


import pandas as pd
import numpy as np
import os 

from concrete.ml.sklearn.xgb import XGBClassifier
from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer

In [2]:
df = pd.read_csv("medical_data.csv")

# print(df.head())
# print(df.columns)

labels = [
    "Hypertension",
    "Diabetes",
    "Asthma",
    "Coronary Artery Disease",
    "Chronic Kidney Disease",
    "Obesity",
    "Anemia",
    "Osteoporosis",
    "Hyperlipidemia",
    "None"
]

FHE_DIRECTORY = 'tmp/fhe_client_server_files/'

In [3]:
y_col = "Label"

X = df.drop(y_col, axis=1)
y = df[y_col] - 1

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [4]:
# Define our model
model = XGBClassifier(n_jobs=1, n_bits=3)

pipeline = Pipeline(
    [("model", model)]
)

In [5]:
# Define the parameters to tune
param_grid = {
    "model__max_depth": [2, 3, 5],
    "model__n_estimators": [5, 10, 20],
}

# Instantiate the grid search with 5-fold cross validation on all available cores
grid = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1, scoring="accuracy")

# Launch the grid search
grid.fit(X_train, y_train)

print(f"X_train shape: {X_train.shape}")

print(f"Best parameters found: {grid.best_params_}")

X_train shape: (800, 20)
Best parameters found: {'model__max_depth': 3, 'model__n_estimators': 20}


In [6]:
best_pipeline = grid.best_estimator_

model = best_pipeline[0]

print(X_train.shape)
# Transform test set
X_train_transformed = X_train
X_test_transformed = X_test
print(X_train_transformed.shape)

(800, 20)
(800, 20)


In [7]:
# Evaluate the model on the test set in clear
y_pred_clear = model.predict(X_test_transformed)
# print(f"Test accuracy in clear: {(y_pred_clear == y_test).mean():0.2f}")

In [8]:
# Compile the model to FHE
model.compile(X_train_transformed)

<concrete.fhe.compilation.circuit.Circuit at 0x314fceaa0>

In [9]:
N_TEST_FHE = 1
print(X_test_transformed[:N_TEST_FHE])
y_pred_fhe = model.predict(X_test_transformed[:N_TEST_FHE], fhe="execute")

     Age_normalized  Blood_Pressure_normalized  Weight_normalized  \
521        0.887428                   0.217349           0.107291   

     Cholesterol_normalized  Glucose_level_normalized  Heart_rate_normalized  \
521                0.615363                  0.822276               0.582044   

     Respiratory_rate_normalized  Body_Temperature_normalized  \
521                     0.962997                     0.795564   

     Oxygen_Saturation_normalized  BMI_normalized  Liver_Function_normalized  \
521                      0.700792        0.060257                   0.107742   

     Kidney_Function_normalized  Hemoglobin_normalized  \
521                    0.784838               0.198238   

     Platelet_Count_normalized  White_Blood_Cell_Count_normalized  \
521                   0.028835                           0.910761   

     Red_Blood_Cell_Count_normalized  Urea_Level_normalized  \
521                         0.890805               0.218484   

     Creatinine_Level_nor

In [10]:
print(f"{(y_pred_fhe == y_pred_clear[:N_TEST_FHE]).sum()} "
      f"examples over {N_TEST_FHE} have an FHE inference equal to the clear inference.")


print(f"Prediction: {labels[y_pred_fhe[0]]}")

1 examples over 1 have an FHE inference equal to the clear inference.
Prediction: Coronary Artery Disease


In [11]:
def delete_directory_recursively(directory):
    for root, dirs, files in os.walk(directory, topdown=False):
        for name in files:
            os.remove(os.path.join(root, name))
        for name in dirs:
            os.rmdir(os.path.join(root, name))
    os.rmdir(directory)

# Setup the development environment

if os.path.exists(FHE_DIRECTORY):
    # Delete the directory recursively if it already exists
    delete_directory_recursively(FHE_DIRECTORY)

In [16]:
dev = FHEModelDev(path_dir=FHE_DIRECTORY, model=model)
dev.save()

# Setup the client
client = FHEModelClient(path_dir=FHE_DIRECTORY, key_dir="/tmp/keys_client")
serialized_evaluation_keys = client.get_serialized_evaluation_keys()

# Client pre-processes new data
X_new = np.random.rand(1, 20)
print(f"New data: {X_new}")
encrypted_data = client.quantize_encrypt_serialize(X_new)

# Setup the server
server = FHEModelServer(path_dir=FHE_DIRECTORY)
server.load()

# Server processes the encrypted data
encrypted_result = server.run(encrypted_data, serialized_evaluation_keys)

# Client decrypts the result
result = client.deserialize_decrypt_dequantize(encrypted_result)

result = labels[np.argmax(result)]
print(f"Prediction: {result}")

New data: [[0.85329019 0.48979363 0.20382468 0.42350831 0.33421152 0.45882316
  0.49870113 0.34251145 0.24217051 0.26723796 0.76525345 0.29732267
  0.21923485 0.64320382 0.20342162 0.29740966 0.18551907 0.37261177
  0.09904469 0.21426128]]
Prediction: Asthma
