In [119]:
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np
import os 

from concrete.ml.sklearn.xgb import XGBClassifier
from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer

In [107]:
# Load the dataset from the CSV file next to the notebook.
df = pd.read_csv("medical_data.csv")

# Human-readable condition names. A predicted class index is used to look up
# the matching entry (see the `labels[...]` lookup after the FHE prediction).
labels = [
    "Hypertension", "Diabetes", "Asthma",
    "Coronary Artery Disease", "Chronic Kidney Disease", "Obesity",
    "Anemia", "Osteoporosis", "Hyperlipidemia",
    "None",
]

# Directory handed to FHEModelDev / FHEModelClient / FHEModelServer as `path_dir`.
FHE_DIRECTORY = "tmp/fhe_client_server_files/"

In [108]:
y_col = "Label"

# Features are every column except the target.
X = df.drop(columns=y_col)
# Shift the target down by one (presumably the CSV stores 1-based class ids,
# which makes them 0-based indices into `labels` -- confirm against the data).
y = df[y_col] - 1

# Hold out 20% of the rows for testing; fixed seed for a reproducible split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)



In [109]:
# Concrete ML XGBoost classifier; it is the last step of the pipeline below.
model = XGBClassifier(n_bits=3, n_jobs=1)

# Pre-processing (standardisation, then PCA) followed by the classifier.
pipeline = Pipeline(
    steps=[
        ("standard_scaler", StandardScaler()),
        ("pca", PCA(random_state=0)),
        ("model", model),
    ]
)

In [110]:
# Search space: keys use the `<step name>__<parameter>` form, so they address
# the "pca" and "model" steps of `pipeline`.
param_grid = dict(
    pca__n_components=[2, 5, 10, 15],
    model__max_depth=[2, 3, 5],
    model__n_estimators=[5, 10, 20],
)

# 5-fold cross-validated grid search scored on accuracy, on all available cores.
grid = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
)

# Run the search on the training split.
grid.fit(X_train, y_train)

print(f"X_train shape: {X_train.shape}")

print(f"Best parameters found: {grid.best_params_}")

X_train shape: (800, 20)
Best parameters found: {'model__max_depth': 2, 'model__n_estimators': 10, 'pca__n_components': 2}


In [111]:
best_pipeline = grid.best_estimator_

# Separate the fitted pre-processing steps (everything but the last step)
# from the classifier itself (the last step). Note that `model` is rebound here.
data_transformation_pipeline, model = best_pipeline[:-1], best_pipeline[-1]

# Apply the fitted pre-processing to BOTH the training and the test split.
X_train_transformed, X_test_transformed = (
    data_transformation_pipeline.transform(split) for split in (X_train, X_test)
)

In [112]:
# Predict on the transformed test set in the clear (no `fhe=` argument is
# passed here, unlike the FHE prediction further down). These predictions are
# the baseline that the FHE predictions are compared against.
y_pred_clear = model.predict(X_test_transformed)
# Accuracy report is currently disabled; uncomment to print it.
# print(f"Test accuracy in clear: {(y_pred_clear == y_test).mean():0.2f}")

In [113]:
# Compile the model to FHE, passing the transformed training data.
# The call is the last expression of the cell, so the returned Circuit object
# is displayed as the cell output.
model.compile(X_train_transformed)

<concrete.fhe.compilation.circuit.Circuit at 0x308025480>

In [114]:
# Only the first N_TEST_FHE transformed test samples are run with fhe="execute".
N_TEST_FHE = 10
fhe_inputs = X_test_transformed[:N_TEST_FHE]
y_pred_fhe = model.predict(fhe_inputs, fhe="execute")

In [115]:
# Count how many FHE predictions agree with the clear predictions on the
# same leading slice of the test set.
n_matching = (y_pred_fhe == y_pred_clear[:N_TEST_FHE]).sum()
print(
    f"{n_matching} "
    f"examples over {N_TEST_FHE} have an FHE inference equal to the clear inference."
)

# Show the condition name for the first FHE prediction.
first_label = labels[y_pred_fhe[0]]
print(f"Prediction: {first_label}")

10 examples over 10 have an FHE inference equal to the clear inference.
Prediction: Obesity


In [134]:
def delete_directory_recursively(directory):
    """Delete ``directory`` together with everything it contains.

    The tree is walked bottom-up so that every directory is already empty by
    the time it is removed. Symbolic links found inside the tree are unlinked
    rather than followed, so the link targets are never modified.

    Args:
        directory: Path of the directory to delete.

    Raises:
        OSError: Propagated from ``os.remove`` / ``os.rmdir``; in particular
            the final ``os.rmdir`` fails if ``directory`` does not exist.
    """
    for root, dirs, files in os.walk(directory, topdown=False):
        for name in files:
            os.remove(os.path.join(root, name))
        for name in dirs:
            path = os.path.join(root, name)
            # os.walk reports symlinks to directories in `dirs` without
            # descending into them; os.rmdir would fail on such a link, so
            # remove the link itself instead.
            if os.path.islink(path):
                os.remove(path)
            else:
                os.rmdir(path)
    os.rmdir(directory)

# Start from a clean slate before the deployment files are written to FHE_DIRECTORY

if os.path.exists(FHE_DIRECTORY):
    # A previous run left the directory behind: remove it and all of its contents
    delete_directory_recursively(FHE_DIRECTORY)

In [142]:
# Setup the development environment: write the compiled model's deployment
# files into FHE_DIRECTORY (the client and the server below both read from it)
dev = FHEModelDev(path_dir=FHE_DIRECTORY, model=model)
dev.save()

# Setup the client
client = FHEModelClient(path_dir=FHE_DIRECTORY, key_dir="/tmp/keys_client")
serialized_evaluation_keys = client.get_serialized_evaluation_keys()

# Client pre-processes new data.
# The sample must have as many columns as the data the model was compiled on,
# i.e. the number of PCA components picked by the grid search. Deriving the
# width from X_train_transformed keeps this cell working when the search
# selects something other than 2 components.
n_model_features = X_train_transformed.shape[1]
X_new = np.random.rand(1, n_model_features)
print(f"New data: {X_new}")
encrypted_data = client.quantize_encrypt_serialize(X_new)

# Setup the server
server = FHEModelServer(path_dir=FHE_DIRECTORY)
server.load()

# Server processes the encrypted data
encrypted_result = server.run(encrypted_data, serialized_evaluation_keys)

# Client decrypts the result.
# NOTE(review): the printed value is one row of per-class scores (one entry
# per item in `labels`), not a class index -- confirm against the Concrete ML
# documentation before mapping it to a label.
result = client.deserialize_decrypt_dequantize(encrypted_result)
print(f"Prediction: {result}")

New data: [[0.82616204 0.97641661]]
Prediction: [[0.10049049 0.10049049 0.10049049 0.10049049 0.11153934 0.10049049
  0.10049049 0.11153934 0.07348788 0.10049049]]
