In [4]:
import pandas as pd
import numpy as np 
from pathlib import Path
import json

import matplotlib.pyplot as plt 
from matplotlib.ticker import FuncFormatter
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score
from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler


from src.concrete.ml.sklearn import SGDClassifier


In [5]:
# Notebook-wide configuration: N_ITERATIONS is later passed as `max_iter` to
# the classifier, RANDOM_STATE seeds every stochastic step below.
N_ITERATIONS = 15
RANDOM_STATE = 42

# Load the breast-cancer dataset as plain (features, labels) arrays.
X, y = datasets.load_breast_cancer(return_X_y=True)

# Stratified 70/30 split. The split is seeded so that Restart & Run All
# reproduces the same train/test partition (and therefore the same metrics);
# previously only the later permutation was seeded.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=RANDOM_STATE
)

# Rescale every feature to [-1, 1]. The scaler is fitted on the training data
# only and then applied unchanged to the test data. `feature_range` is given as
# a tuple, which is the type scikit-learn documents for this parameter.
scaler = MinMaxScaler(feature_range=(-1, 1))
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Shuffle the training rows with a seeded generator; the same permutation is
# applied to features and labels so they stay aligned.
rng = np.random.default_rng(RANDOM_STATE)
perm = rng.permutation(x_train.shape[0])

x_train = x_train[perm]
y_train = y_train[perm]

In [6]:
# Range handed to the classifier as `parameters_range`.
# NOTE(review): presumably the interval the trained weights are expected to
# stay within during encrypted training -- confirm against the classifier docs.
parameters_range = (-1.0, 1.0)

model_concrete = SGDClassifier(
    random_state=RANDOM_STATE,
    max_iter=N_ITERATIONS,
    fit_encrypted=True,
    parameters_range=parameters_range,
)

# Train on the full training set. The call requests fhe="execute" (the
# previous comment mentioned simulation, which is not what is passed here).
model_concrete.fit(x_train, y_train, fhe="execute")

# Compile the model using the training data as the input set.
model_concrete.compile(x_train)

dumped_model_path = Path("demo-data/classifier_fhe.json")
# Make sure the target directory exists so the dump works on a fresh checkout.
dumped_model_path.parent.mkdir(parents=True, exist_ok=True)

# Any kind of file-like object can be used
with dumped_model_path.open("w") as f:
    # Dump the model in a file
    model_concrete.dump(f)

# Read the dump back from the same path that was just written.
with dumped_model_path.open("r") as j:
    contents = json.load(j)

# Pull the model type name and the serialized quantized weights out of the dump.
model = contents['type_name']
serialized_value = np.array(contents['serialized_value']['_q_weights']['serialized_value'])


# Predict on the test set with fhe="execute" and measure accuracy.
# NOTE(review): this `predict` signature (type name, weights, "42") and the
# returned `proof` come from the local `src.concrete.ml` package; the meaning
# of the "42" argument is not visible from this notebook -- confirm and
# promote it to a named constant.
y_pred_fhe, proof = model_concrete.predict(x_test, model, serialized_value, "42", fhe="execute")

accuracy_fhe = accuracy_score(y_test, y_pred_fhe)

print(proof)

053fc05051867f0f12e80f4ea5caed1f65697d636025c49f6584b3be7745d59f


In [7]:
# Display the test-set accuracy computed from `y_test` and `y_pred_fhe`.
accuracy_fhe

0.8187134502923976