In [34]:
# ##! IGNORE THIS if running on Google Colab
# %load_ext notexbook

# %texify

#### Logistic Regression HE

An example of a Logistic Regression model using the **P**artially **H**omomorphic **E**ncryption (`phe`) Python library.

Note: This example has been adapted from the original example on `phe` [repo](https://github.com/data61/python-paillier/blob/master/examples/logistic_regression_encrypted_model.py)

In [1]:
import time
from contextlib import contextmanager

import numpy as np
from sklearn.linear_model import LogisticRegression

import phe as paillier

In [2]:
# Seed NumPy's global RNG so NumPy-based randomness in this notebook is repeatable.
# Note: the train/test split in get_winsconsin_bc_dataset() pins its own random_state.
np.random.seed(123456)

In [3]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [16]:
def get_winsconsin_bc_dataset():
    """
    Load the Breast Cancer Wisconsin dataset and split it into partitions.

    Class 0 is relabelled as -1 so that a predicted label can later be read
    off as the sign of a score. The split is stratified on the labels, keeps
    25% of the samples for testing and uses a fixed ``random_state``.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)`` -- note the order differs from
        the one returned by ``train_test_split``.
    """
    features, labels = load_breast_cancer(return_X_y=True)
    # Map label 0 -> -1 and leave every other label untouched.
    labels = np.where(labels == 0, -1, labels)
    partitions = train_test_split(
        features, labels, test_size=0.25, stratify=labels, random_state=42
    )
    X_train, X_test, y_train, y_test = partitions
    return X_train, y_train, X_test, y_test

In [17]:
# NOTE(review): X_train has not been assigned yet at this point (the dataset is
# only loaded in a later cell), so this cell raises NameError on a fresh kernel.
X_train

NameError: name 'X_train' is not defined

In [18]:
@contextmanager
def timer():
    """Context manager that prints the wall-clock time spent in its ``with`` body.

    Nothing is yielded, so a ``with timer() as t`` target is bound to ``None``.
    The elapsed time is printed when the block exits, including when it exits
    through an exception; the exception is then propagated unchanged.
    """
    time0 = time.perf_counter()
    try:
        yield
    finally:
        # Report even when the timed body raised, so failed runs are timed too.
        print("[elapsed time: %.2f s]" % (time.perf_counter() - time0))

**Alice**: Trains a Logistic Regression model on plain data, encrypts the model (parameters), and decrypts the scores using PHE.

In [19]:
class Alice:
    """
    Model owner.

    Fits a Logistic Regression classifier on plaintext data, produces an
    encrypted copy of its parameters for remote use, and decrypts encrypted
    scores with the Paillier private key.
    """

    def __init__(self):
        self.model = LogisticRegression()

    def generate_paillier_keypair(self, n_length):
        """Generate a Paillier key pair with the given ``n_length`` and store
        it as ``self.pubkey`` / ``self.privkey``."""
        keypair = paillier.generate_paillier_keypair(n_length=n_length)
        self.pubkey, self.privkey = keypair

    def fit(self, X, y):
        """Fit the wrapped classifier on features ``X`` and labels ``y``."""
        fitted_model = self.model.fit(X, y)
        self.model = fitted_model

    def predict(self, X):
        """Return the plaintext model's predictions for ``X``."""
        return self.model.predict(X)

    def encrypt_weights(self):
        """Encrypt the fitted parameters with the public key.

        Returns a pair ``(encrypted_weights, encrypted_intercept)``: a list
        with one ciphertext per entry of the first row of ``coef_``, and the
        ciphertext of the first entry of ``intercept_``.
        """
        weight_row = self.model.coef_[0, :]
        encrypted_weights = []
        for weight in weight_row:
            encrypted_weights.append(self.pubkey.encrypt(weight))
        encrypted_intercept = self.pubkey.encrypt(self.model.intercept_[0])
        return encrypted_weights, encrypted_intercept

    def decrypt_scores(self, encrypted_scores):
        """Decrypt every score with the private key and return them as a list."""
        plain_scores = []
        for ciphertext in encrypted_scores:
            plain_scores.append(self.privkey.decrypt(ciphertext))
        return plain_scores

**Bob**: Receives the encrypted model and the public key. 
Generates scores with the encrypted model but **cannot decrypt** them.

In [20]:
class Bob:
    """
    Data owner.

    Receives the public key and the encrypted model, and scores local
    plaintext rows against that encrypted model. The scores stay encrypted:
    only the holder of the private key (Alice) can decrypt them.
    """

    def __init__(self, pubkey):
        # Only the public key is stored; no private key is kept on this class.
        self.pubkey = pubkey

    def set_weights(self, weights, intercept):
        """Store the encrypted coefficients and the encrypted intercept."""
        self.intercept = intercept
        self.weights = weights

    def encrypted_score(self, x):
        """Score one sample ``x`` against the encrypted model.

        Starts from the encrypted intercept and adds ``x[i] * weights[i]`` for
        every position ``i`` where ``x`` is non-zero (zero entries are
        skipped). The weights are expected to be a vector of
        `paillier.EncryptedNumber`.
        """
        score = self.intercept
        nonzero_positions = x.nonzero()[0]
        for pos in nonzero_positions:
            score += x[pos] * self.weights[pos]
        return score

    def encrypted_evaluate(self, X):
        """Return the list of encrypted scores, one per row of ``X``."""
        row_scores = []
        for row in range(X.shape[0]):
            row_scores.append(self.encrypted_score(X[row, :]))
        return row_scores

In [21]:
X_train, y_train, X_test, y_test = get_winsconsin_bc_dataset()

# Feature Scaling
# NOTE(review): this import sits mid-notebook; consider moving it to the imports cell.
from sklearn.preprocessing import RobustScaler

# Fit the scaler on the training partition only, then apply that same fitted
# transformation to the test partition.
sc = RobustScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [22]:
# Inspect the scaled training features (NumPy abbreviates the printed array).
X_train

array([[ 1.60851064,  0.24720069,  1.5354861 , ...,  0.64490763,
        -0.38469638,  0.55422561],
       [-0.09361702, -0.98277347, -0.14332585, ..., -0.44025506,
        -0.72165672, -0.51988877],
       [ 0.93981763, -0.41774332,  1.03470903, ...,  1.5895355 ,
         1.74517375,  1.25740539],
       ...,
       [ 0.53130699,  0.8828596 ,  0.61336557, ...,  1.19938503,
         0.26676027,  1.29609479],
       [ 0.91793313,  0.04392765,  0.94491452, ...,  1.58247851,
         0.4015444 ,  0.5179543 ],
       [-0.85057751, -0.3161068 , -0.81298567, ..., -0.44348111,
        -0.40716041,  0.23116915]])

# Alice: Generating paillier keypair

In [23]:
print("Alice: Generating paillier keypair")
alice = Alice()
# NOTE: using smaller key sizes wouldn't be cryptographically safe
# NOTE(review): confirm that n_length=1024 is adequate before reusing this
# outside of a demo setting.
alice.generate_paillier_keypair(n_length=1024)

Alice: Generating paillier keypair


In [24]:
print("Alice: Training BC Classifier")
# timer() yields no value, so `t` is bound to None; it is not used here.
with timer() as t:
    alice.fit(X_train, y_train)

Alice: Training BC Classifier
[elapsed time: 0.95 s]


**Just test the model's performance on the test set**, as if Alice had access to Bob's (test) data

In [25]:
print(
    "Classify with model in the clear -- "
    "what Alice would get having Bob's data locally"
)
with timer() as t:
    # Misclassification rate of the plaintext model on the test partition.
    error = np.mean(alice.predict(X_test) != y_test)
print("Error {:.3f}".format(error))

Classify with model in the clear -- what Alice would get having Bob's data locally
[elapsed time: 0.00 s]
Error 0.014


In [26]:
# Accuracy = 1 - error rate, using the rounded error printed by the previous cell.
1 - 0.014  # ~98.6%

0.986

Now Alice encrypts her (trained) model Parameters

# Alice: get the encrypted weights and intercept

In [27]:
print("Alice: Encrypting classifier")
with timer() as t:
    # One ciphertext per model coefficient, plus one for the intercept.
    encrypted_weights, encrypted_intercept = alice.encrypt_weights()

Alice: Encrypting classifier
[elapsed time: 0.36 s]


In [28]:
# The intercept is now an opaque phe EncryptedNumber.
encrypted_intercept

<phe.paillier.EncryptedNumber at 0x254d78a00a0>

In [29]:
# A list of EncryptedNumber ciphertexts, one per model coefficient.
encrypted_weights

[<phe.paillier.EncryptedNumber at 0x254d789f130>,
 <phe.paillier.EncryptedNumber at 0x254d789fbb0>,
 <phe.paillier.EncryptedNumber at 0x254bd69f1c0>,
 <phe.paillier.EncryptedNumber at 0x254bd69f400>,
 <phe.paillier.EncryptedNumber at 0x254bd69fc10>,
 <phe.paillier.EncryptedNumber at 0x254bd69fc40>,
 <phe.paillier.EncryptedNumber at 0x254bd693bb0>,
 <phe.paillier.EncryptedNumber at 0x254bd693a00>,
 <phe.paillier.EncryptedNumber at 0x254d789f7c0>,
 <phe.paillier.EncryptedNumber at 0x254d789f940>,
 <phe.paillier.EncryptedNumber at 0x254d789fe50>,
 <phe.paillier.EncryptedNumber at 0x254d789f4f0>,
 <phe.paillier.EncryptedNumber at 0x254d789f7f0>,
 <phe.paillier.EncryptedNumber at 0x254d789f760>,
 <phe.paillier.EncryptedNumber at 0x254d789f4c0>,
 <phe.paillier.EncryptedNumber at 0x254d65c29a0>,
 <phe.paillier.EncryptedNumber at 0x254d65c2a60>,
 <phe.paillier.EncryptedNumber at 0x254d65c29d0>,
 <phe.paillier.EncryptedNumber at 0x254d78a0fa0>,
 <phe.paillier.EncryptedNumber at 0x254d78a0dc0>,


In [30]:
print("Bob: Scoring with encrypted classifier")
bob = Bob(alice.pubkey)  # Bob is constructed with Alice's public key only
bob.set_weights(encrypted_weights, encrypted_intercept)

Bob: Scoring with encrypted classifier


In [31]:
with timer() as t:
    # One encrypted score per row of X_test.
    encrypted_scores = bob.encrypted_evaluate(X_test)

[elapsed time: 5.33 s]


**Finally** Alice needs to _decrypt_ Bob's scores on test data

In [32]:
print("Alice: Decrypting Bob's scores")
with timer() as t:
    # Decrypt each score with Alice's private key.
    scores = alice.decrypt_scores(encrypted_scores)

Alice: Decrypting Bob's scores
[elapsed time: 0.57 s]


In [33]:
# The predicted label is the sign of the decrypted score; this matches the
# labels because class 0 was remapped to -1 when the dataset was loaded.
error = np.mean(np.sign(scores) != y_test)
print(
    "Error {:.3f} -- this is not known to Alice, who does not possess "
    "the ground truth labels".format(error)
)

Error 0.014 -- this is not known to Alice, who does not possess the ground truth labels


## Where to go next ?

If you are interested to see an example of `FL` w/ `HE`, you could have a look at this [notebook](https://nbviewer.org/github/leriomaggio/privacy-preserving-data-science/blob/main/3-federated-learning-he/3%20Federated%20Learning%20and%20HE.ipynb)