# Scenario System Design

In [4]:
#install package for encryption
!pip install phe

Collecting phe
  Downloading phe-1.5.0-py2.py3-none-any.whl (53 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/53.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m51.2/53.7 kB[0m [31m1.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.7/53.7 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: phe
Successfully installed phe-1.5.0


In [5]:
#import libraries
import os
import math
import time
import numpy as np
import pandas as pd
import phe.encoding
from phe import paillier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [33]:
# Functions

class ExampleEncodedNumber(phe.encoding.EncodedNumber):
    """`EncodedNumber` subclass that sets the encoding base to 64.

    `LOG2_BASE` is the base-2 logarithm of `BASE`.
    """

    BASE = 64
    # log2(64) == 6.0
    LOG2_BASE = math.log2(BASE)

def encrypt_vector(vec, public_key):
  """Encrypt every element of `vec` with `public_key`.

  Args:
    vec: iterable of values accepted by `public_key.encrypt`.
    public_key: object exposing an `encrypt(value)` method.

  Returns:
    A list holding one encrypted value per input element, in input order.
  """
  ciphertexts = []
  for value in vec:
    ciphertexts.append(public_key.encrypt(value))
  return ciphertexts

def decrypt_vector(vec, private_key):
  """Decrypt every element of `vec` with `private_key`.

  Args:
    vec: iterable of encrypted values accepted by `private_key.decrypt`.
    private_key: object exposing a `decrypt(value)` method.

  Returns:
    A list holding one decrypted value per input element, in input order.
  """
  return list(private_key.decrypt(ciphertext) for ciphertext in vec)

def load_data(input_file, target_feature):
  """Read a CSV file and split it into feature rows and labels.

  Rows that contain any missing value are dropped before the split.

  Args:
    input_file: path (or other source accepted by `pd.read_csv`) of the CSV.
    target_feature: name of the label column.

  Returns:
    Tuple `(X, y)`: `X` is a list of rows, each a list with the values of
    every column except `target_feature`; `y` is the list of values of the
    `target_feature` column.
  """
  frame = pd.read_csv(input_file).dropna()
  feature_rows = frame.drop(columns=target_feature).values.tolist()
  labels = frame[target_feature].values.tolist()
  return feature_rows, labels


## Bank Server

In [32]:
class Client:
    """Data owner: holds the Paillier key pair, encrypts its data and scores predictions."""

    def __init__(self, key_length):
        """Generate a Paillier key pair, forwarding `key_length` as `n_length`."""
        self.public_key, self.private_key = paillier.generate_paillier_keypair(
            n_length=key_length
        )

    def encrypt_data(self, input_file, target_feature):
        """Load the test CSV and encrypt its feature rows and labels.

        The plaintext rows and labels are kept on `self.X_test` / `self.y_test`;
        the encrypted versions are kept on `self.X_test_encrypted` /
        `self.y_test_encrypted` and also returned as a tuple in that order.
        """
        self.X_test, self.y_test = load_data(input_file, target_feature)

        # Encrypt row by row so each sample stays a list of ciphertexts.
        encrypted_rows = []
        for row in self.X_test:
            encrypted_rows.append(encrypt_vector(row, self.public_key))
        self.X_test_encrypted = encrypted_rows

        self.y_test_encrypted = encrypt_vector(self.y_test, self.public_key)
        return self.X_test_encrypted, self.y_test_encrypted

    def eval(self, encrypted_predictions):
        """Decrypt the scores, threshold them at zero and return the accuracy.

        A decrypted score greater than 0 is mapped to label 1, anything else
        to label 0; the labels are compared with `self.y_test`.
        """
        scores = decrypt_vector(encrypted_predictions, self.private_key)
        predicted_labels = []
        for score in scores:
            predicted_labels.append(1 if score > 0 else 0)
        return accuracy_score(self.y_test, predicted_labels)

## PTFI Server

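The server uses a Support Vector Machine (SVM) classifier. Encrypted inference is computed from the model's `coef_` and `intercept_`, so the SVM is trained with a linear kernel.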


In [34]:
class Server:
    """Model owner: trains an SVM on its own plaintext data and scores encrypted rows."""

    def __init__(self, input_file, target_feature):
        """Load the training set from `input_file`.

        `self.model` stays `None` until `train_model` has been called.
        """
        self.model = None
        self.X_train, self.y_train = load_data(input_file, target_feature)

    def train_model(self, hyperparams):
        """Fit an SVC on the training data and store it on `self.model`.

        Args:
            hyperparams: mapping that provides the keys 'kernel', 'C' and 'gamma'.

        Returns:
            The fitted model (the same object as `self.model`), so callers that
            assign the result get the model instead of `None`.

        Raises:
            KeyError: if one of the three hyperparameter keys is missing.
        """
        svm_model = SVC(
            kernel=hyperparams['kernel'],
            C=hyperparams['C'],
            gamma=hyperparams['gamma'],
        )
        svm_model.fit(self.X_train, self.y_train)
        self.model = svm_model
        return svm_model

    def predict(self, X_test_encrypted):
        """Compute the linear decision score `b + sum_i x[i] * w[i]` for each row.

        The weights are the first row of `self.model.coef_` and the bias is the
        first entry of `self.model.intercept_`. scikit-learn exposes `coef_`
        only for linear kernels, so the model must have been trained with
        kernel='linear'.

        Args:
            X_test_encrypted: iterable of rows; each row is a sequence of values
                that support `value * float` and `float + value` (for example
                Paillier-encrypted numbers).

        Returns:
            A list with one score per row, in input order.

        Raises:
            RuntimeError: if no model has been trained yet.
            ValueError: if a row's length differs from the number of weights.
        """
        if self.model is None:
            raise RuntimeError("Model has not been trained; call train_model() first.")

        # Plain Python floats, so the mixed arithmetic below is handled by the
        # row values' own operator overloads.
        weights = [float(coefficient) for coefficient in self.model.coef_[0]]
        bias = float(self.model.intercept_[0])

        encrypted_logits = []
        for row in X_test_encrypted:
            # A shorter row would otherwise give a silently truncated score.
            if len(row) != len(weights):
                raise ValueError(
                    f"Expected {len(weights)} features per sample, got {len(row)}."
                )
            score = bias
            for value, weight in zip(row, weights):
                score = score + value * weight
            encrypted_logits.append(score)
        return encrypted_logits

Data Transfer code

Let's assume that our SVM model is used to predict fraud. We first train the model on our server with our own dataset. The bank then encrypts its dataset on the bank's own machine and sends the encrypted data to our server, where our model computes predictions on it. Finally, we send the encrypted predictions back to the bank, which decrypts them.

In [None]:
# End-to-end run: train on the server, encrypt on the bank side, score the
# encrypted rows on the server, then decrypt and evaluate on the bank side.
# The whole run is timed with wall-clock timestamps.
start_time = time.time()

# Parameters
# key_length is forwarded to the Paillier key generation as `n_length`.
# NOTE(review): 1024 looks like a speed-oriented demo setting — confirm it is
# not meant for production use.
key_length = 1024
# Name of the label column in both CSV files.
target_feature = "IsFraud"
# Keys read by Server.train_model. Server.predict reads the model's `coef_`,
# hence the linear kernel.
hyperparameters = {'kernel':'linear', 'C':1, 'gamma':'auto'}

# Instantiate Server (loads the training CSV)
server = Server("PTFItrainingdataset.csv", target_feature)
# Train SVM Classifier. Inference below goes through `server.predict`, which
# uses the model stored on `server.model`, not this variable.
svm_model = server.train_model(hyperparameters)

# Instantiate Bank (generates the key pair)
client = Client(key_length)
# Encrypt data on bank machine. The encrypted labels are returned as well but
# are not used further in this cell.
X_test_encrypted, y_test_encrypted = client.encrypt_data("bank/datasettest.csv", target_feature)

# Send encrypted data to server for inference
encrypted_preds = server.predict(X_test_encrypted)

# Send encrypted predictions back to bank and evaluate accuracy
test_accuracy = client.eval(encrypted_preds)
print(f'\n\nTest accuracy for Privacy-preserving SVM Model is {round(test_accuracy, 2)}')

end_time = time.time()
time_taken = (end_time - start_time)
# `//` on a float yields a float, so the minutes are printed as e.g. `2.0`.
print(f'\n\nTotal Time Taken: = {time_taken//60} mins {round(time_taken%60,2)} secs')