In [1]:
!pip install phe

Collecting phe
  Downloading phe-1.5.0-py2.py3-none-any.whl (53 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/53.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.7/53.7 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: phe
Successfully installed phe-1.5.0


In [2]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from scipy.special import expit
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [3]:
# Synthetic alternative, kept for reference:
# X, y = make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=42)
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature tables are read from CSV and converted straight to NumPy arrays.
X_train = pd.read_csv('X_train_classification.csv').to_numpy()
X_test = pd.read_csv('X_test_classification.csv').to_numpy()

# Label tables are flattened to 1-D vectors after conversion.
y_train = pd.read_csv('y_train_classification.csv').to_numpy().reshape(-1)
y_test = pd.read_csv('y_test_classification.csv').to_numpy().reshape(-1)

In [4]:
from phe import paillier
# Generate the Paillier key pair; `public_key` is used for every encrypt call
# and `private_key` for every decrypt call in the rest of this notebook.
public_key,private_key=paillier.generate_paillier_keypair()

In [5]:
# Learn the scaling statistics from the training features only, then apply
# the same fitted transform to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
def paillier_multiplication_edge(a,b):
    """Decrypt ``a`` with the notebook-level ``private_key`` and return it times ``b``.

    ``b`` is not decrypted here; it is only used as the right-hand operand of
    the multiplication.
    """
    return private_key.decrypt(a) * b

In [7]:
def paillier_multiplication_cloud(a, b, noise=10):
    """Multiply ``a`` by ``b`` through the edge helper, offsetting ``a`` first.

    ``noise`` is added to ``a`` before it is handed to
    ``paillier_multiplication_edge`` (which decrypts its first argument), and
    the surplus term ``b * noise`` is removed afterwards, so the result is
    ``(a + noise) * b - noise * b``, i.e. ``a * b``.

    Parameters
    ----------
    a : first operand; it is offset and then passed to the edge helper.
        Presumably a Paillier ciphertext - confirm against callers.
    b : second operand; only multiplied and added here, never decrypted.
    noise : offset added to ``a``. Defaults to 10, the constant that was
        previously hard-coded, so existing two-argument calls are unchanged.

    Returns
    -------
    The edge helper's product with the offset contribution subtracted.
    """
    # NOTE(review): the default offset is the same on every call; if the
    # offset is meant to hide ``a`` from the party holding the private key,
    # a fresh random value per call is needed - confirm the intent.
    masked = a + noise
    correction = (b * noise) * (-1)
    return paillier_multiplication_edge(masked, b) + correction

In [8]:
def sigmoid(z):
    """Return a fresh encryption of ``expit`` applied to the decrypted ``z``.

    The value is decrypted with the notebook-level ``private_key``, passed
    through ``scipy.special.expit`` in plain text, and re-encrypted with
    ``public_key``.
    """
    plain = private_key.decrypt(z)
    return public_key.encrypt(expit(plain))

In [9]:
# m = number of rows, n = number of columns of the scaled training matrix.
m, n = X_train.shape
# NOTE(review): `weights` and `bias` are not read by any later cell shown
# here; logistic_regression_sgd builds its own local copies.
weights = np.zeros(n)
bias = 0
# Step size later passed to logistic_regression_sgd.
learning_rate=0.01

In [10]:
# Encrypt every value of the training matrix, preserving the row layout
# (one inner list of ciphertexts per sample).
enc_X_train = [[public_key.encrypt(value) for value in row] for row in X_train]

In [11]:
# Encrypt each training label after casting it to a plain Python int.
enc_y_train = [public_key.encrypt(int(label)) for label in y_train]

In [12]:
import time
import psutil
import os
from threading import Thread

In [13]:
def get_cpu_and_ram_utilization(pid, duration):
    """Sample CPU and RAM usage of process ``pid`` until ``duration`` seconds elapse.

    Each iteration calls ``cpu_percent(interval=1)`` and ``memory_percent()``
    on the process, records both readings and prints them.

    Returns
    -------
    tuple
        ``(sum of CPU readings, list of CPU readings, list of RAM readings)``.
    """
    proc = psutil.Process(pid)
    cpu_samples, ram_samples = [], []
    began = time.time()
    while True:
        # Stop once the elapsed wall-clock time is no longer below the budget.
        if not (time.time() - began < duration):
            break
        cpu_now = proc.cpu_percent(interval=1)
        ram_now = proc.memory_percent()
        cpu_samples.append(cpu_now)
        ram_samples.append(ram_now)
        print(f"CPU utilization: {cpu_now}% | RAM utilization: {ram_now}%")
    return sum(cpu_samples), cpu_samples, ram_samples

In [14]:
# PID of the current process; this is the process whose usage gets sampled.
pid = os.getpid()

# Duration to monitor CPU usage, in seconds (compared against time.time()
# deltas inside get_cpu_and_ram_utilization).
monitor_duration = 320

In [15]:
# NOTE(review): 15-second pause before training is started; presumably lets
# the process settle so the usage samples are not skewed by the encryption
# cells above - confirm intent.
time.sleep(15)

In [16]:
results={}
def logistic_regression_sgd(X, y, n, learning_rate=0.01, epochs=3):
    """Train logistic regression with per-sample SGD on encrypted data.

    Parameters
    ----------
    X : indexable sequence of rows, each row an iterable of encrypted
        feature values.
    y : indexable sequence of encrypted labels, aligned with ``X``.
    n : number of features (length of the weight vector).
    learning_rate : step size applied to every gradient update.
    epochs : number of full passes over the samples.

    Returns
    -------
    None. The outcome is written to the module-level ``results`` dict under
    ``'train_time'`` (elapsed seconds), ``'weights'`` (list of encrypted
    weights) and ``'bias'`` (encrypted bias), so it can be read after the
    function has run in a worker thread.
    """
    train_time_start=time.time()

    # Weights and bias start at zero and stay encrypted during training.
    weights_enc=[public_key.encrypt(w) for w in np.zeros(n)]
    bias_enc=public_key.encrypt(0)

    # Train on the data that was passed in rather than on notebook globals;
    # the sample count comes from the argument as well.
    n_samples=len(X)

    for epoch in range(epochs):
        # Visit the samples in a fresh random order each epoch.
        for idx in np.random.permutation(n_samples):
            xi=X[idx]
            yi=y[idx]

            # Linear score: sum of feature * weight products plus the bias.
            linear_output=0
            for x_ij,w_j in zip(xi,weights_enc):
                linear_output=linear_output+paillier_multiplication_cloud(x_ij,w_j)
            linear_output=linear_output+bias_enc

            y_pred=sigmoid(linear_output)

            # Error term (prediction - label); it is also the bias gradient.
            db=y_pred + (yi * (-1))
            # Weight gradient: each feature value times the error term.
            dw=[paillier_multiplication_cloud(x_ij,db) for x_ij in xi]

            # Gradient-descent step on every weight and on the bias.
            for j in range(len(weights_enc)):
                weights_enc[j]=weights_enc[j]+dw[j]*(-1)*learning_rate

            bias_enc=bias_enc+(db*-1)*learning_rate

    train_time_end=time.time()
    train_time=train_time_end-train_time_start

    results['train_time']=train_time
    results['weights']=weights_enc
    results['bias']=bias_enc
    print(results['train_time'])

In [17]:
# Run training in a background thread so the main thread can sample this
# process's CPU/RAM usage while training executes.
train_thread = Thread(target=logistic_regression_sgd, args=(enc_X_train, enc_y_train, n, learning_rate))
train_thread.start()

# The sampler keeps looping until `monitor_duration` seconds have elapsed;
# it does not check whether the training thread has already finished.
total_cpu_usage, cpu_usages, ram_usages = get_cpu_and_ram_utilization(pid, monitor_duration)
train_thread.join()

# `total_cpu_usage` is the sum of the per-sample percentages, not an average.
print(f"Total CPU utilization over {monitor_duration} seconds: {total_cpu_usage}%")

# Print the CPU and RAM usage per second
print("CPU usage per second:", cpu_usages)
print("RAM usage per second:", ram_usages)

# Calculate total CPU resource consumption in 'CPU-seconds'
# (each sample is a percentage taken over a 1-second interval, hence / 100).
cpu_seconds = sum(cpu_usages) / 100
print(f"Total CPU resource consumption: {cpu_seconds} CPU-seconds")

CPU utilization: 108.7% | RAM utilization: 1.5146804550030624%
CPU utilization: 99.7% | RAM utilization: 1.5146804550030624%
CPU utilization: 91.6% | RAM utilization: 1.5146804550030624%
CPU utilization: 96.6% | RAM utilization: 1.5146804550030624%
CPU utilization: 100.8% | RAM utilization: 1.5146804550030624%
CPU utilization: 93.0% | RAM utilization: 1.5146804550030624%
CPU utilization: 101.8% | RAM utilization: 1.5146804550030624%
CPU utilization: 99.3% | RAM utilization: 1.5146804550030624%
CPU utilization: 100.6% | RAM utilization: 1.5146804550030624%
CPU utilization: 99.2% | RAM utilization: 1.5146804550030624%
CPU utilization: 100.0% | RAM utilization: 1.5146804550030624%
CPU utilization: 98.7% | RAM utilization: 1.5146804550030624%
CPU utilization: 98.4% | RAM utilization: 1.5146804550030624%
CPU utilization: 97.3% | RAM utilization: 1.5146804550030624%
CPU utilization: 98.6% | RAM utilization: 1.5146804550030624%
CPU utilization: 99.2% | RAM utilization: 1.5146804550030624%
CPU

In [18]:
# Decrypt the trained bias so it is displayed in plain text.
private_key.decrypt(results['bias'])

0.0016347815812693446

In [19]:
# Decrypt and print each trained weight, one per line.
for i in results['weights']:
  print(private_key.decrypt(i))

0.12596141476070313
0.7054294238278394


In [20]:
# Encrypt every value of the test matrix, preserving the row layout
# (one inner list of ciphertexts per sample).
enc_X_test = [[public_key.encrypt(value) for value in row] for row in X_test]

In [21]:
# Pull the encrypted parameters that logistic_regression_sgd stored in `results`.
weights_enc=results['weights']
bias_enc=results['bias']

In [22]:
y_pred_enc = []

test_time_start = time.time()

# Score each encrypted test row: weighted sum of features plus bias, then sigmoid.
for enc_row in enc_X_test:
    score = 0
    for enc_feature, enc_weight in zip(enc_row, weights_enc):
        score = score + paillier_multiplication_cloud(enc_feature, enc_weight)
    y_pred_enc.append(sigmoid(score + bias_enc))

# Decrypt every probability and threshold it at 0.5 to get a 0/1 label.
y_pred_unenc = [
    1 if private_key.decrypt(enc_prob) >= 0.5 else 0
    for enc_prob in y_pred_enc
]

test_time_end = time.time()
test_time = test_time_end - test_time_start
print(test_time)

19.11967921257019


In [23]:
from sklearn.metrics import accuracy_score
# Compare the thresholded 0/1 predictions against the plain-text test labels.
accuracy = accuracy_score(y_test, y_pred_unenc)
print(f'Accuracy: {accuracy}')

Accuracy: 0.9


In [24]:
# Decrypt the trained weights into a plain Python list.
weights_unenc = [private_key.decrypt(enc_weight) for enc_weight in weights_enc]

print(weights_unenc)

[0.12596141476070313, 0.7054294238278394]


In [25]:
# Decrypt the trained bias into a plain number.
bias_unenc=private_key.decrypt(bias_enc)
print(bias_unenc)

0.0016347815812693446


In [26]:
import joblib
# Persist the run's outputs as one positional tuple; a loader must unpack it in
# this exact order: weights, bias, accuracy, cpu_seconds, cpu_usages,
# ram_usages, train_time, test_time.
joblib.dump((weights_unenc, bias_unenc, accuracy, cpu_seconds, cpu_usages, ram_usages, results['train_time'], test_time), 'variables_paillier.pkl')

['variables_paillier.pkl']