In [1]:
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import joblib
import statsmodels.api as sm
from statsmodels.formula.api import ols
%matplotlib inline
plt.style.use('dark_background')

In [2]:
# Load the seven Paillier-labelled values stored together in one joblib pickle.
# joblib.load unpickles the file, so only open files from a trusted source.
(
    weights_paillier,
    bias_paillier,
    unenc_preds_paillier,
    cpu_seconds_paillier,
    cpu_usages_paillier,
    train_time_paillier,
    test_time_paillier,
) = joblib.load('variables_paillier.pkl')

In [3]:
# Load the seven CKKS-labelled values stored together in one joblib pickle.
# NOTE(review): 'variables (1).pkl' is a generic download-style name; confirm it
# really is the CKKS run before trusting the comparison.
(
    weights_ckks,
    bias_ckks,
    unenc_preds_ckks,
    cpu_seconds_ckks,
    cpu_usages_ckks,
    train_time_ckks,
    test_time_ckks,
) = joblib.load('variables (1).pkl')

In [4]:
# Load the three sklearn-labelled values (weights, bias, predictions) from their pickle.
(
    weights_sklearn,
    bias_sklearn,
    unenc_preds_sklearn,
) = joblib.load('variables_sklearn.pkl')

In [5]:
# Read the CSV into a DataFrame and convert it straight to a NumPy array.
y_test = pd.read_csv('y_test1.csv').to_numpy()

In [6]:
# Collapse y_test to a 1-D vector so it lines up with the prediction vectors.
y_test = y_test.reshape(-1)
y_test

array([71.2, 75.1, 67.7, 71.3, 73.1, 55.3, 61.4, 74.1, 78.1, 76.8, 56.7,
       73. , 73.9, 69.1, 44.5, 68.9, 59.2, 58. , 72.5, 71.9, 69.4, 72.2,
       72.8, 75. , 75.9, 61.8, 76.5, 59.2, 52.4, 71.9, 59.5, 62.7, 71.6,
       62.2, 79.4, 74.5, 76. , 73.2, 51.2, 69. , 61.6, 69.9, 67.3, 83. ,
       67.7, 68. , 71.1, 74.8, 77.5, 73. , 75.3, 44.3, 73.6, 74.7, 75.5,
       77. , 57.5, 73.1, 74.8, 71. , 72.4, 67.1, 75. , 71.8, 45.4, 69.7,
       69.8, 76.2, 77.7, 69.6, 72. , 73.9, 73.2, 75. , 54.8, 74.2, 51. ,
       73.8, 68.1, 74.9, 61.1, 70. , 68.1, 64.2, 73.6, 71.9, 77. , 58.6])

In [7]:
# Convert the Paillier predictions to an ndarray and flatten them to 1-D.
# The explicit reshape guarantees a flat vector even if the pickled predictions
# are nested (e.g. one-element lists); without it, the later element-wise
# differences against other 1-D prediction vectors would silently broadcast
# to a 2-D matrix.
unenc_preds_paillier = np.array(unenc_preds_paillier).reshape(-1)
unenc_preds_paillier.shape

(88,)

In [8]:
y_test.shape

(88,)

In [9]:
# Convert the CKKS predictions to an ndarray and flatten them to 1-D in one step.
unenc_preds_ckks = np.array(unenc_preds_ckks).reshape(-1)
unenc_preds_ckks.shape

(88,)

In [10]:
# Flatten the sklearn predictions to 1-D (the object must already expose .reshape).
unenc_preds_sklearn = unenc_preds_sklearn.reshape(-1)
unenc_preds_sklearn.shape

(88,)

### Mean Squared Error:

In [11]:
from sklearn.metrics import mean_squared_error

In [12]:
# MSE of the plain sklearn predictions against the ground truth.
# Arguments are passed by keyword in the documented (y_true, y_pred) roles;
# the numeric result is unchanged because squared error is symmetric.
mse_sklearn = mean_squared_error(y_true=y_test, y_pred=unenc_preds_sklearn)

In [13]:
mse_sklearn

10.975861845186673

In [14]:
# MSE of the CKKS predictions against the ground truth.
# Arguments are passed by keyword in the documented (y_true, y_pred) roles;
# the numeric result is unchanged because squared error is symmetric.
mse_ckks = mean_squared_error(y_true=y_test, y_pred=unenc_preds_ckks)
mse_ckks

10.974740524313926

In [15]:
# MSE of the Paillier predictions against the ground truth.
# Arguments are passed by keyword in the documented (y_true, y_pred) roles;
# the numeric result is unchanged because squared error is symmetric.
mse_paillier = mean_squared_error(y_true=y_test, y_pred=unenc_preds_paillier)
mse_paillier

10.975861844987772

In [16]:
def calculate_variance(numbers):
    """Return the sample variance of ``numbers`` (divisor ``n - 1``).

    Parameters
    ----------
    numbers : sized iterable of numbers
        Must support ``len()`` and be iterable more than once
        (list, tuple, 1-D ndarray, ...).

    Returns
    -------
    The sum of squared deviations from the mean divided by ``len(numbers) - 1``.

    Raises
    ------
    ValueError
        If fewer than two values are supplied.
    """
    count = len(numbers)
    if count < 2:
        raise ValueError("Variance requires at least two data points.")

    centre = sum(numbers) / count
    # Accumulate squared deviations in input order.
    total_sq_dev = sum((value - centre) ** 2 for value in numbers)
    return total_sq_dev / (count - 1)


# Print the sample variance of each prediction vector.
# Output order: sklearn, CKKS, Paillier.
for preds in (unenc_preds_sklearn, unenc_preds_ckks, unenc_preds_paillier):
    print("Variance:", calculate_variance(preds))

Variance: 65.24577198132666
Variance: 65.24735312904686
Variance: 65.24577198053012


In [17]:
# One-way ANOVA across the three prediction vectors (sklearn, CKKS, Paillier).
anova_result = stats.f_oneway(
    unenc_preds_sklearn,
    unenc_preds_ckks,
    unenc_preds_paillier,
)
f_statistic, p_value = anova_result

In [18]:
f_statistic,p_value

(2.4469554477310947e-08, 0.999999975530437)

In [19]:
# Compare the current p_value with a 5% significance level and report the verdict.
alpha = 0.05
message = (
    "There is a significant difference between the values"
    if p_value < alpha
    else "There is no significant difference between the values"
)
print(message)

There is no significant difference between the values


In [20]:
# Element-wise absolute gap between the CKKS and sklearn predictions.
diff_between_ckks_and_sklearn = np.abs(unenc_preds_ckks - unenc_preds_sklearn)

In [21]:
diff_between_ckks_and_sklearn[:5]

array([0.00013503, 0.00246553, 0.0016008 , 0.00168676, 0.00067318])

In [22]:
# Element-wise absolute gap between the Paillier and sklearn predictions.
diff_between_paillier_and_sklearn = np.abs(unenc_preds_paillier - unenc_preds_sklearn)

In [23]:
diff_between_paillier_and_sklearn

array([4.71302997e-09, 4.85123053e-09, 4.68284611e-09, 5.01141528e-09,
       4.83423435e-09, 4.30355840e-09, 4.87098362e-09, 4.92838126e-09,
       4.56505234e-09, 4.88438445e-09, 4.23049329e-09, 4.96356733e-09,
       4.73040984e-09, 4.64793004e-09, 5.21416865e-09, 4.96376629e-09,
       4.74231854e-09, 4.59260008e-09, 5.00079977e-09, 5.00523356e-09,
       4.37903225e-09, 4.79688822e-09, 4.92110530e-09, 4.75111506e-09,
       4.49929871e-09, 4.85702856e-09, 4.84546092e-09, 4.54399895e-09,
       4.87725771e-09, 4.92785546e-09, 3.96481425e-09, 4.70051020e-09,
       5.15427701e-09, 4.45037074e-09, 4.76786965e-09, 4.52246240e-09,
       4.44086368e-09, 4.87187890e-09, 4.59736071e-09, 5.00895680e-09,
       4.87953855e-09, 5.30583577e-09, 4.86220131e-09, 4.93197660e-09,
       4.76771334e-09, 5.04590503e-09, 5.04130071e-09, 5.00557462e-09,
       4.27543512e-09, 4.90258856e-09, 4.52872939e-09, 5.16583754e-09,
       4.67591121e-09, 5.08674702e-09, 4.48409310e-09, 5.39260725e-09,
      

In [24]:
# Independent two-sample t-test on the two absolute-difference arrays.
# This rebinds p_value, replacing the value produced by the ANOVA cell.
# NOTE(review): both arrays hold per-test-sample errors for the same samples and
# appear to have very different spreads; a paired test (stats.ttest_rel) or
# Welch's variant (equal_var=False) may be more appropriate -- confirm intent.
ttest_result = stats.ttest_ind(
    diff_between_paillier_and_sklearn,
    diff_between_ckks_and_sklearn,
)
t_statistic, p_value = ttest_result
t_statistic, p_value

(-15.395633375015532, 2.630174492968315e-34)

In [25]:
# Compare the current p_value with a 5% significance level and report the verdict.
alpha = 0.05
message = (
    "There is a significant difference between the ckks-sklearn preds and paillier-sklearn preds"
    if p_value < alpha
    else "There is no significant difference between the ckks-sklearn preds and paillier-sklearn preds"
)
print(message)

There is a significant difference between the ckks-sklearn preds and paillier-sklearn preds


In [26]:
# Number of elements in the Paillier-vs-sklearn absolute-difference array.
l=len(diff_between_paillier_and_sklearn)

In [27]:
# Mean squared difference between the CKKS and sklearn predictions.
# np.mean averages over this array's own length, so the result no longer
# depends on a separately defined global holding the length of a different
# (Paillier) array, and the reduction is vectorised rather than a Python-level sum.
mse_ckks_with_sklearn = np.mean(diff_between_ckks_and_sklearn ** 2)
mse_ckks_with_sklearn

2.7284637315140927e-06

In [28]:
# Mean squared difference between the Paillier and sklearn predictions.
# np.mean makes the cell self-contained (no reliance on a length stored in a
# single-letter global by another cell) and vectorises the reduction.
mse_paillier_with_sklearn = np.mean(diff_between_paillier_and_sklearn ** 2)
mse_paillier_with_sklearn

2.2830999909672432e-17

### Training and testing time:

In [29]:
# Report which scheme had the smaller recorded training time
# (ties fall through to the Paillier message).
train_time_message = (
    "ckks encryption takes less time to train the model"
    if train_time_ckks < train_time_paillier
    else "paillier encryption takes less time to train the model"
)
print(train_time_message)

ckks encryption takes less time to train the model


In [30]:
# Report which scheme had the smaller recorded test (inference) time
# (ties fall through to the Paillier message).
if test_time_ckks<test_time_paillier:
    print("ckks encrypted data takes less time to test the model")
else:
    # This branch compares test times, so the message must say "test", not "train".
    print("paillier encrypted data takes less time to test the model")

ckks encrypted data takes less time to test the model


In [31]:
train_time_ckks

37.485349893569946

In [32]:
train_time_paillier

1449.4497618675232

In [33]:
test_time_ckks

1.8246443271636963

In [34]:
test_time_paillier

24.461728811264038

In [35]:
# Persist each of the four timing values to its own joblib file.
# The final bare joblib.dump call is also the cell's displayed output
# (joblib.dump returns the list of filenames written).
# NOTE(review): the meaning of the "_348_21" filename suffix is not visible here -- confirm.
joblib.dump(train_time_ckks,'train_time_ckks_348_21.pkl')
joblib.dump(train_time_paillier,'train_time_paillier_348_21.pkl')
joblib.dump(test_time_ckks,'test_time_ckks_348_21.pkl')
joblib.dump(test_time_paillier,'test_time_paillier_348_21.pkl')

['test_time_paillier_348_21.pkl']

### CPU % utilization:

In [36]:
def shortlist_cpu_usage(cpu, threshold=80):
    """Return the CPU-usage readings that are strictly above ``threshold``.

    Parameters
    ----------
    cpu : iterable of numbers
        CPU-usage readings to filter.
    threshold : number, default 80
        Readings must be strictly greater than this value to be kept.
        The default reproduces the previously hard-coded cut-off.

    Returns
    -------
    list
        The retained readings, in their original order.
    """
    return [reading for reading in cpu if reading > threshold]

In [37]:
# Keep only the Paillier CPU-usage samples that pass shortlist_cpu_usage's cut-off,
# then display the resulting list.
paillier_cpu_use_short=shortlist_cpu_usage(cpu_usages_paillier)
paillier_cpu_use_short

[101.9,
 100.7,
 99.9,
 101.1,
 102.2,
 101.6,
 100.6,
 101.0,
 98.4,
 97.2,
 97.3,
 99.2,
 98.7,
 104.3,
 100.6,
 102.5,
 99.1,
 102.0,
 103.0,
 96.3,
 101.0,
 95.7,
 97.6,
 96.4,
 104.1,
 100.6,
 101.3,
 102.8,
 100.2,
 101.5,
 101.6,
 100.5,
 99.3,
 98.3,
 97.3,
 96.7,
 97.8,
 102.2,
 99.9,
 100.6,
 97.6,
 99.7,
 103.0,
 100.0,
 102.2,
 101.3,
 99.4,
 97.2,
 96.2,
 97.2,
 102.6,
 102.2,
 98.4,
 100.2,
 97.2,
 95.5,
 98.2,
 101.7,
 98.3,
 97.1,
 96.9,
 97.4,
 96.5,
 101.6,
 99.3,
 100.7,
 102.6,
 100.5,
 103.1,
 100.4,
 102.0,
 99.3,
 95.3,
 97.4,
 96.7,
 98.5,
 100.3,
 101.1,
 102.2,
 102.3,
 101.0,
 101.1,
 102.7,
 102.3,
 99.4,
 97.3,
 96.4,
 95.4,
 102.3,
 101.4,
 99.8,
 99.4,
 101.9,
 99.5,
 102.0,
 101.1,
 102.3,
 98.6,
 97.1,
 97.3,
 100.2,
 103.2,
 102.2,
 100.4,
 102.7,
 100.6,
 101.9,
 100.0,
 99.7,
 100.0,
 99.1,
 98.1,
 96.1,
 98.0,
 98.3,
 98.5,
 101.5,
 102.4,
 99.3,
 104.6,
 97.8,
 100.0,
 99.4,
 95.9,
 96.1,
 96.4,
 101.8,
 102.2,
 100.5,
 101.4,
 99.7,
 102.0,
 100.3

In [38]:
# Rebind the shortlisted Paillier samples as an ndarray so .mean()/.sum()/.max()/.min() are available.
paillier_cpu_use_short=np.array(paillier_cpu_use_short)

In [39]:
paillier_cpu_use_short.mean()

99.84461426491994

In [40]:
paillier_cpu_use_short.sum()

137186.5

In [41]:
# Total of the shortlisted Paillier CPU percentages, scaled down by 100.
# NOTE(review): reading this as CPU-seconds presumes one sample per second -- confirm.
paillier_percent_total = paillier_cpu_use_short.sum()
cpu_seconds_for_paillier = paillier_percent_total / 100
cpu_seconds_for_paillier

1371.865

In [42]:
paillier_cpu_use_short.max()

116.7

In [43]:
paillier_cpu_use_short.min()

83.5

In [44]:
len(paillier_cpu_use_short)

1374

In [45]:
# Summed Paillier CPU percentages as a share of (sample count * 200), in percent.
# NOTE(review): 200 presumably means 2 cores at 100% each -- confirm against the host.
paillier_capacity = len(paillier_cpu_use_short) * 200
cpu_percent_consumption_paillier = (paillier_cpu_use_short.sum() / paillier_capacity) * 100

In [46]:
cpu_percent_consumption_paillier

49.92230713245997

In [47]:
# Keep only the CKKS CPU-usage samples that pass shortlist_cpu_usage's cut-off,
# then display the resulting list.
ckks_cpu_use_short=shortlist_cpu_usage(cpu_usages_ckks)
ckks_cpu_use_short

[107.5,
 98.6,
 90.6,
 97.9,
 96.0,
 116.2,
 105.6,
 112.8,
 100.4,
 109.5,
 110.1,
 140.6,
 96.4,
 96.6,
 95.6,
 110.8,
 110.8,
 100.9,
 106.8,
 116.7,
 95.1,
 94.6,
 96.1,
 97.6,
 103.1,
 105.6,
 106.2]

In [48]:
# Rebind the shortlisted CKKS samples as an ndarray so .mean()/.sum()/.max()/.min() are available.
ckks_cpu_use_short=np.array(ckks_cpu_use_short)

In [49]:
ckks_cpu_use_short.mean()

104.39629629629628

In [50]:
ckks_cpu_use_short.sum()

2818.7

In [51]:
# Total of the shortlisted CKKS CPU percentages, scaled down by 100.
# NOTE(review): reading this as CPU-seconds presumes one sample per second -- confirm.
ckks_percent_total = ckks_cpu_use_short.sum()
cpu_seconds_for_ckks = ckks_percent_total / 100
cpu_seconds_for_ckks

28.186999999999998

In [52]:
ckks_cpu_use_short.max()

140.6

In [53]:
ckks_cpu_use_short.min()

90.6

In [54]:
len(ckks_cpu_use_short)

27

In [55]:
# Summed CKKS CPU percentages as a share of (sample count * 200), in percent.
# NOTE(review): 200 presumably means 2 cores at 100% each -- confirm against the host.
ckks_capacity = len(ckks_cpu_use_short) * 200
cpu_percent_consumption_ckks = (ckks_cpu_use_short.sum() / ckks_capacity) * 100
cpu_percent_consumption_ckks

52.19814814814815

In [56]:
# Bundle the CKKS CPU statistics into one tuple and persist it.
# Tuple order: mean, sum, cpu-seconds, max, min, sample count, percent consumption.
ckks_cpu_insights = (
    ckks_cpu_use_short.mean(),
    ckks_cpu_use_short.sum(),
    cpu_seconds_for_ckks,
    ckks_cpu_use_short.max(),
    ckks_cpu_use_short.min(),
    len(ckks_cpu_use_short),
    cpu_percent_consumption_ckks,
)
joblib.dump(ckks_cpu_insights, 'cpu_util_insights_ckks_348_21.pkl')

['cpu_util_insights_ckks_348_21.pkl']

In [57]:
# Bundle the Paillier CPU statistics into one tuple and persist it.
# Tuple order: mean, sum, cpu-seconds, max, min, sample count, percent consumption.
# NOTE(review): this filename ends in "1348_21" while the other dump filenames in
# this notebook end in "348_21" -- confirm whether the extra "1" is intentional.
paillier_cpu_insights = (
    paillier_cpu_use_short.mean(),
    paillier_cpu_use_short.sum(),
    cpu_seconds_for_paillier,
    paillier_cpu_use_short.max(),
    paillier_cpu_use_short.min(),
    len(paillier_cpu_use_short),
    cpu_percent_consumption_paillier,
)
joblib.dump(paillier_cpu_insights, 'cpu_util_insights_paillier_1348_21.pkl')

['cpu_util_insights_paillier_1348_21.pkl']