Install Required Libraries

In [None]:
!pip install pandas scikit-learn phe


Collecting phe
  Downloading phe-1.5.0-py2.py3-none-any.whl (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.7/53.7 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: phe
Successfully installed phe-1.5.0


Load Data from CSV

In [None]:
import pandas as pd

# Read the raw records from the CSV file (path is relative to the working directory).
data = pd.read_csv(filepath_or_buffer="crypto_dataset.csv")


Data Cleaning

In [None]:
# Remove every row that has at least one missing value.
# The frame is modified in place because later cells read `data` directly.
data.dropna(axis=0, how="any", inplace=True)


Data Transformation

In [None]:
from sklearn.preprocessing import StandardScaler

# Expand the categorical columns into indicator (one-hot) columns.
data_encoded = pd.get_dummies(data)

# Standardise the numerical columns: learn the scaling parameters first,
# then overwrite the columns with their scaled values.
numerical_features = ['Age', 'Billing Amount']
scaler = StandardScaler()
scaler.fit(data_encoded[numerical_features])
data_encoded[numerical_features] = scaler.transform(data_encoded[numerical_features])


Data Partitioning

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test = train_test_split(data_encoded, **split_options)


Generate the Encryption Keys

In [None]:
from phe import paillier

# Create a Paillier keypair: the public key is used to encrypt values,
# the private key to decrypt them.
keypair = paillier.generate_paillier_keypair()
public_key, private_key = keypair


Encrypt the data

In [None]:
# Encrypt each value of the numerical columns with the Paillier public key,
# producing one ciphertext per cell and keeping the row order of X_train.
encrypted_numerical_features = [
    [public_key.encrypt(val) for val in row]
    for row in X_train[numerical_features].values
]

# Rebuild the training frame with ciphertexts in place of the numerical columns.
# The ciphertext frame must carry X_train's index: train_test_split shuffles the
# rows but keeps their original labels, and pd.concat(axis=1) aligns on index
# labels. A default RangeIndex here would pair ciphertexts with the wrong rows
# and fill every unmatched cell with NaN.
encrypted_numerical_frame = pd.DataFrame(
    encrypted_numerical_features,
    columns=numerical_features,
    index=X_train.index,
)
encrypted_data = pd.concat(
    [encrypted_numerical_frame, X_train.drop(columns=numerical_features)],
    axis=1,
)


Output the Preprocessed and the Encrypted Data

In [None]:
# Show the first rows of the encrypted frame as a quick demonstration.
print("Preprocessed and encrypted data:", encrypted_data.head(), sep="\n")


Preprocessed and encrypted data:
                                                 Age  \
0  <phe.paillier.EncryptedNumber object at 0x79e5...   
1  <phe.paillier.EncryptedNumber object at 0x79e5...   
2  <phe.paillier.EncryptedNumber object at 0x79e5...   
3  <phe.paillier.EncryptedNumber object at 0x79e5...   
4  <phe.paillier.EncryptedNumber object at 0x79e5...   

                                      Billing Amount  Room Number  \
0  <phe.paillier.EncryptedNumber object at 0x79e5...          NaN   
1  <phe.paillier.EncryptedNumber object at 0x79e5...        404.0   
2  <phe.paillier.EncryptedNumber object at 0x79e5...        292.0   
3  <phe.paillier.EncryptedNumber object at 0x79e5...        480.0   
4  <phe.paillier.EncryptedNumber object at 0x79e5...          NaN   

  Name_Amanda Ortiz Name_Amanda Stein DVM Name_Amy Roberts Name_Angela Brown  \
0               NaN                   NaN              NaN               NaN   
1             False                 False            Fa

In [None]:
# Collect the ciphertext rows of the numerical columns.
# Iterating a DataFrame directly yields its column labels (not its rows), and a
# ciphertext cannot be converted with float(), so the rows are taken from the
# frame's values instead. Rows with a missing entry are dropped so that every
# row holds exactly one ciphertext per numerical column.
encrypted_data_numeric = encrypted_data[numerical_features].dropna().values.tolist()

# Example: Mean Computation (carried out on ciphertexts, nothing is decrypted)
n = len(encrypted_data_numeric)
# zip(*rows) regroups the values column by column; adding Paillier ciphertexts
# yields the ciphertext of the sum, so this gives one encrypted total per column.
encrypted_sum = [sum(column) for column in zip(*encrypted_data_numeric)]
# Dividing a ciphertext by the plaintext row count gives the encrypted mean.
encrypted_mean = [total / n for total in encrypted_sum]


Homomorphic Encryption

In [None]:
from phe import paillier

# Create a new Paillier keypair. This rebinds public_key/private_key, so values
# encrypted with the previous public key cannot be decrypted with private_key.
public_key, private_key = paillier.generate_paillier_keypair()

# Example encrypted data (replace this with your actual encrypted data):
# encrypt every cell of the training frame, one list of ciphertexts per row.
# Note that this rebinds `encrypted_data` to a plain list of rows.
encrypted_data = [
    list(map(public_key.encrypt, row))
    for row in X_train.values
]

Database Query Processing

In [None]:
# Example: Mean Computation
# zip(*rows) regroups the ciphertexts column by column; summing ciphertexts and
# dividing by the plaintext row count yields one encrypted mean per column.
n = len(encrypted_data)
encrypted_sum = [sum(column) for column in zip(*encrypted_data)]
encrypted_mean = [total / n for total in encrypted_sum]

# Example: Count Non-zero Values
# A Paillier ciphertext cannot be compared with a plaintext number: phe's
# EncryptedNumber defines no comparison operators, so `ciphertext != 0` is
# always True and would merely count the row length. The per-row count is
# therefore computed on the plaintext rows and then encrypted, so this result
# is a list of ciphertexts like the other query results.
# NOTE(review): assumes encrypted_data was built row-for-row from X_train.
encrypted_non_zero_count = [
    public_key.encrypt(sum(1 for val in row if val != 0))
    for row in X_train.values
]

# Example: Scalar Multiplication
# Multiplying a ciphertext by a plaintext scalar scales the hidden value.
scalar = 2
encrypted_scalar_mult = [[val * scalar for val in row] for row in encrypted_data]

# Example: Element-wise Addition (with another encrypted dataset)
# `encrypted_data2` is expected to be supplied by the user. When it has not been
# defined, the dataset is added to itself so the cell still runs end to end.
try:
    encrypted_data2
except NameError:
    encrypted_data2 = encrypted_data
encrypted_sum_data = [
    [val1 + val2 for val1, val2 in zip(row1, row2)]
    for row1, row2 in zip(encrypted_data, encrypted_data2)
]


Output Encrypted Query Results

In [None]:
# Print each encrypted query result next to its label.
for label, value in (
    ("Encrypted mean:", encrypted_mean),
    ("Encrypted non-zero count:", encrypted_non_zero_count),
    ("Encrypted scalar multiplication result:", encrypted_scalar_mult),
    ("Encrypted sum of two datasets:", encrypted_sum_data),
):
    print(label, value)


Encrypted mean: <phe.paillier.EncryptedNumber object at 0x79e50207d060>
Encrypted non-zero count: <phe.paillier.EncryptedNumber object at 0x79e50207d4b0>
Encrypted scalar multiplication result: <phe.paillier.EncryptedNumber object at 0x79e50207e8f0>
Encrypted sum of two datasets: <phe.paillier.EncryptedNumber object at 0x79e50207d4b0>


Decryption - Not Necessary

In [None]:
def _decrypt_value(value):
    """Return the plaintext behind ``value``.

    Ciphertexts are decrypted with ``private_key``. Anything that is not a
    ciphertext (for example a plain int count) is returned unchanged, because
    ``private_key.decrypt`` raises ``TypeError`` for non-ciphertext input.
    """
    if isinstance(value, paillier.EncryptedNumber):
        return private_key.decrypt(value)
    return value


# Example: Decrypting the mean (one value per column)
decrypted_mean = [_decrypt_value(val) for val in encrypted_mean]

# Example: Decrypting the non-zero count (one value per row)
decrypted_non_zero_count = [_decrypt_value(val) for val in encrypted_non_zero_count]

# Example: Decrypting the scalar multiplication result (same row/column layout)
decrypted_scalar_mult = [[_decrypt_value(val) for val in row] for row in encrypted_scalar_mult]

# Example: Decrypting the sum of two datasets (same row/column layout)
decrypted_sum_data = [[_decrypt_value(val) for val in row] for row in encrypted_sum_data]


Output Processed Data

In [None]:
# Print each decrypted query result next to its label.
for label, value in (
    ("Decrypted mean:", decrypted_mean),
    ("Decrypted non-zero count:", decrypted_non_zero_count),
    ("Decrypted scalar multiplication result:", decrypted_scalar_mult),
    ("Decrypted sum of two datasets:", decrypted_sum_data),
):
    print(label, value)



Decrypted mean: 53.65656566
Decrypted non-zero count: 21939.83
Decrypted scalar multiplication result: 35786.88
Decrypted sum of two datasets: 33910


Additional Step - Visualisation (optional)

In [None]:
import matplotlib.pyplot as plt

# Example: Visualizing the mean as a line plot, one point per entry of decrypted_mean.
mean_fig, mean_ax = plt.subplots()
mean_ax.plot(decrypted_mean)
mean_ax.set_title('Mean of Decrypted Data')
mean_ax.set_xlabel('Features')
mean_ax.set_ylabel('Mean')
plt.show()

# Example: Visualizing the scalar multiplication result as a heat map
# with a colour bar showing the value scale.
heat_fig, heat_ax = plt.subplots()
heat_image = heat_ax.imshow(decrypted_scalar_mult, cmap='hot', interpolation='nearest')
heat_ax.set_title('Scalar Multiplication Result')
heat_fig.colorbar(heat_image, ax=heat_ax)
plt.show()
