In [2]:
import numpy as np
import pandas as pd
import tenseal as ts
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.datasets import make_classification
from sklearn.preprocessing import LabelEncoder
from concurrent.futures import ThreadPoolExecutor
from sklearn.preprocessing import StandardScaler


In [3]:
df = pd.read_csv('payment_fraud.csv')

label_encoder = LabelEncoder()
df['paymentMethod_encoded'] = label_encoder.fit_transform(df['paymentMethod'])

features = ['accountAgeDays', 'numItems', 'localTime', 'paymentMethod_encoded', 'paymentMethodAgeDays']
X = df[features].values
y = df['label'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
#scaler = StandardScaler()
#X_train = scaler.fit_transform(X_train)  # Fit on training data and transform it
#X_test = scaler.transform(X_test)  # Transform test data using the same scaler


In [4]:
# Generate a synthetic dataset
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [4]:
# Encryption setup with TenSEAL
context = ts.context(ts.SCHEME_TYPE.CKKS, poly_modulus_degree=8192, coeff_mod_bit_sizes=[40, 21, 21, 21, 21, 21, 21,40])
#context = ts.context(ts.SCHEME_TYPE.CKKS, poly_modulus_degree=16384, coeff_mod_bit_sizes=[60, 40, 40, 60, 60])
context.generate_galois_keys()
context.global_scale = 2 ** 40

# Encrypt training and testing data
X_train_encrypted = [ts.ckks_vector(context, x) for x in X_train]
X_test_encrypted = [ts.ckks_vector(context, x) for x in X_test]


In [6]:
X_train_encrypted_np = np.array( X_train_encrypted)
#X_test_encrypted_np = np.array( X_test_encrypted)
X_train_encrypted_np = X_train_encrypted_np.reshape(X_train_encrypted_np.shape[0], -1)
#X_test_encrypted_np = X_test_encrypted_np.reshape(X_test_encrypted_np.shape[0], -1)
#X_train_encrypted_np = X_train_encrypted_np.reshape(X_train_encrypted_np.shape[0], -1)
#X_test_encrypted_np = X_test_encrypted_np.reshape(X_test_encrypted_np.shape[0], X_test_encrypted_np.shape[1])
print(X_train_encrypted_np.shape)
#print(X_test_encrypted_np.shape)
print(X_train_encrypted_np.shape)
print(X_train_encrypted_np[:5])
X_train_encrypted_np = X_train_encrypted_np.astype(np.float64)
y_train = y_train.astype(np.float64)
if np.any(np.isnan(X_train_encrypted_np)) or np.any(np.isinf(X_train_encrypted_np)):
    print("NaN or Inf detected in X_train_encrypted_np")
if np.any(np.isnan(y_train)) or np.any(np.isinf(y_train)):
    print("NaN or Inf detected in y_train")

(31376, 1)
(31376, 1)
[[<tenseal.tensors.ckksvector.CKKSVector object at 0x0000025BEC1716C0>]
 [<tenseal.tensors.ckksvector.CKKSVector object at 0x0000025BEC173160>]
 [<tenseal.tensors.ckksvector.CKKSVector object at 0x0000025BA9960A30>]
 [<tenseal.tensors.ckksvector.CKKSVector object at 0x0000025BA98621A0>]
 [<tenseal.tensors.ckksvector.CKKSVector object at 0x0000025BEC1731C0>]]


TypeError: float() argument must be a string or a real number, not 'CKKSVector'

In [None]:
X_train_encrypted_np_small = X_train_encrypted_np[:100]  # Use only the first 100 samples
y_train_small = y_train[:100]

In [None]:
# Define functions if not already available
def batch_predict(X, weights):
    # Simple linear model prediction (e.g., X @ weights for matrix multiplication)
    return X @ weights

def compute_gradients(X, y, predictions):
    # Compute the gradients using mean squared error as an example
    errors = predictions - y
    gradients = X.T @ errors / len(y)  # Mean of the gradient
    return gradients

def parallel_predict(X, weights):
    # Parallelized version, could be similar to batch_predict if parallelism is not needed
    return batch_predict(X, weights)

# Load data (example placeholders)
# Ensure that X_train, X_test, y_train, and y_test are properly defined before running this code
# For example: X_train, X_test, y_train, y_test = train_test_split(...)

# Initialize weights for a simple linear model
input_dim = X_train_encrypted_np.shape[1]  # Number of features
output_dim = 1  # Binary classification output

weights = np.random.randn(input_dim, output_dim)
learning_rate = 0.01  # Reduced learning rate
epochs = 100  # Reduced epochs

# Profile the training loop
for epoch in range(epochs):
    print(f"Starting epoch {epoch+1}/{epochs}")
    predictions = batch_predict(X_train_encrypted_np, weights)
    gradients = compute_gradients(X_train_encrypted_np, y_train, predictions)
    weights = weights - learning_rate * gradients

    if np.any(np.isnan(predictions)) or np.any(np.isinf(predictions)):
        print("NaN or Inf detected in predictions")
        break
    if np.any(np.isnan(gradients)) or np.any(np.isinf(gradients)):
        print("NaN or Inf detected in gradients")
        break

    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Weights update: {weights}")

# Check predictions on test data
y_pred = parallel_predict(X_test, weights)

# Threshold predictions to get binary classification results
y_pred_labels = [1 if pred >= 0.5 else 0 for pred in y_pred]

# Calculate and print evaluation metrics
accuracy = accuracy_score(y_test, y_pred_labels)
precision = precision_score(y_test, y_pred_labels, zero_division=1)  # Handle zero precision cases
recall = recall_score(y_test, y_pred_labels)
f1 = f1_score(y_test, y_pred_labels)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")


Starting epoch 1/100


In [None]:

# Define functions if not already available
def batch_predict(X, weights):
    # Simple linear model prediction (e.g., X @ weights for matrix multiplication)
    return X @ weights

def compute_gradients(X, y, predictions):
    # Compute the gradients using mean squared error as an example
    errors = predictions - y
    gradients = X.T @ errors / len(y)  # Mean of the gradient
    return gradients

def parallel_predict(X, weights):
    # Parallelized version, could be similar to batch_predict if parallelism is not needed
    return batch_predict(X, weights)

# Load data (example placeholders)
# Ensure that X_train, X_test, y_train, and y_test are properly defined before running this code
# For example: X_train, X_test, y_train, y_test = train_test_split(...)

# Initialize weights for a simple linear model
input_dim = X_train.shape[1]  # Number of features
output_dim = 1  # Binary classification output

weights = np.random.randn(input_dim, output_dim)
learning_rate = 0.01  # Reduced learning rate
epochs = 100  # Reduced epochs

# Training loop with optimizations
for epoch in range(epochs):
    # Forward pass: calculate predictions
    predictions = batch_predict(X_train, weights)

    # Calculate gradients and update weights
    gradients = compute_gradients(X_train, y_train, predictions)
    weights = weights - learning_rate * gradients

    # Print progress every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Weights update: {weights}")

# Check predictions on test data
y_pred = parallel_predict(X_test, weights)

# Threshold predictions to get binary classification results
y_pred_labels = [1 if pred >= 0.5 else 0 for pred in y_pred]

# Calculate and print evaluation metrics
accuracy = accuracy_score(y_test, y_pred_labels)
precision = precision_score(y_test, y_pred_labels, zero_division=1)  # Handle zero precision cases
recall = recall_score(y_test, y_pred_labels)
f1 = f1_score(y_test, y_pred_labels)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
