In [1]:
import numpy as np
import pandas as pd
import tenseal as ts
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.datasets import make_classification
from sklearn.preprocessing import LabelEncoder
from concurrent.futures import ThreadPoolExecutor
import time
from sklearn.preprocessing import StandardScaler



In [2]:
# Loading and preprocessing the data
df = pd.read_csv('payment_fraud.csv')

label_encoder = LabelEncoder()
df['paymentMethod_encoded'] = label_encoder.fit_transform(df['paymentMethod'])

features = ['accountAgeDays', 'numItems', 'localTime', 'paymentMethod_encoded', 'paymentMethodAgeDays']
X = df[features].values
y = df['label'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # Fit on training data and transform it
X_test = scaler.transform(X_test)  # Transform test data using the same scaler


In [3]:
# Encryption setup with TenSEAL
context = ts.context(ts.SCHEME_TYPE.CKKS, poly_modulus_degree=16384, coeff_mod_bit_sizes=[60, 40, 40, 60, 60])
context.generate_galois_keys()
context.global_scale = 2 ** 40

# Encrypt training and testing data
X_train_encrypted = [ts.ckks_vector(context, x) for x in X_train]
X_test_encrypted = [ts.ckks_vector(context, x) for x in X_test]


In [4]:
# Convert encrypted data to numpy array for processing
X_train_encrypted_np = np.array(X_train_encrypted).reshape(len(X_train), -1)
X_test_encrypted_np = np.array(X_test_encrypted).reshape(len(X_test), -1)


In [5]:
# Batch prediction with optimized batch size for memory efficiency
def batch_predict(X, weights, batch_size=35):
    predictions = []
    for i in range(0, len(X), batch_size):
        batch = X[i:i + batch_size]
        batch_predictions = np.dot(batch, weights)
        predictions.extend(batch_predictions)
    return np.array(predictions).reshape(-1, 1)

# Parallel predictions with optimized threading
def parallel_predict(X, weights, num_workers=4):
    def predict_batch(batch):
        return np.dot(batch, weights)
    
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        futures = [executor.submit(predict_batch, X[i::num_workers]) for i in range(num_workers)]
        results = [future.result() for future in futures]
    return np.concatenate(results).reshape(-1, 1)

# Compute gradients function
def compute_gradients(X, y, predictions):
    errors = predictions - y.reshape(-1, 1)
    gradients = np.dot(X.T, errors) / len(X)
    return gradients


In [None]:
# Training loop
start_time_enc = time.time()
batch_size = 30 #5
input_dim = X_train_encrypted_np.shape[1]  # Number of features
output_dim = 1  # Assuming binary classification
weights = np.random.randn(input_dim, output_dim)

# Define learning rate and epochs
learning_rate = 0.5
epochs = 5

for epoch in range(epochs):
    start_time_epenc = time.time()
    predictions = batch_predict(X_train_encrypted_np, weights, batch_size)
    gradients = compute_gradients(X_train_encrypted_np, y_train, predictions)
    weights -= learning_rate * gradients
    
    print(f"Epoch {epoch + 1}/{epochs} completed in {time.time() - start_time_epenc:.2f} seconds")

# Testing the model
y_pred = parallel_predict(X_test_encrypted_np, weights)
print(f"Predictions: {y_pred}")
y_pred_labels = [1 if pred >= 0.5 else 0 for pred in y_pred]
print(f"Predicted Labels: {y_pred_labels}")

# Model evaluation
accuracy = accuracy_score(y_test, y_pred_labels)
precision = precision_score(y_test, y_pred_labels)
recall = recall_score(y_test, y_pred_labels)
f1 = f1_score(y_test, y_pred_labels)
timeToTrain = time.time() - start_time_enc

print(f"Encrypted Training has Finished")



In [None]:
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Completed in {timeToTrain:.2f} seconds")

In [3]:
def batch_predict(X, weights, batch_size=35):
    predictions = []
    for i in range(0, len(X), batch_size):
        batch = X[i:i + batch_size]
        batch_predictions = np.dot(batch, weights)
        predictions.extend(batch_predictions)
    return np.array(predictions)

def parallel_predict(X, weights, num_workers=4):
    def predict_batch(batch):
        return np.dot(batch, weights)
    
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        futures = [executor.submit(predict_batch, X[i::num_workers]) for i in range(num_workers)]
        results = [future.result() for future in futures]
    return np.concatenate(results)

def compute_gradients(X, y, predictions):
    errors = predictions - y.reshape(-1, 1)
    gradients = np.dot(X.T, errors) / len(X)
    return gradients

In [13]:
# Initialize weights (example for a simple linear model)
start_time = time.time()
input_dim = X_train.shape[1]
output_dim = 1

weights = np.random.randn(input_dim, output_dim)
learning_rate = 0.0001
epochs = 3000

for epoch in range(epochs):
    start_time_ep = time.time()
    predictions = batch_predict(X_train, weights)
    gradients = compute_gradients(X_train, y_train, predictions)
    weights -= learning_rate * gradients
    
    print(f"Epoch {epoch+1}/{epochs}, Weights: {weights}, completed in {time.time() - start_time_ep:.2f} seconds")

# Check predictions
y_pred = parallel_predict(X_test, weights)
print(f"Predictions: {y_pred}")

y_pred_labels = [1 if pred >= 0.5 else 0 for pred in y_pred]
print(f"Predicted Labels: {y_pred_labels}")

accuracy = accuracy_score(y_test, y_pred_labels)
precision = precision_score(y_test, y_pred_labels)
recall = recall_score(y_test, y_pred_labels)
f1 = f1_score(y_test, y_pred_labels)
timeToTrain = time.time() - start_time



Epoch 1/3000, Weights: [[64.85848399]
 [ 1.27921622]
 [-0.59633595]
 [-0.42744369]
 [ 3.63318219]], completed in 0.02 seconds
Epoch 2/3000, Weights: [[-8.97793045e+03]
 [-4.69860545e+00]
 [-2.72041610e+01]
 [-2.28420431e+00]
 [-1.20350586e+03]], completed in 0.02 seconds
Epoch 3/3000, Weights: [[1.25545109e+06]
 [8.32460871e+02]
 [3.69662245e+03]
 [2.57613094e+02]
 [1.72582674e+05]], completed in 0.02 seconds
Epoch 4/3000, Weights: [[-1.75636582e+08]
 [-1.16292865e+05]
 [-5.17281625e+05]
 [-3.61032837e+04]
 [-2.41701482e+07]], completed in 0.02 seconds
Epoch 5/3000, Weights: [[2.45718841e+10]
 [1.62698027e+07]
 [7.23686520e+07]
 [5.05086270e+06]
 [3.38160590e+09]], completed in 0.02 seconds
Epoch 6/3000, Weights: [[-3.43765506e+12]
 [-2.27617756e+09]
 [-1.01245179e+10]
 [-7.06625769e+08]
 [-4.73094290e+11]], completed in 0.02 seconds
Epoch 7/3000, Weights: [[4.80934742e+14]
 [3.18441745e+11]
 [1.41644009e+12]
 [9.88583430e+10]
 [6.61868326e+13]], completed in 0.01 seconds
Epoch 8/3000,

  weights -= learning_rate * gradients


Epoch 147/3000, Weights: [[nan]
 [nan]
 [nan]
 [nan]
 [nan]], completed in 0.01 seconds
Epoch 148/3000, Weights: [[nan]
 [nan]
 [nan]
 [nan]
 [nan]], completed in 0.01 seconds
Epoch 149/3000, Weights: [[nan]
 [nan]
 [nan]
 [nan]
 [nan]], completed in 0.01 seconds
Epoch 150/3000, Weights: [[nan]
 [nan]
 [nan]
 [nan]
 [nan]], completed in 0.01 seconds
Epoch 151/3000, Weights: [[nan]
 [nan]
 [nan]
 [nan]
 [nan]], completed in 0.01 seconds
Epoch 152/3000, Weights: [[nan]
 [nan]
 [nan]
 [nan]
 [nan]], completed in 0.01 seconds
Epoch 153/3000, Weights: [[nan]
 [nan]
 [nan]
 [nan]
 [nan]], completed in 0.01 seconds
Epoch 154/3000, Weights: [[nan]
 [nan]
 [nan]
 [nan]
 [nan]], completed in 0.01 seconds
Epoch 155/3000, Weights: [[nan]
 [nan]
 [nan]
 [nan]
 [nan]], completed in 0.01 seconds
Epoch 156/3000, Weights: [[nan]
 [nan]
 [nan]
 [nan]
 [nan]], completed in 0.01 seconds
Epoch 157/3000, Weights: [[nan]
 [nan]
 [nan]
 [nan]
 [nan]], completed in 0.01 seconds
Epoch 158/3000, Weights: [[nan]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [14]:
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Completed in {timeToTrain:.2f} seconds")

Accuracy: 0.9850
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000
Completed in 38.59 seconds
