In [1]:
import numpy as np
import pandas as pd
import tenseal as ts
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.datasets import make_classification
from sklearn.preprocessing import LabelEncoder
from concurrent.futures import ThreadPoolExecutor
import time
from sklearn.preprocessing import StandardScaler



In [2]:
# Loading and preprocessing the data
df = pd.read_csv('payment_fraud.csv')

label_encoder = LabelEncoder()
df['paymentMethod_encoded'] = label_encoder.fit_transform(df['paymentMethod'])

features = ['accountAgeDays', 'numItems', 'localTime', 'paymentMethod_encoded', 'paymentMethodAgeDays']
X = df[features].values
y = df['label'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # Fit on training data and transform it
X_test = scaler.transform(X_test)  # Transform test data using the same scaler


In [3]:
# Encryption setup with TenSEAL
context = ts.context(ts.SCHEME_TYPE.CKKS, poly_modulus_degree=16384, coeff_mod_bit_sizes=[60, 40, 40, 60, 60])
context.generate_galois_keys()
context.global_scale = 2 ** 40

# Encrypt training and testing data
X_train_encrypted = [ts.ckks_vector(context, x) for x in X_train]
X_test_encrypted = [ts.ckks_vector(context, x) for x in X_test]


In [4]:
# Convert encrypted data to numpy array for processing
X_train_encrypted_np = np.array(X_train_encrypted).reshape(len(X_train), -1)
X_test_encrypted_np = np.array(X_test_encrypted).reshape(len(X_test), -1)


In [8]:
# Batch prediction with optimized batch size for memory efficiency
def batch_predict(X, weights, batch_size=35):
    predictions = []
    for i in range(0, len(X), batch_size):
        batch = X[i:i + batch_size]
        batch_predictions = np.dot(batch, weights)
        predictions.extend(batch_predictions)
    return np.array(predictions).reshape(-1, 1)

# Parallel predictions with optimized threading
def parallel_predict(X, weights, num_workers=4):
    def predict_batch(batch):
        return np.dot(batch, weights)
    
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        futures = [executor.submit(predict_batch, X[i::num_workers]) for i in range(num_workers)]
        results = [future.result() for future in futures]
    return np.concatenate(results).reshape(-1, 1)

# Compute gradients function
def compute_gradients(X, y, predictions):
    errors = predictions - y.reshape(-1, 1)
    gradients = np.dot(X.T, errors) / len(X)
    return gradients


In [None]:
# Training loop
start_time_enc = time.time()
batch_size = 30 #5
input_dim = X_train_encrypted_np.shape[1]  # Number of features
output_dim = 1  # Assuming binary classification
weights = np.random.randn(input_dim, output_dim)

# Define learning rate and epochs
learning_rate = 0.5
epochs = 5

for epoch in range(epochs):
    start_time_epenc = time.time()
    predictions = batch_predict(X_train_encrypted_np, weights, batch_size)
    gradients = compute_gradients(X_train_encrypted_np, y_train, predictions)
    weights -= learning_rate * gradients
    
    print(f"Epoch {epoch + 1}/{epochs} completed in {time.time() - start_time_epenc:.2f} seconds")

# Testing the model
y_pred = parallel_predict(X_test_encrypted_np, weights)
print(f"Predictions: {y_pred}")
y_pred_labels = [1 if pred >= 0.5 else 0 for pred in y_pred]
print(f"Predicted Labels: {y_pred_labels}")

# Model evaluation
accuracy = accuracy_score(y_test, y_pred_labels)
precision = precision_score(y_test, y_pred_labels)
recall = recall_score(y_test, y_pred_labels)
f1 = f1_score(y_test, y_pred_labels)
timeToTrain = time.time() - start_time_enc

print(f"Encrypted Training has Finished")



In [None]:
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Completed in {timeToTrain:.2f} seconds")

In [5]:
def batch_predict(X, weights, batch_size=35):
    predictions = []
    for i in range(0, len(X), batch_size):
        batch = X[i:i + batch_size]
        batch_predictions = np.dot(batch, weights)
        predictions.extend(batch_predictions)
    return np.array(predictions)

def parallel_predict(X, weights, num_workers=4):
    def predict_batch(batch):
        return np.dot(batch, weights)
    
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        futures = [executor.submit(predict_batch, X[i::num_workers]) for i in range(num_workers)]
        results = [future.result() for future in futures]
    return np.concatenate(results)

def compute_gradients(X, y, predictions):
    errors = predictions - y.reshape(-1, 1)
    gradients = np.dot(X.T, errors) / len(X)
    return gradients

In [6]:
# Initialize weights (example for a simple linear model)
start_time = time.time()
input_dim = X_train.shape[1]
output_dim = 1

weights = np.random.randn(input_dim, output_dim)
learning_rate = 0.0001
epochs = 3000

for epoch in range(epochs):
    start_time_ep = time.time()
    predictions = batch_predict(X_train, weights)
    gradients = compute_gradients(X_train, y_train, predictions)
    weights -= learning_rate * gradients
    
    print(f"Epoch {epoch+1}/{epochs}, Weights: {weights}, completed in {time.time() - start_time_ep:.2f} seconds")

# Check predictions
y_pred = parallel_predict(X_test, weights)
print(f"Predictions: {y_pred}")

y_pred_labels = [1 if pred >= 0.5 else 0 for pred in y_pred]
print(f"Predicted Labels: {y_pred_labels}")

accuracy = accuracy_score(y_test, y_pred_labels)
precision = precision_score(y_test, y_pred_labels)
recall = recall_score(y_test, y_pred_labels)
f1 = f1_score(y_test, y_pred_labels)
timeToTrain = time.time() - start_time



Epoch 1/3000, Weights: [[-0.39869089]
 [ 0.59286635]
 [-0.68002939]
 [-1.73814951]
 [-0.41992478]], completed in 0.08 seconds
Epoch 2/3000, Weights: [[-0.39863768]
 [ 0.59280387]
 [-0.67996139]
 [-1.7379757 ]
 [-0.41987381]], completed in 0.02 seconds
Epoch 3/3000, Weights: [[-0.39858448]
 [ 0.5927414 ]
 [-0.67989339]
 [-1.73780191]
 [-0.41982285]], completed in 0.01 seconds
Epoch 4/3000, Weights: [[-0.39853129]
 [ 0.59267893]
 [-0.6798254 ]
 [-1.73762813]
 [-0.41977189]], completed in 0.01 seconds
Epoch 5/3000, Weights: [[-0.3984781 ]
 [ 0.59261647]
 [-0.67975742]
 [-1.73745438]
 [-0.41972094]], completed in 0.01 seconds
Epoch 6/3000, Weights: [[-0.39842492]
 [ 0.59255401]
 [-0.67968944]
 [-1.73728064]
 [-0.41966999]], completed in 0.01 seconds
Epoch 7/3000, Weights: [[-0.39837175]
 [ 0.59249157]
 [-0.67962147]
 [-1.73710691]
 [-0.41961905]], completed in 0.02 seconds
Epoch 8/3000, Weights: [[-0.39831859]
 [ 0.59242912]
 [-0.67955351]
 [-1.73693321]
 [-0.41956812]], completed in 0.01 

In [7]:
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Completed in {timeToTrain:.2f} seconds")

Accuracy: 0.5344
Precision: 0.0180
Recall: 0.5593
F1 Score: 0.0349
Completed in 40.34 seconds
