In [1]:
import numpy as np
import pandas as pd
import tenseal as ts
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.datasets import make_classification
from sklearn.preprocessing import LabelEncoder
from concurrent.futures import ThreadPoolExecutor


In [2]:
# Load the raw payment records.
df = pd.read_csv('payment_fraud.csv')

# Turn the categorical payment method into integer codes so it can sit in a
# numeric feature matrix next to the other columns.
label_encoder = LabelEncoder()
label_encoder.fit(df['paymentMethod'])
df['paymentMethod_encoded'] = label_encoder.transform(df['paymentMethod'])

# Model inputs (in column order) and the target column.
features = [
    'accountAgeDays',
    'numItems',
    'localTime',
    'paymentMethod_encoded',
    'paymentMethodAgeDays',
]
X = df[features].to_numpy()
y = df['label'].to_numpy()

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [4]:
# Build the CKKS context that every ciphertext in this notebook shares.
context = ts.context(
    ts.SCHEME_TYPE.CKKS,
    poly_modulus_degree=16384,
    coeff_mod_bit_sizes=[60, 40, 40, 60, 60],
)
# Galois keys are generated up front — presumably for the rotation-based
# operations (e.g. dot products) TenSEAL performs on encrypted vectors.
context.generate_galois_keys()
context.global_scale = 2 ** 40

# Encrypt each training row as its own CKKS vector.
# Only the training split is encrypted here; X_test stays in plaintext.
X_train_encrypted = [ts.ckks_vector(context, sample) for sample in X_train]



In [None]:
# Wrap the list of encrypted rows in a numpy array and force it to two
# dimensions, keeping one row per training sample.
X_train_encrypted_np = np.array(X_train_encrypted)
X_train_encrypted_np = X_train_encrypted_np.reshape(len(X_train_encrypted_np), -1)

# Show the resulting (rows, columns) layout.
print(X_train_encrypted_np.shape)

In [7]:
def batch_predict(X, weights, batch_size=100):
    """Compute ``X @ weights`` in slices of ``batch_size`` rows.

    Args:
        X: Sliceable collection of input rows (``len`` and ``X[a:b]`` are used).
        weights: Weight matrix/vector passed as the right operand of ``np.dot``.
        batch_size: Number of rows multiplied per ``np.dot`` call.

    Returns:
        numpy array with the per-row outputs flattened into a single column
        (shape ``(-1, 1)``).
    """
    batch_starts = range(0, len(X), batch_size)
    # Flatten the per-batch outputs row by row so the final array keeps input order.
    row_outputs = [
        row
        for start in batch_starts
        for row in np.dot(X[start:start + batch_size], weights)
    ]
    return np.array(row_outputs).reshape(-1, 1)

import joblib
from joblib import Parallel, delayed

# Parallelizing predictions
def parallel_predict(X, weights):
    """Run ``batch_predict`` on every element of ``X`` through joblib.

    Args:
        X: Iterable of inputs; each element is handed to ``batch_predict`` on its own.
        weights: Weights forwarded unchanged to every ``batch_predict`` call.

    Returns:
        Whatever ``Parallel`` returns for the submitted tasks: one
        ``batch_predict`` result per element of ``X``, in input order.
    """
    # One delayed task per element of X; n_jobs=-1 asks joblib for all available workers.
    tasks = (delayed(batch_predict)(sample, weights) for sample in X)
    runner = Parallel(n_jobs=-1)
    return runner(tasks)

#y_pred = parallel_predict(X_test, weights)
    
#    with ThreadPoolExecutor(max_workers=num_workers) as executor:
#        futures = [executor.submit(predict_batch, X[i::num_workers]) for i in range(num_workers)]
#        results = [future.result() for future in futures]
#    return np.concatenate(results).reshape(-1, 1)  # Ensure predictions are column vectors

def compute_gradients(X, y, predictions):
    """Return the averaged gradient ``X.T @ (predictions - y) / len(X)``.

    Args:
        X: 2-D array of inputs, one row per sample.
        y: Targets, one per sample; reshaped to a column before subtraction.
        predictions: Model outputs for ``X``. A 1-D vector is treated as a
            single output column; 2-D input is used as given.

    Returns:
        Array with one row per column of ``X`` and one column per output.
    """
    targets = np.asarray(y).reshape(-1, 1)
    predictions = np.asarray(predictions)
    # A flat (n,) prediction vector would broadcast against the (n, 1) targets
    # into an (n, n) matrix and silently give a wrong gradient, so lift it to a
    # column first.
    if predictions.ndim == 1:
        predictions = predictions.reshape(-1, 1)
    errors = predictions - targets
    return np.dot(X.T, errors) / len(X)


In [None]:
# Encrypted-data training cell: runs a few full-batch gradient-descent epochs on
# X_train_encrypted_np, then scores the resulting weights on the plaintext X_test.
# NOTE(review): batch_size is only referenced by the commented-out mini-batch loop below.
batch_size = 35
# Initialize weights
# NOTE(review): the draw below is unseeded, so every run starts from different weights.
input_dim = X_train_encrypted_np.shape[1]  # Second dimension of the ciphertext array — NOTE(review): may differ from the plaintext feature count; confirm
output_dim = 1  # Assuming binary classification
weights = np.random.randn(input_dim, output_dim)

# Define learning rate and epochs
learning_rate = 0.001
epochs = 5

# Training loop
# Ensure encrypted data has the correct shape
# (same reshape that was applied when X_train_encrypted_np was built)
X_train_encrypted_np = X_train_encrypted_np.reshape(X_train_encrypted_np.shape[0], -1)
#X_test_encrypted_np = X_test_encrypted_np.reshape(X_test_encrypted_np.shape[0], X_test_encrypted_np.shape[1])
print(X_train_encrypted_np.shape)
#print(X_test_encrypted_np.shape)

# Full-batch update: predict on all rows, average the gradient, take one step.
for epoch in range(epochs):
    predictions = batch_predict(X_train_encrypted_np, weights)
    gradients = compute_gradients(X_train_encrypted_np, y_train, predictions)
    weights = weights - learning_rate * gradients

    # Optionally: Print weights for debugging
    print(f"Epoch {epoch+1}/{epochs}, Weights: {weights}")
# Disabled mini-batch variant of the loop above (the only user of batch_size):
#for epoch in range(epochs):
#    for i in range(0, len(X_train_encrypted_np), batch_size):
#        X_batch = X_train_encrypted_np[i:i+batch_size]
#        y_batch = y_train[i:i+batch_size]
#        
#        predictions = batch_predict(X_batch, weights)
#        gradients = compute_gradients(X_batch, y_batch, predictions)
#        
#        weights = weights - learning_rate * gradients
#    print(f"Epoch {epoch+1}/{epochs}, Weights: {weights}")

# Test model
# NOTE(review): X_test is plaintext while the weights were fitted on
# X_train_encrypted_np; np.dot needs X_test.shape[1] == weights.shape[0] —
# confirm the two dimensions actually agree.
#y_pred = parallel_predict(X_test_encrypted_np, weights)
y_pred = parallel_predict(X_test, weights)
# Convert predictions to binary labels
# (each pred is one entry of parallel_predict's result; 0.5 is the decision threshold)
y_pred_labels = [1 if pred >= 0.5 else 0 for pred in y_pred]

# Evaluate model
accuracy = accuracy_score(y_test, y_pred_labels)
precision = precision_score(y_test, y_pred_labels)
recall = recall_score(y_test, y_pred_labels)
f1 = f1_score(y_test, y_pred_labels)


print(f"Accuracy: {accuracy}") 

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

In [3]:
def batch_predict(X, weights, batch_size=100):
    """Multiply ``X`` by ``weights`` one slice of ``batch_size`` rows at a time.

    Args:
        X: Sliceable collection of input rows (``len`` and ``X[a:b]`` are used).
        weights: Right operand handed to ``np.dot`` for every slice.
        batch_size: Number of rows per ``np.dot`` call.

    Returns:
        numpy array built from the per-row outputs, in input order.
    """
    chunk_outputs = (
        np.dot(X[lo:lo + batch_size], weights)
        for lo in range(0, len(X), batch_size)
    )
    # Unpack each chunk row by row before stacking everything into one array.
    flattened = [row for chunk in chunk_outputs for row in chunk]
    return np.array(flattened)

def parallel_predict(X, weights, num_workers=4):
    """Compute ``X @ weights`` on a thread pool, preserving row order.

    The rows of ``X`` are cut into ``num_workers`` contiguous chunks, each chunk
    is multiplied by ``weights`` on a worker thread, and the partial results are
    concatenated in chunk order, so row ``i`` of the output belongs to row ``i``
    of ``X``.

    Args:
        X: 2-D array-like of inputs, one row per sample.
        weights: Right operand handed to ``np.dot`` for every chunk.
        num_workers: Number of chunks and worker threads.

    Returns:
        numpy array of predictions, aligned row-for-row with ``X``.

    Raises:
        ValueError: If ``num_workers`` is not positive.
    """
    X = np.asarray(X)

    def predict_batch(batch):
        return np.dot(batch, weights)

    # Contiguous chunks (not a strided X[i::num_workers] split): concatenating
    # strided slices would return the predictions in interleaved order and
    # misalign them with the labels they are compared against.
    batches = np.array_split(X, num_workers)
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        # Executor.map yields results in submission order.
        results = list(executor.map(predict_batch, batches))
    return np.concatenate(results)

def compute_gradients(X, y, predictions):
    """Return the averaged gradient ``X.T @ (predictions - y) / len(X)``.

    Args:
        X: 2-D array of inputs, one row per sample.
        y: Targets, one per sample; reshaped to a column before subtraction.
        predictions: Model outputs for ``X``. A 1-D vector is treated as a
            single output column; 2-D input is used as given.

    Returns:
        Array with one row per column of ``X`` and one column per output.
    """
    targets = np.asarray(y).reshape(-1, 1)
    predictions = np.asarray(predictions)
    # A flat (n,) prediction vector would broadcast against the (n, 1) targets
    # into an (n, n) matrix and silently give a wrong gradient, so lift it to a
    # column first.
    if predictions.ndim == 1:
        predictions = predictions.reshape(-1, 1)
    errors = predictions - targets
    return np.dot(X.T, errors) / len(X)

In [None]:
# Plaintext baseline: fit a simple linear model with full-batch gradient descent
# and score it on the held-out split.
input_dim = X_train.shape[1]
output_dim = 1

# Seed the initial weights so a rerun reproduces the same metrics
# (same seed value as the train/test split).
np.random.seed(42)
weights = np.random.randn(input_dim, output_dim)
learning_rate = 0.5
epochs = 2000
log_every = 200  # print progress every N epochs instead of 2000 times

# Standardize with training-set statistics only (no test leakage).
# With a step size of 0.5, gradient descent on raw features whose magnitude is
# well above 1 (presumably the case for the day-count columns) overshoots and
# the weights blow up to inf/NaN; unit-variance inputs keep the updates stable.
# NOTE(review): the model has no bias term; append a column of ones to the
# scaled matrices if an intercept is wanted.
feature_mean = X_train.mean(axis=0)
feature_std = X_train.std(axis=0)
feature_std[feature_std == 0] = 1.0  # constant columns: avoid division by zero
X_train_scaled = (X_train - feature_mean) / feature_std
X_test_scaled = (X_test - feature_mean) / feature_std

for epoch in range(epochs):
    predictions = batch_predict(X_train_scaled, weights)
    gradients = compute_gradients(X_train_scaled, y_train, predictions)
    weights = weights - learning_rate * gradients
    if epoch == 0 or (epoch + 1) % log_every == 0:
        print(f"Epoch {epoch+1}/{epochs}, Weights: {weights}")

# Check predictions (numpy abbreviates long arrays when printing)
y_pred = parallel_predict(X_test_scaled, weights)
print(f"Predictions: {y_pred}")

# Threshold at 0.5 in one vectorized step; keeping the labels in an array also
# stops the print below from dumping one entry per test row.
y_pred_labels = (np.asarray(y_pred).ravel() >= 0.5).astype(int)
print(f"Predicted Labels: {y_pred_labels}")

accuracy = accuracy_score(y_test, y_pred_labels)
precision = precision_score(y_test, y_pred_labels)
recall = recall_score(y_test, y_pred_labels)
f1 = f1_score(y_test, y_pred_labels)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")