In [10]:
import numpy as np
import pandas as pd
import tenseal as ts
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.datasets import make_classification
from sklearn.preprocessing import LabelEncoder
from concurrent.futures import ThreadPoolExecutor
from sklearn.preprocessing import StandardScaler


In [11]:
df = pd.read_csv('payment_fraud.csv')

label_encoder = LabelEncoder()
df['paymentMethod_encoded'] = label_encoder.fit_transform(df['paymentMethod'])

features = ['accountAgeDays', 'numItems', 'localTime', 'paymentMethod_encoded', 'paymentMethodAgeDays']
X = df[features].values
y = df['label'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # Fit on training data and transform it
X_test = scaler.transform(X_test)  # Transform test data using the same scaler


In [3]:
# Encryption setup with TenSEAL
context = ts.context(ts.SCHEME_TYPE.CKKS, poly_modulus_degree=16384, coeff_mod_bit_sizes=[60, 40, 40, 60, 60])
context.generate_galois_keys()
context.global_scale = 2 ** 40

# Encrypt training and testing data
X_train_encrypted = [ts.ckks_vector(context, x) for x in X_train]
X_test_encrypted = [ts.ckks_vector(context, x) for x in X_test]


In [6]:
X_train_encrypted_np = np.array( X_train_encrypted)
#X_test_encrypted_np = np.array( X_test_encrypted)
X_train_encrypted_np = X_train_encrypted_np.reshape(X_train_encrypted_np.shape[0], -1)
#X_test_encrypted_np = X_test_encrypted_np.reshape(X_test_encrypted_np.shape[0], -1)
#X_train_encrypted_np = X_train_encrypted_np.reshape(X_train_encrypted_np.shape[0], -1)
#X_test_encrypted_np = X_test_encrypted_np.reshape(X_test_encrypted_np.shape[0], X_test_encrypted_np.shape[1])
print(X_train_encrypted_np.shape)
#print(X_test_encrypted_np.shape)

(31376, 1)


In [7]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Define functions if not already available
def batch_predict(X, weights):
    # Simple linear model prediction (e.g., X @ weights for matrix multiplication)
    return X @ weights

def compute_gradients(X, y, predictions):
    # Compute the gradients using mean squared error as an example
    errors = predictions - y
    gradients = X.T @ errors / len(y)  # Mean of the gradient
    return gradients

def parallel_predict(X, weights):
    # Parallelized version, could be similar to batch_predict if parallelism is not needed
    return batch_predict(X, weights)

# Load data (example placeholders)
# Ensure that X_train, X_test, y_train, and y_test are properly defined before running this code
# For example: X_train, X_test, y_train, y_test = train_test_split(...)

# Initialize weights for a simple linear model
input_dim = X_train_encrypted_np.shape[1]  # Number of features
output_dim = 1  # Binary classification output

weights = np.random.randn(input_dim, output_dim)
learning_rate = 0.01  # Reduced learning rate
epochs = 100  # Reduced epochs

# Training loop with optimizations
for epoch in range(epochs):
    # Forward pass: calculate predictions
    predictions = batch_predict(X_train_encrypted_np, weights)

    # Calculate gradients and update weights
    gradients = compute_gradients(X_train_encrypted_np, y_train, predictions)
    weights = weights - learning_rate * gradients

    # Print progress every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Weights update: {weights}")

# Check predictions on test data
y_pred = parallel_predict(X_test, weights)

# Threshold predictions to get binary classification results
y_pred_labels = [1 if pred >= 0.5 else 0 for pred in y_pred]

# Calculate and print evaluation metrics
accuracy = accuracy_score(y_test, y_pred_labels)
precision = precision_score(y_test, y_pred_labels, zero_division=1)  # Handle zero precision cases
recall = recall_score(y_test, y_pred_labels)
f1 = f1_score(y_test, y_pred_labels)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")


KeyboardInterrupt: 

In [5]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Define functions if not already available
def batch_predict(X, weights):
    # Simple linear model prediction (e.g., X @ weights for matrix multiplication)
    return X @ weights

def compute_gradients(X, y, predictions):
    # Compute the gradients using mean squared error as an example
    errors = predictions - y
    gradients = X.T @ errors / len(y)  # Mean of the gradient
    return gradients

def parallel_predict(X, weights):
    # Parallelized version, could be similar to batch_predict if parallelism is not needed
    return batch_predict(X, weights)

# Load data (example placeholders)
# Ensure that X_train, X_test, y_train, and y_test are properly defined before running this code
# For example: X_train, X_test, y_train, y_test = train_test_split(...)

# Initialize weights for a simple linear model
input_dim = X_train.shape[1]  # Number of features
output_dim = 1  # Binary classification output

weights = np.random.randn(input_dim, output_dim)
learning_rate = 0.01  # Reduced learning rate
epochs = 100  # Reduced epochs

# Training loop with optimizations
for epoch in range(epochs):
    # Forward pass: calculate predictions
    predictions = batch_predict(X_train, weights)

    # Calculate gradients and update weights
    gradients = compute_gradients(X_train, y_train, predictions)
    weights = weights - learning_rate * gradients

    # Print progress every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Weights update: {weights}")

# Check predictions on test data
y_pred = parallel_predict(X_test, weights)

# Threshold predictions to get binary classification results
y_pred_labels = [1 if pred >= 0.5 else 0 for pred in y_pred]

# Calculate and print evaluation metrics
accuracy = accuracy_score(y_test, y_pred_labels)
precision = precision_score(y_test, y_pred_labels, zero_division=1)  # Handle zero precision cases
recall = recall_score(y_test, y_pred_labels)
f1 = f1_score(y_test, y_pred_labels)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")


Epoch 10/100, Weights update: [[-1.40553553e+41 -1.40553553e+41 -1.40553553e+41 ... -1.40553553e+41
  -1.40553553e+41 -1.40553553e+41]
 [-9.30648480e+37 -9.30648480e+37 -9.30648480e+37 ... -9.30648480e+37
  -9.30648480e+37 -9.30648480e+37]
 [-4.13955718e+38 -4.13955718e+38 -4.13955718e+38 ... -4.13955718e+38
  -4.13955718e+38 -4.13955718e+38]
 [-2.88914277e+37 -2.88914277e+37 -2.88914277e+37 ... -2.88914277e+37
  -2.88914277e+37 -2.88914277e+37]
 [-1.93431535e+40 -1.93431535e+40 -1.93431535e+40 ... -1.93431535e+40
  -1.93431535e+40 -1.93431535e+40]]
Epoch 20/100, Weights update: [[-4.33216008e+82 -4.33216008e+82 -4.33216008e+82 ... -4.33216008e+82
  -4.33216008e+82 -4.33216008e+82]
 [-2.86845697e+79 -2.86845697e+79 -2.86845697e+79 ... -2.86845697e+79
  -2.86845697e+79 -2.86845697e+79]
 [-1.27589975e+80 -1.27589975e+80 -1.27589975e+80 ... -1.27589975e+80
  -1.27589975e+80 -1.27589975e+80]
 [-8.90495382e+78 -8.90495382e+78 -8.90495382e+78 ... -8.90495382e+78
  -8.90495382e+78 -8.90495382

  gradients = X.T @ errors / len(y)  # Mean of the gradient
  gradients = X.T @ errors / len(y)  # Mean of the gradient
  weights = weights - learning_rate * gradients


Epoch 80/100, Weights update: [[nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]]
Epoch 90/100, Weights update: [[nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]]
Epoch 100/100, Weights update: [[nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]]


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()