In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import math
import seaborn as sns
from tqdm.notebook import tqdm

from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score
from sklearn.metrics import f1_score, confusion_matrix, precision_recall_curve, roc_curve
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import mean_absolute_error, mean_squared_error

from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, losses
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Model

import umap
import umap.plot

from plotly.subplots import make_subplots
import plotly.graph_objects as go   

In [None]:
from openfhe_lib.ckks.openFHE import * 

# === Generate Key-pairs of CKKS Context ===
# `generate_keys` is not defined in this notebook; presumably it comes from the
# star import of `openfhe_lib.ckks.openFHE` above -- TODO confirm.
# It is invoked once here, before any client encrypts or decrypts weights.
generate_keys()

In [None]:
# Width of the autoencoder's output layer (read by AnomalyDetector) and the
# value handed to every Client. Must agree with the 9s hard-coded in shapes_list.
n_features=9

In [None]:
# Shapes of the autoencoder's weight arrays as a (kernel, bias) pair per Dense
# layer, built from consecutive layer widths 9 -> 32 -> 16 -> 8 -> 16 -> 32 -> 9.
# Client.decrypted_model_params slices the flat decrypted vector in this order.
shapes_list = [
    shape
    for fan_in, fan_out in zip((9, 32, 16, 8, 16, 32), (32, 16, 8, 16, 32, 9))
    for shape in ((fan_in, fan_out), (fan_out,))
]

class AnomalyDetector(Model):
    """Dense autoencoder used as each client's local model.

    The encoder narrows the input through 32 -> 16 -> 8 ReLU units; the decoder
    widens it back through 16 -> 32 ReLU units and ends in a sigmoid layer of
    ``n_features`` units (``n_features`` is read from module scope).
    """

    def __init__(self):
        super().__init__()

        # Layers are created encoder-first so the order of the weight arrays
        # returned by get_weights() stays encoder, then decoder.
        encoder_layers = [layers.Dense(units, activation="relu") for units in (32, 16, 8)]
        self.encoder = tf.keras.Sequential(encoder_layers)

        decoder_layers = [layers.Dense(units, activation="relu") for units in (16, 32)]
        decoder_layers.append(layers.Dense(n_features, activation="sigmoid"))
        self.decoder = tf.keras.Sequential(decoder_layers)

    def call(self, x):
        """Return the reconstruction of ``x`` (encode, then decode)."""
        return self.decoder(self.encoder(x))



In [None]:
import time

# Running total, in seconds, of time spent inside encrypt_weights /
# decrypt_weights; the Client methods below add to it via `global security_time`.
security_time=0

class Client:
    """One federated-learning participant.

    Holds the client's own data split, a local ``AnomalyDetector`` and the
    helpers that exchange model weights through encrypted files.

    Parameters
    ----------
    name : identifier used as a prefix in this client's log lines.
    data_url : location of a header-less CSV; the last column is the label and
        the remaining columns are the features.
    enc_file : destination passed to ``encrypt_weights`` for this client's weights.
    n_features : accepted for interface compatibility only; this class never
        reads it (``AnomalyDetector`` uses the module-level ``n_features``).
    iters : number of epochs run by each ``local_training`` call.
    """

    def __init__(self, name, data_url, enc_file, n_features, iters):
        self.id = name
        self.enc_file = enc_file  # place where this client saves its encrypted weights

        # split data into train and test (needs self.id for its log lines)
        self.preprocessing(data_url)

        # define local training model
        self.local_model = AnomalyDetector()

        # some helpful stuff
        self.to_percent = lambda x: '{:.2f}%'.format(x)
        self.num_epochs = iters
        self.accuracies = []
        self.losses = []

    def preprocessing(self, data_url):
        """Load the CSV, split it 80/20, min-max scale it and separate it by label.

        Sets ``train_data`` / ``test_data`` (float32 tensors), ``train_labels`` /
        ``test_labels`` (boolean arrays) and the ``normal_*`` / ``anomalous_*``
        subsets, where rows whose label casts to True are the "normal" ones.
        """
        dataframe = pd.read_csv(data_url, header=None)
        print(f"[{self.id}] Data shape: {dataframe.shape}")
        # Cells that cannot be parsed as numbers become NaN; such rows are dropped.
        dataframe = dataframe.apply(pd.to_numeric, errors='coerce')
        dataframe = dataframe.dropna()

        raw_data = dataframe.values
        labels = raw_data[:, -1]
        data = raw_data[:, 0:-1]

        self.train_data, self.test_data, self.train_labels, self.test_labels = train_test_split(data, labels, test_size=0.2, random_state=21)
        # A single min/max over the whole training matrix (not per column);
        # the test split is scaled with the training statistics.
        min_val = tf.reduce_min(self.train_data)
        max_val = tf.reduce_max(self.train_data)

        self.train_data = (self.train_data - min_val) / (max_val - min_val)
        self.test_data = (self.test_data - min_val) / (max_val - min_val)

        self.train_data = tf.cast(self.train_data, tf.float32)
        self.test_data = tf.cast(self.test_data, tf.float32)
        self.train_labels = self.train_labels.astype(bool)
        self.test_labels = self.test_labels.astype(bool)

        self.normal_train_data = self.train_data[self.train_labels]
        self.normal_test_data = self.test_data[self.test_labels]

        self.anomalous_train_data = self.train_data[~self.train_labels]
        self.anomalous_test_data = self.test_data[~self.test_labels]

        # **Print Data Sizes for Debugging** (counts are of the normal subsets)
        print(f"[{self.id}] Training samples: {len(self.normal_train_data)}")
        print(f"[{self.id}] Testing samples: {len(self.normal_test_data)}")

    def local_training(self, debug=True):
        """Train the local autoencoder on this client's normal training samples.

        Runs ``self.num_epochs`` epochs and appends the per-epoch training loss
        and accuracy to ``self.losses`` / ``self.accuracies``.

        Parameters
        ----------
        debug : when True (default) Keras progress output and the history keys
            are printed exactly as before; when False the call is silent.
            Previously the flag was accepted but ignored.
        """
        # Compiling on every call creates a fresh optimizer for each round.
        self.local_model.compile(optimizer='adam', loss='mae', metrics=['accuracy'])

        # Only override Keras' own verbosity default when silence is requested,
        # so debug=True behaves exactly as it did before.
        fit_kwargs = {} if debug else {"verbose": 0}

        self.history = self.local_model.fit(
            self.normal_train_data, self.normal_train_data,
            epochs=self.num_epochs,
            batch_size=512,
            validation_data=(self.test_data, self.test_data),
            shuffle=True,
            **fit_kwargs
        )

        if debug:
            print("History keys:", self.history.history.keys())

        self.losses.extend(self.history.history['loss'])
        self.accuracies.extend(self.history.history['accuracy'])

    def encrypted_model_params(self):
        """Flatten every weight array into one list and encrypt it to ``self.enc_file``.

        The time spent in ``encrypt_weights`` is added to the module-level
        ``security_time``.
        """
        global security_time

        flattened_list = []  # all weights, concatenated in get_weights() order
        for w in self.local_model.get_weights():
            flattened_list.extend(w.flatten().tolist())

        start = time.perf_counter()
        encrypt_weights(flattened_list, self.enc_file)
        end = time.perf_counter()
        security_time += end - start

    def decrypted_model_params(self, agg_file="/enc_aggregator_weight_server.txt"):
        """Decrypt the aggregated weights and load them into the local model.

        Parameters
        ----------
        agg_file : path passed to ``decrypt_weights``; defaults to the location
            that was previously hard-coded.

        Raises
        ------
        ValueError : if the decrypted vector holds fewer values than
            ``shapes_list`` requires.

        The time spent in ``decrypt_weights`` is added to the module-level
        ``security_time``.
        """
        global security_time

        start = time.perf_counter()
        params = decrypt_weights(agg_file)
        end = time.perf_counter()
        security_time += end - start

        # Fail early with a clear message instead of a reshape error part-way
        # through. Extra trailing values are ignored, as before (presumably
        # ciphertext padding -- TODO confirm against decrypt_weights).
        sizes = [int(np.prod(shape)) for shape in shapes_list]
        expected = sum(sizes)
        if len(params) < expected:
            raise ValueError(
                "decrypted weight vector has {} values, expected at least {}".format(
                    len(params), expected
                )
            )

        reconstructed_weights_list = []
        start_index = 0
        for shape, size in zip(shapes_list, sizes):
            chunk = params[start_index : start_index + size]
            reconstructed_weights_list.append(np.array(chunk).reshape(shape))
            start_index += size

        self.local_model.set_weights(reconstructed_weights_list)

    def plot_graphs(self, plot_title = 'EV PRED'):
        """Plot the accumulated training loss and accuracy as two separate figures."""
        plt.plot(self.losses)
        plt.title(f"{plot_title} - Training Loss")
        plt.xlabel("Iterations")
        plt.ylabel("Training Loss")
        plt.show()
        plt.plot(self.accuracies)
        plt.title(f"{plot_title} - Training Accuracy")
        plt.xlabel("Iterations")
        plt.ylabel("Training Accuracy (Percent %)")
        plt.show()

    def print_result_after_training(self):
        """Print every weight array of the local model, then draw the training graphs."""
        print('Model parameters:')
        print('  | Weights: %s' % self.local_model.get_weights())
        self.plot_graphs()

In [None]:
# clients = [
#     Client('Car1', 'data/cars/car_506.csv', "/enc_weight_client1.txt", n_features, iters=2), 
#     Client('Car2', 'data/cars/car_516.csv', "/enc_weight_client2.txt", n_features, iters=2),
#     Client('Car3', 'data/cars/car_512.csv', "/enc_weight_client3.txt", n_features, iters=2),
#     Client('Car4', 'data/cars/car_503.csv', "/enc_weight_client4.txt", n_features, iters=2)
# ]

# clients = [
#     Client('Car1', 'data/cars/split_1.csv', "/enc_weight_client1.txt", n_features, iters=5), 
#     Client('Car2', 'data/cars/split_2.csv', "/enc_weight_client2.txt", n_features, iters=5),
#     Client('Car3', 'data/cars/split_3.csv', "/enc_weight_client3.txt", n_features, iters=5),
#     Client('Car4', 'data/cars/split_4.csv', "/enc_weight_client4.txt", n_features, iters=5)
# ]

# Five clients, Car1..Car5, each reading its own CSV split and writing its
# encrypted weights to its own file; every local training call runs 5 epochs.
clients = [
    Client(
        name=f'Car{i+1}',
        data_url=f'data/cars/split_{i+1}.csv',
        enc_file=f"/enc_weight_client{i+1}.txt",
        n_features=n_features,
        iters=5,
    )
    for i in range(5)
]


In [None]:
# Number of federated rounds run by federated_learning.
iterations = 5 #2000
# Formats a number with two decimals and a trailing '%' (the value is not scaled by 100).
to_percent = lambda x: '{:.2f}%'.format(x)
# Number of participating clients.
n_cars = len(clients)
# Re-declares the value already assigned in an earlier cell.
n_features = 9
    
def compute_federated_accuracy(model, input, output):
    """Return the percentage of samples whose thresholded score equals the label.

    Parameters
    ----------
    model : callable mapping ``input`` to one score per sample, shaped ``(n,)``
        or ``(n, 1)``.
    input : passed to ``model`` unchanged (the name shadows the builtin but is
        kept so existing keyword callers keep working).
    output : expected labels (0/1 or bool), one per sample.

    Returns
    -------
    float : accuracy in percent. An empty prediction yields 0.0; it previously
        raised ``ZeroDivisionError``.
    """
    scores = np.asarray(model(input))
    n_samples = scores.shape[0]
    if n_samples == 0:
        return 0.0

    # A score of at least 0.5 counts as class 1; compare all samples at once
    # instead of looping in Python.
    predicted = (scores.reshape(n_samples) >= 0.5).astype(float)
    expected = np.asarray(output)[:n_samples].reshape(n_samples)
    n_correct = float(np.sum(predicted == expected))
    return 100. * n_correct / n_samples

def federated_learning(clients):
    """Run ``iterations`` federated rounds over ``clients``.

    In each round, every client (after the first round) loads the aggregated
    weights, trains locally, and encrypts its new weights; then ``aggregator()``
    is called once.

    Parameters
    ----------
    clients : sequence of objects exposing ``decrypted_model_params()``,
        ``local_training(debug=...)``, ``encrypted_model_params()`` and the
        ``losses`` / ``accuracies`` lists.

    Returns
    -------
    (losses, accuracies) : two lists with one inner list per client, each
        holding the last reported loss / accuracy of every round.
    """
    # Size everything from the argument rather than the `n_cars` global, so the
    # function is correct for whatever list it is given.
    n_clients = len(clients)

    # record losses and accuracies reported by clients
    losses = [[] for _ in range(n_clients)]
    accuracies = [[] for _ in range(n_clients)]

    pbar = tqdm(range(iterations), desc='Federated Learning Process')
    for iteration in pbar:
        if iteration:  # from the second round on, start from the aggregated weights
            for client in clients:
                client.decrypted_model_params()

        for idx, client in enumerate(clients):
            client.local_training(debug=False)

            # report to server
            losses[idx].append(client.losses[-1])
            accuracies[idx].append(client.accuracies[-1])

        # clients encrypt the final weights of local model after training
        for client in clients:
            client.encrypted_model_params()

        # `aggregator` is not defined in this notebook (presumably from the
        # openFHE star import -- TODO confirm). It used to be wrapped in a
        # GradientTape whose recording was immediately stopped and whose tape
        # was never used, which had no effect, so it is called directly.
        aggregator()

        # logging
        if (iteration + 1) % 100 == 0:
            losses_str = ['{:.4f}'.format(series[-1]) for series in losses]
            accuracies_str = [to_percent(series[-1]) for series in accuracies]
            print('[LOG] Epoch = {0:04d}\n> Losses = {1}\n> Accuracies = {2}'.format(iteration + 1, losses_str, accuracies_str))

    return losses, accuracies

In [None]:
import time
# Time the whole federated run with the same monotonic clock that
# `security_time` is accumulated with (time.perf_counter), so the ratio of the
# two printed at the end of the notebook compares like with like.
st=time.perf_counter()
losses, accuracies = federated_learning(clients)
et=time.perf_counter()


In [None]:
# Elapsed time of the federated_learning call, in seconds (et and st are set in the previous cell).
time_taken = et-st
print("Time taken for training with ckks:", time_taken)

In [None]:
# Display the per-client loss history returned by federated_learning.
losses

In [None]:
# Display the per-client accuracy history returned by federated_learning.
accuracies

In [None]:
def plot_federated_graphs(diagnosis_title, losses, accuracies):
    """Draw one figure of per-client training loss and one of per-client accuracy.

    Parameters
    ----------
    diagnosis_title : prefix used in both figure titles.
    losses : one sequence of loss values per client.
    accuracies : one sequence of accuracy values per client.

    The number of curves follows the series that are passed in, not the
    ``n_cars`` global, so the function works for any number of clients.
    """
    for idx, client_losses in enumerate(losses, start=1):
        plt.plot(client_losses, label=f'CAR {idx}')
    plt.legend(loc='upper right', shadow=True)
    plt.title(f"{diagnosis_title} - Training Loss")
    plt.xlabel("Iterations")
    plt.ylabel("Training Loss")
    plt.show()

    for idx, client_accuracies in enumerate(accuracies, start=1):
        plt.plot(client_accuracies, label=f'car {idx}')
    plt.legend(loc='lower right', shadow=True)
    plt.title(f"{diagnosis_title} - Training Accuracy")
    plt.xlabel("Iterations")
    plt.ylabel("Training Accuracy (Percent %)")
    plt.show()

# Plot the per-round histories collected by federated_learning.
plot_federated_graphs('PRED EV', losses, accuracies)

In [None]:
# for i in range(4):
#     local_mod = clients[i].local_model

#     print(f'\nModel parameters client{i}:')
#     print('  | Weights: %s' % local_mod.linear.weight) ### Virtualize record of training processlobal_model.linear.weight)
#     print('  | Bias: %s' % local_mod.linear.bias)    

# Load the latest aggregated weights into client 0's model and use that model
# as the global one. `global_model` is the same object as
# clients[0].local_model, not a copy.
clients[0].decrypted_model_params()
global_model = clients[0].local_model

print('\nModel parameters:')
# Prints every weight array of the global model in full.
print('  | Weights: %s' % global_model.get_weights())
# print('  | Bias: %s' % global_model.linear.bias)

In [None]:
# # prepare data for testing model
# df_test = pd.read_csv('data/cars/test_data.csv')
# df_test["label"] = (df_test["label"] == 10).astype(int)
# test , X_test , Y_test  = scale_dataset(df_test , False)

# test_acc = compute_federated_accuracy(global_model, X_test, Y_test)
# print('\nTesting Accuracy = {}'.format(to_percent(test_acc)))

In [None]:
# Inspect the container type of one client's normal training samples.
type(clients[0].normal_train_data)

In [None]:
# Pool the normal training samples of all clients along the sample axis,
# skipping clients that have none.
all_normal_train_data = tf.concat(
    [client.normal_train_data for client in clients if len(client.normal_train_data) > 0],
    axis=0
)

In [None]:
# Inspect the type of the pooled training tensor.
type(all_normal_train_data)

In [None]:
# Reconstruct the pooled normal training samples with the global model and take
# the mean absolute error between each reconstruction and its input.
reconstructions = global_model.predict(all_normal_train_data)
train_loss = tf.keras.losses.mae(reconstructions, all_normal_train_data)

In [None]:
# Decision threshold: mean training reconstruction error plus one standard deviation.
threshold = np.mean(train_loss) + np.std(train_loss)
print("Threshold: ", threshold)

In [None]:
# Pool the anomalous test samples of all clients along the sample axis,
# skipping clients that have none.
all_anomalous_test_data = tf.concat(
    [client.anomalous_test_data for client in clients if len(client.anomalous_test_data) > 0],
    axis=0
)

In [None]:
# Reconstruction error of the pooled anomalous test samples.
# Note: this rebinds `reconstructions`, replacing the training reconstructions.
reconstructions = global_model.predict(all_anomalous_test_data)
test_loss = tf.keras.losses.mae(reconstructions, all_anomalous_test_data)

In [None]:
def predict(model, data, threshold):
    """Flag each sample whose reconstruction error is below ``threshold``.

    ``model`` is called on ``data``; the mean absolute error between that
    output and ``data`` is compared against ``threshold``. The result is True
    where the error is strictly smaller than the threshold.
    """
    reconstruction_error = tf.keras.losses.mae(model(data), data)
    return tf.math.less(reconstruction_error, threshold)

def print_stats(predictions, labels):
    """Print accuracy, precision and recall of ``predictions`` against ``labels``.

    Either argument may be a TensorFlow tensor, in which case it is converted
    with ``.numpy()`` first; anything else is passed to the scorers unchanged.
    """
    # Convert tensors to NumPy arrays
    if isinstance(predictions, tf.Tensor):
        predictions = predictions.numpy()
    if isinstance(labels, tf.Tensor):
        labels = labels.numpy()

    # Each metric is computed and printed in turn, in this order.
    report = (
        ("Accuracy = {}", accuracy_score),
        ("Precision = {}", precision_score),
        ("Recall = {}", recall_score),
    )
    for template, scorer in report:
        print(template.format(scorer(labels, predictions)))

In [None]:
# Pool the full (normal + anomalous) test split of every client along the
# sample axis, skipping clients with no test rows, then inspect the result type.
all_test_data = tf.concat(
    [client.test_data for client in clients if len(client.test_data) > 0],
    axis=0
)
type(all_test_data)

In [None]:
# Pool the boolean test labels in the same client order as all_test_data.
# Clients are filtered by label count here and by data count there; both come
# from the same train_test_split call, so the two filters select the same clients.
active_clients = [client for client in clients if len(client.test_labels) > 0]
if not active_clients:
    raise ValueError("No clients have test_labels to combine.")
test_label_arrays = [client.test_labels for client in active_clients]
all_test_labels = np.concatenate(test_label_arrays, axis=0)

# all_test_labels[0]

In [None]:
# predict() yields True where the reconstruction error is below the threshold;
# the labels are True for the rows Client.preprocessing treats as normal, so
# True means "normal" on both sides of the comparison.
preds = predict(global_model, all_test_data, threshold)
print_stats(preds, all_test_labels)

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve

def get_clf_eval(y_test, pred=None, pred_proba=None):
    """
    Evaluates classification performance and, when scores are supplied, plots the ROC curve.

    Parameters:
    - y_test (array-like): True binary labels.
    - pred (array-like): Predicted binary labels. Required; the ``None`` default
      is kept only so the signature stays unchanged.
    - pred_proba (array-like, optional): Scores for the positive class. When
      omitted, the AUROC, the ROC CSV export and the ROC plot are skipped
      (previously the call crashed inside scikit-learn).

    Returns:
    - confusion (ndarray): Confusion matrix.

    Raises:
    - ValueError: if ``pred`` is None.

    Side effects:
    - Prints the confusion matrix and metrics. With ``pred_proba``, also writes
      "ckks_roc.csv" to the working directory and shows a matplotlib figure.
    """
    if pred is None:
        raise ValueError("get_clf_eval() requires `pred` (predicted binary labels)")

    # Convert tensors to NumPy arrays if needed
    y_test, pred, pred_proba = (
        value.numpy() if isinstance(value, tf.Tensor) else value
        for value in (y_test, pred, pred_proba)
    )
    has_scores = pred_proba is not None

    # Compute evaluation metrics (all of them before anything is printed)
    confusion = confusion_matrix(y_test, pred)
    accuracy = accuracy_score(y_test, pred)
    precision = precision_score(y_test, pred)
    recall = recall_score(y_test, pred)
    f1 = f1_score(y_test, pred)
    roc_auc = roc_auc_score(y_test, pred_proba) if has_scores else None

    # Print evaluation metrics
    print('Confusion Matrix:')
    print(confusion)
    summary = (f'Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, '
               f'F1 Score: {f1:.4f}')
    if not has_scores:
        print(summary)
        return confusion
    print(f'{summary}, AUROC: {roc_auc:.4f}')

    # Export and plot the ROC curve
    fpr, tpr, _ = roc_curve(y_test, pred_proba)
    roc_data = pd.DataFrame({'FPR': fpr, 'TPR': tpr})
    roc_data.to_csv("ckks_roc.csv", index=False)
    print('FPR and TPR saved to "ckks_roc.csv"')

    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUROC = {roc_auc:.2f})')
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Random Classifier')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve')
    plt.legend(loc="lower right")
    plt.grid(alpha=0.3)
    plt.show()

    return confusion


In [None]:
# Store the result under its own name. Binding it to `confusion_matrix` would
# shadow scikit-learn's function of that name, which get_clf_eval calls, so
# re-running this cell (or calling get_clf_eval again) would fail.
# NOTE(review): the binary `preds` are also passed as the score argument, so the
# ROC/AUROC is computed from hard labels rather than a continuous score -- confirm
# this is intended.
conf_mat = get_clf_eval(all_test_labels,preds,preds)
plt.figure(figsize=(8,6))
sns.set(font_scale = 2)
sns.set_style("white")
sns.heatmap(conf_mat, cmap = 'gist_yarg_r',annot = True, fmt='d')

In [None]:
# Report the accumulated encrypt/decrypt time and its share of the total
# training time measured around federated_learning.
print("Time taken for security operations:", security_time)

print("Ratio :" ,security_time/time_taken)