In [None]:
import numpy as np
import pandas as pd
import zipfile
import dask.dataframe as dk
import tensorflow as tf
import io
from tensorflow.keras.utils import Sequence, to_categorical
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import mixed_precision

# Enable mixed precision globally via the 'mixed_float16' Keras policy.
mixed_precision.set_global_policy('mixed_float16')
# Silence non-essential TensorFlow warnings (only ERROR and above are logged).
tf.get_logger().setLevel('ERROR')
# NOTE(review): the original comment here said "disable XLA to avoid CUDA
# errors", but set_jit(True) turns XLA JIT compilation ON. Confirm which of
# the two is actually intended before relying on either.
tf.config.optimizer.set_jit(True)

# Directory holding the CSV files; three files, numbered from 1.
temp_dir = "FL_Data1/"
input_files = [temp_dir + f"file{number}.csv" for number in range(1, 4)]
print(input_files)

# One lazily-evaluated Dask DataFrame per input file.
df = list(map(dk.read_csv, input_files))
print(df[1].dtypes)

# Shared hyper-parameters used further down the script.
batch_size = 256
ratio_test_all = 0.2

# Number of columns of the second file minus one (the 'label' column).
column_count = len(df[1].columns)
features_len = column_count - 1
print("Feature Len: ", features_len)

def standardize_dask_df(dask_df, label_col='label'):
    """Yield the non-empty partitions of ``dask_df`` with features standardized.

    A single ``StandardScaler`` is fitted on every column except
    ``label_col`` (computed over the whole frame), then applied to each
    partition in turn. The label column is passed through untouched.

    Feature columns are selected by name rather than by position, so the
    result is correct wherever the label column sits in the frame. The
    previous positional ``iloc[:, :-1]`` assignment silently wrote scaled
    values into the wrong columns unless the label happened to be last.

    Args:
        dask_df: Dask DataFrame containing the feature columns and
            ``label_col``.
        label_col: Name of the column to leave unscaled.

    Yields:
        pandas.DataFrame: one computed partition at a time, with the feature
        columns replaced by their standardized values.

    Raises:
        KeyError: if ``label_col`` is not a column of ``dask_df`` (raised on
            first iteration, since this is a generator).
    """
    if label_col not in dask_df.columns:
        raise KeyError(f"{label_col!r} not found in DataFrame columns")
    feature_cols = [col for col in dask_df.columns if col != label_col]

    # Fit once on the full data so every partition shares the same statistics.
    scaler = StandardScaler()
    scaler.fit(dask_df[feature_cols].compute().values)

    for delayed_part in dask_df.to_delayed():
        part = delayed_part.compute()
        if part.empty:
            continue
        # Column-wise replacement (not an in-place iloc write) keeps features
        # and label aligned by name and lets the columns take the float
        # dtype of the scaled values.
        part[feature_cols] = scaler.transform(part[feature_cols].values)
        yield part

# For every input frame: standardize all partitions, stitch them back into a
# single pandas DataFrame, and re-wrap it as a Dask DataFrame with the same
# number of partitions as the source.
scaled_dfs = [
    dk.from_pandas(
        pd.concat(list(standardize_dask_df(source_df))),
        npartitions=source_df.npartitions,
    )
    for source_df in df
]

# Per-client splits: first carve off `ratio_test_all` of the rows as a
# hold-out set, then divide that hold-out 75% / 25% into test / validation.
train_dfs, val_dfs, test_dfs = [], [], []
for scaled in scaled_dfs:
    train_part, holdout_part = scaled.random_split([1 - ratio_test_all, ratio_test_all])
    test_part, val_part = holdout_part.random_split([1 - 0.25, 0.25])
    train_dfs.append(train_part)
    val_dfs.append(val_part)
    test_dfs.append(test_part)
# The string literal below is inert (never executed): it is a disabled variant
# of the split above that iterates the raw `df` list instead of `scaled_dfs`.
"""
train_dfs = []
val_dfs = []
test_dfs= []
for dff in df:
    train_df, val_test_df =dff.random_split([1 - ratio_test_all, ratio_test_all])
    test_df, val_df = val_test_df.random_split([1-0.25, 0.25])
    train_dfs.append(train_df)
    val_dfs.append(val_df)
    test_dfs.append(test_df)
"""
def dask_to_tf_dataset(dask_df, batch_size):
    """Wrap a Dask DataFrame as a batched, prefetching ``tf.data.Dataset``.

    Partitions are computed one at a time inside a Python generator. Each
    non-empty partition is cut into consecutive slices of at most
    ``batch_size`` rows (the last slice of a partition may be shorter).
    Batches never span two partitions.

    The previous implementation split each partition into
    ``len(X) // batch_size`` roughly equal chunks, which produced batches
    larger than ``batch_size`` (e.g. 511 rows with ``batch_size=256`` became
    one batch of 511). The feature count is now taken from ``dask_df`` itself
    rather than from a module-level global.

    Args:
        dask_df: Dask DataFrame with a ``'label'`` column; every other column
            is treated as a feature.
        batch_size: Maximum number of rows per yielded batch; must be > 0.

    Returns:
        tf.data.Dataset yielding ``(features, labels)`` pairs with dtypes
        ``float32`` / ``int32`` and shapes ``(None, n_features)`` / ``(None,)``.

    Raises:
        ValueError: if ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    # Every column except 'label' is a feature.
    n_features = len(dask_df.columns) - 1

    def generator():
        for delayed_part in dask_df.to_delayed():
            part = delayed_part.compute()
            if part.empty:
                continue
            X = part.drop(columns='label').values.astype(np.float32)
            y = part['label'].values.astype(np.int32)
            # Fixed-stride slicing guarantees no batch exceeds batch_size.
            for start in range(0, len(X), batch_size):
                stop = start + batch_size
                yield X[start:stop], y[start:stop]

    output_signature = (
        tf.TensorSpec(shape=(None, n_features), dtype=tf.float32),
        tf.TensorSpec(shape=(None,), dtype=tf.int32),
    )
    dataset = tf.data.Dataset.from_generator(generator, output_signature=output_signature)
    return dataset.prefetch(tf.data.AUTOTUNE)

# Build one endlessly repeating tf.data pipeline per client for each of the
# three splits, in the order train -> validation -> test.
train_gens, val_gens, test_gens = (
    [dask_to_tf_dataset(frame, batch_size).repeat() for frame in split_frames]
    for split_frames in (train_dfs, val_dfs, test_dfs)
)

from server_no_HE import Server
from client_no_HE import Client
import datetime
import tenseal as ts

num_servers = 1
num_clients = 3


def _num_batches(frame):
    """Return how many batches of ``batch_size`` rows cover ``frame`` once.

    The division happens *inside* the ceil, so a trailing partial batch is
    counted. The previous ``int(np.ceil(n) / batch_size)`` applied ceil to
    the integer row count and then truncated the quotient, dropping it.
    """
    row_count = frame.shape[0].compute()
    return int(np.ceil(row_count / batch_size))


# Steps per pass over each client's train / validation / test split.
stepsPerEpoch_Clients = [_num_batches(train_dfs[index]) for index in range(num_clients)]
stepsValidate_Clients = [_num_batches(val_dfs[index]) for index in range(num_clients)]
stepsTest_Clients = [_num_batches(test_dfs[index]) for index in range(num_clients)]

# Agent identifiers: 'server_0', ... and 'client_0', ...
active_servers_list = ['server_' + str(i) for i in range(num_servers)]
active_clients_list = ['client_' + str(i) for i in range(num_clients)]
print(active_servers_list)
print(active_clients_list)

def init_he_context():
    """Create and return the TenSEAL CKKS context used by this script.

    The context is built with polynomial modulus degree 16384 and coefficient
    modulus bit sizes [60, 40, 40, 40, 40, 60]; Galois keys are generated and
    the global scale is set to 2**40 before the context is returned.
    """
    ckks_settings = {
        "poly_modulus_degree": 16384,
        "coeff_mod_bit_sizes": [60, 40, 40, 40, 40, 60],
    }
    he_ctx = ts.context(ts.SCHEME_TYPE.CKKS, **ckks_settings)
    he_ctx.generate_galois_keys()
    he_ctx.global_scale = 2**40
    return he_ctx

# Single encryption context shared by all clients.
context = init_he_context()

# One Server instance per server name.
serverObjects = {}
for name in active_servers_list:
    serverObjects[name] = Server(server_name=name, active_clients_list=active_clients_list)

# One Client instance per client name, each given its own datasets and the
# matching step counts (looked up by the client's position in the list).
clientObjects = {}
for clientID, name in enumerate(active_clients_list):
    clientObjects[name] = Client(
        name,
        train_gens[clientID], val_gens[clientID], test_gens[clientID],
        stepsPerEpoch_Clients[clientID], stepsValidate_Clients[clientID], stepsTest_Clients[clientID],
        active_clients_list=active_clients_list,
        he_context=context,
    )

# Return values are ignored here; presumably these calls report each client's
# step counts (verify against client_no_HE.Client).
for client in clientObjects.values():
    client.get_steps_per_epoch()
    client.get_validation_steps()
    client.get_test_steps()

# Registry of every agent, grouped by role.
agents_dict = {'server': serverObjects, 'client': clientObjects}

# Hand the registry to the servers first, then to the clients.
for agent in [*serverObjects.values(), *clientObjects.values()]:
    agent.set_agentsDict(agents_dict=agents_dict)

# Release the split DataFrames once they are no longer needed.
del train_dfs, val_dfs, test_dfs

server = agents_dict['server']['server_0']
if __name__ == '__main__':
    server.InitLoop()
    server.final_statistics()

2025-05-26 16:20:53.402955: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-26 16:20:53.413957: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748251253.424721  211916 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748251253.427827  211916 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1748251253.437263  211916 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

['FL_Data1/file1.csv', 'FL_Data1/file2.csv', 'FL_Data1/file3.csv']
flow_duration      float64
Header_Length      float64
Protocol Type      float64
Duration           float64
Rate               float64
Srate              float64
Drate              float64
fin_flag_number    float64
syn_flag_number    float64
rst_flag_number    float64
psh_flag_number    float64
ack_flag_number    float64
ece_flag_number    float64
cwr_flag_number    float64
ack_count          float64
syn_count          float64
fin_count          float64
urg_count          float64
rst_count          float64
HTTP               float64
HTTPS              float64
DNS                float64
Telnet             float64
SMTP               float64
SSH                float64
IRC                float64
TCP                float64
UDP                float64
DHCP               float64
ARP                float64
ICMP               float64
IPv                float64
LLC                float64
Tot sum            float64
Min            

I0000 00:00:1748251274.960129  211916 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3539 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4050 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


['server_0']
['client_0', 'client_1', 'client_2']
Train steps:  6376
Val steps:  398
Test steps:  1191
Train steps:  4781
Val steps:  297
Test steps:  895
Train steps:  4781
Val steps:  299
Test steps:  893
Epoch 1/3
Epoch 1/3
Epoch 1/3


I0000 00:00:1748251279.345674  211987 service.cc:152] XLA service 0x7f1bac005170 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1748251279.345727  211987 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 4050 Laptop GPU, Compute Capability 8.9
I0000 00:00:1748251279.351187  211985 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1748251279.461523  211981 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
2025-05-26 16:21:20.671383: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.









2025-05-26 16:21:23.444457: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-05-26 16:21:23.54

[1m4957/6376[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m7s[0m 5ms/step - accuracy: 0.9979 - loss: 0.0095





[1m5020/6376[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m7s[0m 5ms/step - accuracy: 0.9980 - loss: 0.0094




[1m5116/6376[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m6s[0m 5ms/step - accuracy: 0.9980 - loss: 0.0093






[1m5385/6376[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m5s[0m 5ms/step - accuracy: 0.9981 - loss: 0.0090






[1m5467/6376[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m4s[0m 5ms/step - accuracy: 0.9981 - loss: 0.0089






[1m5926/6376[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m2s[0m 5ms/step - accuracy: 0.9982 - loss: 0.0083






[1m5949/6376[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m2s[0m 5ms/step - accuracy: 0.9982 - loss: 0.0083





[1m6000/6376[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m1s[0m 5ms/step - accuracy: 0.9982 - loss: 0.0083





[1m6267/6376[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.9983 - loss: 0.0080






[1m4781/4781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 7ms/step - accuracy: 0.9971 - loss: 0.0143 - val_accuracy: 0.9999 - val_loss: 8.7445e-04
Epoch 2/3
[1m 445/4781[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m13s[0m 3ms/step - accuracy: 0.9999 - loss: 3.9795e-04






[1m4781/4781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 7ms/step - accuracy: 0.9958 - loss: 0.0223 - val_accuracy: 0.9999 - val_loss: 0.0011
Epoch 2/3
[1m6376/6376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 6ms/step - accuracy: 0.9983 - loss: 0.0079 - val_accuracy: 0.9999 - val_loss: 7.5716e-04
Epoch 2/3
[1m4781/4781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 5ms/step - accuracy: 0.9999 - loss: 6.5715e-04 - val_accuracy: 0.9999 - val_loss: 7.7893e-04
Epoch 3/3
[1m4781/4781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 5ms/step - accuracy: 0.9995 - loss: 0.0034 - val_accuracy: 0.9999 - val_loss: 0.0010
Epoch 3/3
[1m6376/6376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 5ms/step - accuracy: 0.9999 - loss: 5.9638e-04 - val_accuracy: 0.9999 - val_loss: 7.1876e-04
[1m2366/4781[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m11s[0m 5ms/step - accuracy: 0.9999 - loss: 5.4189e-04Epoch 3/3
[1m4781/4781[0m [32m━━━━━━━━━━━━━━━━━━━━[