In [1]:
import numpy as np
import random
import os
import flwr as fl
import tensorflow as tf
from tensorflow import keras

# Clear any Keras state left over from earlier runs in this kernel.
tf.keras.backend.clear_session()

import pandas as pd
%matplotlib inline
# Load the flow dataset from a path relative to the current working directory,
# so the notebook has to be started from the folder that contains `datasets/`.
df = pd.read_csv('datasets/local_cache/dataset_12500_samples_65_features.csv')

In [2]:
# List every column name in the loaded frame.
df.columns

Index(['src_ip', 'dst_ip', 'src_port', 'dst_port', 'protocol', 'flow_duration',
       'total_fwd_packets', 'total_bwd_packets', 'total_len_fwd_packets',
       'total_len_bwd_packets', 'fwd_pkt_len_max', 'fwd_pkt_len_min',
       'fwd_pkt_len_mean', 'fwd_pkt_len_std', 'fwd_seg_size_min',
       'bwd_pkt_len_max', 'bwd_pkt_len_min', 'bwd_pkt_len_mean',
       'bwd_pkt_len_std', 'bwd_seg_size_min', 'pkt_len_mean', 'pkt_len_std',
       'pkt_len_var', 'flow_bytes_per_s', 'flow_packets_per_s',
       'fwd_packets_per_s', 'bwd_packets_per_s', 'bandwidth_bps',
       'flow_iat_mean', 'flow_iat_std', 'flow_iat_max', 'flow_iat_min',
       'fwd_iat_total', 'fwd_iat_mean', 'fwd_iat_std', 'fwd_iat_max',
       'fwd_iat_min', 'bwd_iat_total', 'bwd_iat_mean', 'bwd_iat_std',
       'bwd_iat_max', 'bwd_iat_min', 'fin_flag_cnt', 'syn_flag_cnt',
       'rst_flag_cnt', 'psh_flag_cnt', 'ack_flag_cnt', 'urg_flag_cnt',
       'cwe_flag_cnt', 'ece_flag_cnt', 'down_up_ratio', 'avg_packet_size',
       'avg

In [3]:
# Preview the first rows of the frame (head() defaults to five rows).
df.head()

Unnamed: 0,src_ip,dst_ip,src_port,dst_port,protocol,flow_duration,total_fwd_packets,total_bwd_packets,total_len_fwd_packets,total_len_bwd_packets,...,active_std,active_max,active_min,idle_mean,idle_std,idle_max,idle_min,flow_rate_entropy,subflow_fwd_bytes,label
0,114.175.67.49,31.13.70.52,6194,443,17,11276457.0,149,298,49520,252965,...,0.0,0.258,0.258,0.1,0.0,0.1,0.1,1.848824,49520,instagram
1,2042:fcc7:e183:6e69:62da:a058:5538:c3cb,2a03:2880:f23d:c6:face:b00c:0:43fe,65171,443,17,88539330.0,85,141,52803,114386,...,0.046,0.117,0.025,19.85,19.467,39.317,0.383,1.861704,52803,instagram
2,114.175.67.14,157.240.30.63,13786,443,17,128007.0,6,11,4045,4018,...,0.0,0.128,0.128,7e-06,0.0,7e-06,0.0,2.133387,4045,instagram
3,221.113.143.197,157.240.30.13,63331,443,17,180281850.0,15,20,3438,6239,...,0.044503,0.121,0.015,30.0132,0.048565,30.063,29.953,2.037399,3438,instagram
4,114.175.67.49,31.13.70.52,6195,443,17,1230893.0,54,29,49564,12884,...,0.0,0.257,0.257,0.101,0.0,0.101,0.101,1.605101,49564,instagram


In [11]:
# Allow up to 70 rows in displayed output so the per-column listing below is
# less likely to be truncated. This is a global pandas display option and stays
# in effect for the rest of the kernel session.
pd.set_option('display.max_rows', 70)
# Number of missing values in each column.
df.isnull().sum()

src_ip                   0
dst_ip                   0
src_port                 0
dst_port                 0
protocol                 0
flow_duration            0
total_fwd_packets        0
total_bwd_packets        0
total_len_fwd_packets    0
total_len_bwd_packets    0
fwd_pkt_len_max          0
fwd_pkt_len_min          0
fwd_pkt_len_mean         0
fwd_pkt_len_std          0
fwd_seg_size_min         0
bwd_pkt_len_max          0
bwd_pkt_len_min          0
bwd_pkt_len_mean         0
bwd_pkt_len_std          0
bwd_seg_size_min         0
pkt_len_mean             0
pkt_len_std              0
pkt_len_var              0
flow_bytes_per_s         0
flow_packets_per_s       0
fwd_packets_per_s        0
bwd_packets_per_s        0
bandwidth_bps            0
flow_iat_mean            0
flow_iat_std             0
flow_iat_max             0
flow_iat_min             0
fwd_iat_total            0
fwd_iat_mean             0
fwd_iat_std              0
fwd_iat_max              0
fwd_iat_min              0
b

In [12]:
# Columns removed from the frame before any further processing: the TCP flag
# counters, the header lengths, the active/idle timing statistics and
# `subflow_fwd_bytes`.
# Every name must match a column of `df` exactly; `DataFrame.drop` raises
# KeyError for an unknown label.
cols_to_drop = [
    # TCP flag counters
    'fin_flag_cnt', 'syn_flag_cnt', 'rst_flag_cnt', 'psh_flag_cnt',
    'ack_flag_cnt', 'urg_flag_cnt', 'cwe_flag_cnt', 'ece_flag_cnt',
    # Header lengths
    'fwd_header_length', 'bwd_header_length',
    # Active-period statistics
    'active_mean', 'active_std', 'active_max', 'active_min',
    # Idle-period statistics
    'idle_mean', 'idle_std', 'idle_max', 'idle_min',
    'subflow_fwd_bytes',
]

In [19]:
# Remove the unwanted feature columns. `drop` raises KeyError when a listed
# column is absent, so re-running this cell on the already-reduced frame fails;
# reload `df` first if the cell has to be executed again.
df = df.drop(columns=cols_to_drop)

In [None]:
# One seed shared by every random number generator used in this notebook.
seed = 42

# Environment flags intended to make runs repeatable.
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so
# setting it here probably only affects child processes — confirm.
os.environ['PYTHONHASHSEED'] = str(seed)
os.environ['TF_DETERMINISTIC_OPS'] = '1'

# Seed TensorFlow, NumPy and the standard-library generator with the same value.
tf.random.set_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [None]:
# Experiment configuration collected in a single dictionary.
CFG = dict(
    # Training settings. The three loss weights are keyed by task name
    # (presumably one per output head of a multi-task model — TODO confirm).
    local_epochs=1,
    lr=1e-3,
    loss_weights=dict(traffic=1, duration=1, bandwidth=1),
    test_size=0.2,

    # Client layout. Note that 600 == 3 clusters * 200 clients per cluster.
    n_clients_flat=600,
    n_clusters=3,
    clients_per_cluster=200,
    client_frac=1.0,

    # NOTE(review): looks like the index/id of the cluster acting as the
    # global aggregator — confirm where this key is consumed.
    global_aggregator_cluster=1,

    # Data partitioning strategy names and their alpha parameters
    # (presumably Dirichlet concentrations — verify against the splitter).
    cluster_split='equal',
    client_split='dirichlet',
    alpha_client=0.4,
    alpha_cluster=0.4,
)

In [None]:
exclude_traffic = ['src_ip', 'dst_ip', 'src_port', 'dst_port', 