In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import os.path as path

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Embedding, Flatten
from keras.layers import LSTM, SimpleRNN, GRU, Bidirectional, BatchNormalization, Conv1D, MaxPooling1D, Reshape, GlobalAveragePooling1D

2023-08-20 15:23:07.100743: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-08-20 15:23:07.145730: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
ROOT_DIR = path.abspath("../../data/datasets")

def csvfile(root_dir, train):
    # UNSW_NB15_testing-set.csv is actually more suitable for training because it has more data
    return path.join(root_dir, "UNSW_NB15_" + ("testing" if train else "training") + "-set.csv")

train = pd.read_csv(csvfile(ROOT_DIR, True))
test = pd.read_csv(csvfile(ROOT_DIR, False))

## Config

In [3]:
NUM_CLIENTS = 3
S_ADDR = "127.0.0.1:8080"
DEBUG = 0
MODEL_CKPT_DIR=path.abspath("../../model_ckpt/")

## Global Vars

In [4]:
fed_session = 1
histories = []

## Start to Preprocess

In [5]:
list_drop = ['id', 'attack_cat']
train = train.drop(list_drop, axis=1)
test = test.drop(list_drop, axis=1)

df = pd.concat([train, test])
len(df)

257673

## Removing outliers

In [6]:
# Select numeric categories
df_numeric = df.select_dtypes(include=[np.number])
df_numeric.describe(include='all')

# Remove outliers
for feature in df_numeric.columns:
    if DEBUG == 1:
        print(feature)
        print('max = '+str(df_numeric[feature].max()))
        print('75th = '+str(df_numeric[feature].quantile(0.95)))
        print('median = '+str(df_numeric[feature].median()))
        print(df_numeric[feature].max()>10*df_numeric[feature].median())
        print('----------------------------------------------------')
    if df_numeric[feature].max()>10*df_numeric[feature].median() and df_numeric[feature].max()>10 :
        df[feature] = np.where(df[feature] < df[feature].quantile(0.95), df[feature], df[feature].quantile(0.95))

df

Unnamed: 0,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,sttl,...,ct_src_dport_ltm,ct_dst_sport_ltm,ct_dst_src_ltm,is_ftp_login,ct_ftp_cmd,ct_flw_http_mthd,ct_src_ltm,ct_srv_dst,is_sm_ips_ports,label
0,0.121478,tcp,-,FIN,6.0,4.0,258.0,172.0,74.087490,252,...,1.0,1.0,1.0,0,0,0.0,1.0,1.0,0,0
1,0.649902,tcp,-,FIN,14.0,38.0,734.0,33044.0,78.473372,62,...,1.0,1.0,2.0,0,0,0.0,1.0,6.0,0,0
2,1.623129,tcp,-,FIN,8.0,16.0,364.0,13186.0,14.170161,62,...,1.0,1.0,3.0,0,0,0.0,2.0,6.0,0,0
3,1.681642,tcp,ftp,FIN,12.0,12.0,628.0,770.0,13.677108,62,...,1.0,1.0,3.0,1,1,0.0,2.0,1.0,0,0
4,0.449454,tcp,-,FIN,10.0,6.0,534.0,268.0,33.373826,254,...,2.0,1.0,34.0,0,0,0.0,2.0,34.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82327,0.000005,udp,-,INT,2.0,0.0,104.0,0.0,200000.005100,254,...,1.0,1.0,2.0,0,0,0.0,2.0,1.0,0,0
82328,1.106101,tcp,-,FIN,20.0,8.0,13454.0,354.0,24.410067,254,...,1.0,1.0,1.0,0,0,0.0,3.0,2.0,0,0
82329,0.000000,arp,-,INT,1.0,0.0,46.0,0.0,0.000000,0,...,1.0,1.0,1.0,0,0,0.0,1.0,1.0,1,0
82330,0.000000,arp,-,INT,1.0,0.0,46.0,0.0,0.000000,0,...,1.0,1.0,1.0,0,0,0.0,1.0,1.0,1,0


## Other unused pruning

In [7]:

# # Apply log to features > 50 unique values.
# df_numeric = df.select_dtypes(include=[np.number])
# for feature in df_numeric.columns:
#     if DEBUG == 1:
#         print(feature)
#         print('nunique = '+str(df_numeric[feature].nunique()))
#         print(df_numeric[feature].nunique()>50)
#         print('----------------------------------------------------')
#     if df_numeric[feature].nunique()>50:
#         if df_numeric[feature].min()==0:
#             df[feature] = np.log(df[feature]+1)
#         else:
#             df[feature] = np.log(df[feature])

# # Reduce labels of categorical features
# df_cat = df.select_dtypes(exclude=[np.number])
# for feature in df_cat.columns:
#     if DEBUG == 1:
#         print(feature)
#         print('nunique = '+str(df_cat[feature].nunique()))
#         print(df_cat[feature].nunique()>6)
#         print(df[feature].value_counts().head().index)
#         print(sum(df[feature].isin(df[feature].value_counts().head().index)))
#         print('----------------------------------------------------')
    
#     if df_cat[feature].nunique()>6:
#         df[feature] = np.where(df[feature].isin(df[feature].value_counts().head().index), df[feature], '-')


## Encoding and Normalisations

In [8]:

# One hot encoding
cols = ['proto', 'service', 'state']
for each in cols:
    dummies = pd.get_dummies(df[each], prefix=each, drop_first=False)
    df = pd.concat([df, dummies], axis=1)
    df = df.drop(each, axis=1)


In [9]:

# Normalise
#Function to min-max normalize
def normalize(df, cols):
    """
    @param df pandas DataFrame
    @param cols a list of columns to encode
    @return a DataFrame with normalized specified features
    """
    result = df.copy() # do not touch the original df
    for feature_name in cols:
        max_value = df[feature_name].astype('float').max()
        min_value = df[feature_name].astype('float').min()
        if max_value > min_value:
            result[feature_name] = (df[feature_name].astype('float') - min_value) / (max_value - min_value)
    return result

new_train_df = normalize(df, df.columns)
new_train_df

Unnamed: 0,dur,spkts,dpkts,sbytes,dbytes,rate,sttl,dttl,sload,dload,...,state_CLO,state_CON,state_ECO,state_FIN,state_INT,state_PAR,state_REQ,state_RST,state_URN,state_no
0,0.043209,0.081967,0.064516,0.017424,0.005205,0.000222,0.988235,1.000000,0.000053,0.002142,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.231166,0.213115,0.612903,0.052867,1.000000,0.000235,0.243137,0.992126,0.000031,0.126990,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.577335,0.114754,0.258065,0.025316,0.399044,0.000043,0.243137,0.992126,0.000006,0.015365,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.598148,0.180328,0.193548,0.044974,0.023302,0.000041,0.243137,0.992126,0.000010,0.000847,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.159868,0.147541,0.096774,0.037975,0.008110,0.000100,0.996078,0.992126,0.000032,0.001005,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82327,0.000002,0.016393,0.000000,0.005957,0.000000,0.600000,0.996078,0.000000,0.312000,0.000000,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
82328,0.393432,0.311475,0.129032,1.000000,0.010713,0.000073,0.996078,0.992126,0.000465,0.000565,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
82329,0.000000,0.000000,0.000000,0.001638,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
82330,0.000000,0.000000,0.000000,0.001638,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


## Resulting DataFrame

In [10]:
def partition(num_clients: int, cid: int, df: pd.DataFrame):
    n = len(df)
    div = n // num_clients
    start = (cid - 1) * div
    end = (cid) * div
    part = df.iloc[start:, :] if cid == num_clients else df.iloc[start:end, :]
    y = part["label"]
    X = part.drop(["label"], axis=1)
    return train_test_split(X, y, test_size=0.3, random_state=None)

## K-Fold validation

In [11]:
from sklearn.model_selection import cross_validate
from imblearn.over_sampling import RandomOverSampler
oversample = RandomOverSampler(sampling_strategy='minority')

# kfold = StratifiedKFold(n_splits=2,shuffle=True,random_state=42)
# kfold.get_n_splits(X_train, y_train)

## Model

In [12]:
import keras.backend as K

def sensitivity(y_true, y_pred): 
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return true_positives / (possible_positives + K.epsilon())

def specificity(y_true, y_pred): 
    true_positives = K.sum(K.round(K.clip(1 - y_true * 1 - y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(1 - y_true, 0, 1)))
    return true_positives / (possible_positives + K.epsilon())

In [13]:
batch_size = 32

def get_big_model():
    model = Sequential()
    model.add(Conv1D(64, kernel_size=64, padding="same",activation="relu",input_shape=(196, 1)))
    model.add(MaxPooling1D(pool_size=(10)))
    model.add(BatchNormalization())
    model.add(Bidirectional(LSTM(64, return_sequences=False)))
    model.add(Reshape((128, 1), input_shape = (128, )))
    model.add(MaxPooling1D(pool_size=(5)))
    model.add(BatchNormalization())
    model.add(Bidirectional(LSTM(128, return_sequences=False)))
    model.add(Dropout(0.6))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy', sensitivity, specificity])
    return model

def get_med_cnn():
    model = Sequential()
    model.add(Conv1D(64, kernel_size=64, padding='same', activation='relu', input_shape=(196, 1)))
    model.add(MaxPooling1D(pool_size=(10)))
    model.add(BatchNormalization())
    model.add(Bidirectional(LSTM(64, return_sequences=False)))
    model.add(Dropout(0.6))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', sensitivity, specificity])
    return model

In [14]:
# from sklearn import metrics
# from sklearn.svm import LinearSVC


# oos_pred = []
# model = LinearSVC(gamma=0.6)

# cross_validate(model, X, y, cv=10)
# for train_index, test_index in kfold.split(X_train, y_train):
#     train_X, test_X = X_train.iloc[train_index], X_train.iloc[test_index]
#     train_y, test_y = y_train.iloc[train_index], y_train.iloc[test_index]
    
#     print("train index:", train_index)
#     print("test index:", test_index)
#     print(train_y.value_counts())
    
#     train_X_over,train_y_over= oversample.fit_resample(train_X, train_y)
#     print(train_y_over.value_counts())
    
#     x_columns_train = new_train_df.columns.drop('label')
#     x_train_array = train_X_over[x_columns_train].values
#     x_train_1=np.reshape(x_train_array, (x_train_array.shape[0], x_train_array.shape[1], 1))
    
#     y_train_1 = train_y_over.values # Classification
    
#     x_columns_test = new_train_df.columns.drop('label')
#     x_test_array = test_X[x_columns_test].values
#     x_test_2=np.reshape(x_test_array, (x_test_array.shape[0], x_test_array.shape[1], 1))
    
#     y_test_2 = test_y.values # Classification
    

#     model.fit(x_train_1, y_train_1,validation_data=(x_test_2,y_test_2), epochs=15)
    
#     pred = model.predict(x_test_2)
#     pred = np.argmax(pred,axis=1)
#     y_eval = y_test_2.astype('int')
#     score = metrics.accuracy_score(y_eval, pred)
#     oos_pred.append(score)
#     print("Validation score: {}".format(score))

In [15]:
# oos_pred

In [16]:
# from sklearn.metrics import confusion_matrix
# import numpy as np
# from scipy import interp
# import matplotlib.pyplot as plt
# from itertools import cycle
# from sklearn.metrics import roc_curve, auc

# pred1 = model.predict(X_test)
# # Plot linewidth.
# lw = 2

## Client

In [17]:
from typing import Dict
import flwr as fl
from flwr.common import Config, Scalar
import tensorflow as tf

import ipfshttpclient2 as ipfshttpclient


client_received_param = None
client_trained_param = None

class BFLClient(fl.client.NumPyClient):

    def __init__(self, cid: int, model: tf.keras.Model, x_train, y_train, x_test, y_test) -> None:
        self.model: Sequential = model
        self.cid = cid
        self.x_train = x_train
        self.y_train = y_train
        self.x_test = x_test
        self.y_test = y_test
        self._ipfs_client: ipfshttpclient.client.Client = None

    def get_ipfs_client(self):
        if not self._ipfs_client:
            self._ipfs_client = ipfshttpclient.connect()
        return self._ipfs_client

    def get_parameters(self, config):
        return self.model.get_weights()
    
    def set_parameters_from_file(self, file):
        return self.model.load_weights(file)

    def fit(self, parameters, config):
        self.model.set_weights(parameters)
        with tf.device('/device:gpu:0'):
            self.model.fit(x=self.x_train, y=self.y_train, epochs=5, batch_size=32)
        return self.model.get_weights(), len(self.x_train), {}

    def evaluate(self, parameters, config):
        if parameters:
            self.model.set_weights(parameters)
        loss, accuracy, sensitivity, specificity = self.model.evaluate(self.x_test, self.y_test)
        return loss, len(self.x_test), {"accuracy": float(accuracy), "sensitivity": sensitivity, "specificity": specificity}

## FedAvg Strategy

In [18]:
from typing import Dict, List, Tuple
import flwr as fl
from flwr.common import FitRes, Parameters, Scalar
from flwr.server.client_proxy import ClientProxy
from flwr.server.strategy import FedAvg
import numpy as np

param_storer = None

class BFedAvg(FedAvg):
    def __init__(self, *args, save_path, **kwargs):
        super().__init__(*args, **kwargs)
        self.save_path = save_path
    
    def set_fed_session(self, fed_session: int):
        self.fed_session = fed_session

    def get_fed_session(self):
        return self.fed_session

## BFLServer

### History

In [19]:
"""Training history retrieved from the blockchain ledger."""

from functools import reduce
from typing import Dict, List, Tuple

from flwr.common.typing import Scalar
from flwr.server import History

class BFedHistory(History):
    """History class for training and/or evaluation metrics collection."""

    def __init__(self, client_name, algorithm) -> None:
        self.client_name = client_name
        self.losses_distributed: List[Tuple[int, float]] = []
        self.losses_centralized: List[Tuple[int, float]] = []
        self.metrics_distributed_fit: Dict[str, List[Tuple[int, Scalar]]] = {}
        self.metrics_distributed: Dict[str, List[Tuple[int, Scalar]]] = {}
        self.metrics_centralized: Dict[str, List[Tuple[int, Scalar]]] = {}
        self.current_fed_session: int = fed_session
        self.algorithm: str = algorithm

    def add_loss_distributed(self, server_round: int, loss: float) -> None:
        """Add one loss entry (from distributed evaluation)."""
        self.losses_distributed.append((server_round, loss))

    def add_metrics_distributed(
        self, server_round: int, metrics: Dict[str, Scalar]
    ) -> None:
        """Add metrics entries (from distributed evaluation)."""
        for key in metrics:
            # if not (isinstance(metrics[key], float) or isinstance(metrics[key], int)):
            #     continue  # ignore non-numeric key/value pairs
            if key not in self.metrics_distributed:
                self.metrics_distributed[key] = []
            self.metrics_distributed[key].append((server_round, metrics[key]))

    def __repr__(self) -> str:
        rep = ""
        if self.losses_distributed:
            rep += "History (loss, distributed):\n" + reduce(
                lambda a, b: a + b,
                [
                    f"\tround {server_round}: {loss}\n"
                    for server_round, loss in self.losses_distributed
                ],
            )
        if self.losses_centralized:
            rep += "History (loss, centralized):\n" + reduce(
                lambda a, b: a + b,
                [
                    f"\tround {server_round}: {loss}\n"
                    for server_round, loss in self.losses_centralized
                ],
            )
        if self.metrics_distributed_fit:
            rep += "History (metrics, distributed, fit):\n" + str(
                self.metrics_distributed_fit
            )
        if self.metrics_distributed:
            rep += "History (metrics, distributed, evaluate):\n" + str(
                self.metrics_distributed
            )
        if self.metrics_centralized:
            rep += "History (metrics, centralized):\n" + str(self.metrics_centralized)
        return rep

### Server

In [20]:
import timeit
import flwr as fl
from flwr.server import Server
from flwr.client import Client
from flwr.common.typing import GetPropertiesIns

from flwr.common.logger import log
from logging import INFO

ipfs_cid = None

class BFLServer(Server):
    def __init__(self, associated_client_id: str, algorithm_name: str, **kwargs):
        Server.__init__(self, **kwargs)
        self.associated_client_id: str = associated_client_id
        self.algorithm = algorithm_name

    def fit(self, num_rounds: int, timeout: float | None) -> BFedHistory:
        """Run federated averaging for a number of rounds."""

        history = BFedHistory("user1@org1.example.com", self.algorithm)
        fed_session = history.current_fed_session
        self.strategy: BFedAvg.set_fed_session(fed_session)

        # Initialize parameters
        log(INFO, "Initializing global parameters")
        self.parameters = self._get_initial_parameters(timeout=timeout)

        log(INFO, "FL starting")
        start_time = timeit.default_timer()

        for current_round in range(1, num_rounds + 1):
            # Train model and replace previous global model
            res_fit = self.fit_round(
                server_round=current_round,
                timeout=timeout,
            )
            if res_fit is not None:
                parameters_prime, _, _ = res_fit  # fit_metrics_aggregated
                if parameters_prime:
                    self.parameters = parameters_prime

            # Evaluate model on a sample of available clients
            res_fed = self.evaluate_round(server_round=current_round, timeout=timeout)
            if res_fed is not None:
                loss_fed, evaluate_metrics_fed, _ = res_fed
                if loss_fed is not None:
                    history.add_loss_distributed(
                        server_round=current_round, loss=loss_fed
                    )
                    history.add_metrics_distributed(
                        server_round=current_round, metrics=evaluate_metrics_fed
                    )

            
            model = get_med_cnn()
            model.set_weights(fl.common.parameters_to_ndarrays(self.parameters))
            file_name = f"notebook_gmodel_rc{history.current_fed_session}_r{current_round}.keras"
            file_path= path.join(MODEL_CKPT_DIR, file_name)
            model.save_weights(file_path)
            # print("About the client ipfs cli", properties.ipfs_client)

        fed_session += 1
        # Bookkeeping
        end_time = timeit.default_timer()
        elapsed = end_time - start_time
        log(INFO, "FL finished in %s", elapsed)
        return history

## Load Data

In [21]:
client_ids = [str(i) for i in range(1, NUM_CLIENTS+1)]

client_data = {}
for cid in client_ids:
    client_data[cid] = partition(NUM_CLIENTS, int(cid), new_train_df)

In [22]:
from flwr.common.typing import Metrics
from typing import List, Tuple

def client_fn(cid: str):
    model = get_med_cnn()
    print(f"Loading data for client {cid}")
    X_train, X_test, y_train, y_test = client_data[cid]
    # Start client
    print(f"Client {cid} connecting to server {S_ADDR}")
    return BFLClient(cid, model, x_train=X_train, x_test=X_test, y_train=y_train, y_test=y_test)

def eval_metrics_aggregation_fn(results: List[Tuple[int, Metrics]]):
    # Weigh accuracy of each client by number of examples used
    accuracies = [metric["accuracy"] * num for num, metric in results]
    examples = [num for num, _ in results]

    # Aggregate and print custom metric
    aggregated_accuracy = sum(accuracies) / sum(examples)
    return {"accuracy": aggregated_accuracy}

strategy = BFedAvg(
    save_path="./test_save",
    evaluate_metrics_aggregation_fn=eval_metrics_aggregation_fn
)

## Simulation

In [23]:
from flwr.server.client_manager import SimpleClientManager

fl.simulation.start_simulation(
    client_fn = client_fn,
    clients_ids = [str(i) for i in range(1, NUM_CLIENTS+1)],
    server = BFLServer('1', "BiLSTM", client_manager=SimpleClientManager(), strategy=strategy),
    strategy = strategy,
    num_clients = NUM_CLIENTS,
    config = fl.server.ServerConfig(num_rounds=4),
    client_resources=None,
)

INFO flwr 2023-08-20 15:23:13,779 | app.py:146 | Starting Flower simulation, config: ServerConfig(num_rounds=4, round_timeout=None)
2023-08-20 15:23:15,889	INFO worker.py:1636 -- Started a local Ray instance.
INFO flwr 2023-08-20 15:23:16,590 | app.py:180 | Flower VCE: Ray initialized with resources: {'CPU': 12.0, 'object_store_memory': 1703791411.0, 'node:172.19.27.35': 1.0, 'memory': 3407582823.0}
INFO flwr 2023-08-20 15:23:16,590 | 1374211194.py:26 | Initializing global parameters
INFO flwr 2023-08-20 15:23:16,591 | server.py:273 | Requesting initial parameters from one random client
[2m[36m(launch_and_get_parameters pid=14862)[0m 2023-08-20 15:23:18.701766: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
INFO flwr 2023-08-20 15:23:19,367 | server.py:277 | Received initial parameters from one random client
INFO flwr 2023-08-20 15:23:19,368 | 1374211194.py:29 | FL starting
DEBUG flwr

[2m[36m(launch_and_get_parameters pid=14862)[0m Loading data for client 1
[2m[36m(launch_and_get_parameters pid=14862)[0m Client 1 connecting to server 127.0.0.1:8080
[2m[36m(launch_and_fit pid=14862)[0m Loading data for client 2
[2m[36m(launch_and_fit pid=14862)[0m Client 2 connecting to server 127.0.0.1:8080
[2m[36m(launch_and_fit pid=14862)[0m Epoch 1/5


[2m[36m(pid=14859)[0m 2023-08-20 15:23:23.254157: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
[2m[36m(launch_and_fit pid=14862)[0m Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
[2m[36m(launch_and_fit pid=14862)[0m Cause: Unknown node type <gast.gast.Import object at 0x7f8f3822fbe0>
[2m[36m(launch_and_fit pid=14862)[0m Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
[2m[36m(launch_and_fit pid=14862)[0m Cause: Unknown node type <gast.gast.Import object at 0x7f8f38270850>


   1/1879 [..............................] - ETA: 1:28:38 - loss: 0.8121 - accuracy: 0.3750 - sensitivity: 0.3548 - specificity: 1.0000
   1/1879 [..............................] - ETA: 1:28:38 - loss: 0.8121 - accuracy: 0.3750 - sensitivity: 0.3548 - specificity: 1.0000
   1/1879 [..............................] - ETA: 1:28:38 - loss: 0.8121 - accuracy: 0.3750 - sensitivity: 0.3548 - specificity: 1.0000
   9/1879 [..............................] - ETA: 24s - loss: 0.3439 - accuracy: 0.8750 - sensitivity: 0.9124 - specificity: 0.1389   
  17/1879 [..............................] - ETA: 24s - loss: 0.2767 - accuracy: 0.9026 - sensitivity: 0.9536 - specificity: 0.0735
  25/1879 [..............................] - ETA: 24s - loss: 0.2410 - accuracy: 0.9175 - sensitivity: 0.9685 - specificity: 0.0500
  33/1879 [..............................] - ETA: 24s - loss: 0.2130 - accuracy: 0.9271 - sensitivity: 0.9761 - specificity: 0.1288
  41/1879 [..............................] - ETA: 23s - loss:

[2m[36m(launch_and_fit pid=14862)[0m Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.[32m [repeated 4x across cluster][0m
[2m[36m(launch_and_fit pid=14862)[0m Cause: Unknown node type <gast.gast.Import object at 0x7f8ef02df100>[32m [repeated 4x across cluster][0m


   1/1879 [..............................] - ETA: 1:23:33 - loss: 0.7373 - accuracy: 0.3438 - sensitivity: 0.2778 - specificity: 0.4286
   1/1879 [..............................] - ETA: 1:23:33 - loss: 0.7373 - accuracy: 0.3438 - sensitivity: 0.2778 - specificity: 0.4286
   9/1879 [..............................] - ETA: 28s - loss: 0.6535 - accuracy: 0.6146 - sensitivity: 0.4824 - specificity: 0.7340   
  17/1879 [..............................] - ETA: 26s - loss: 0.5756 - accuracy: 0.7004 - sensitivity: 0.5570 - specificity: 0.8042
  21/1879 [..............................] - ETA: 26s - loss: 0.5491 - accuracy: 0.7128 - sensitivity: 0.5643 - specificity: 0.8155
  27/1879 [..............................] - ETA: 38s - loss: 0.5072 - accuracy: 0.7373 - sensitivity: 0.6135 - specificity: 0.8251
  36/1879 [..............................] - ETA: 34s - loss: 0.4797 - accuracy: 0.7578 - sensitivity: 0.6746 - specificity: 0.8182
  40/1879 [..............................] - ETA: 33s - loss: 0.4

DEBUG flwr 2023-08-20 15:28:04,860 | server.py:232 | fit_round 1 received 3 results and 0 failures
DEBUG flwr 2023-08-20 15:28:04,870 | server.py:168 | evaluate_round 1: strategy sampled 3 clients (out of 3)




ERROR flwr 2023-08-20 15:28:05,841 | ray_client_proxy.py:104 | Task was killed due to the node running low on memory.
Memory on the node (IP: 172.19.27.35, ID: a0f151e0d8561303a4858a1c424086dceadc03c2243975f744448516) where the task (task ID: dc170474ae3559c60c764bd5ed48fae4bca9726c01000000, name=launch_and_evaluate, pid=14859, memory used=0.49GB) was running was 11.02GB / 11.50GB (0.957955), which exceeds the memory usage threshold of 0.95. Ray killed this worker (ID: b51744aa53cf89dd313177a0eb4e5883d53adaede6a332667cfcd0c1) because it was the most recently scheduled task; to see more information about memory usage on this node, use `ray logs raylet.out -ip 172.19.27.35`. To see the logs of the worker, use `ray logs worker-b51744aa53cf89dd313177a0eb4e5883d53adaede6a332667cfcd0c1*out -ip 172.19.27.35. Top 10 memory users:
PID	MEM(GB)	COMMAND
14662	1.56	/home/dylonwong/miniconda3/envs/fedlearn/bin/python -m ipykernel_launcher --ip=127.0.0.1 --stdin=900...
14578	1.28	/home/dylonwong/mini

[2m[36m(launch_and_evaluate pid=14859)[0m Loading data for client 3
[2m[36m(launch_and_evaluate pid=14859)[0m Client 3 connecting to server 127.0.0.1:8080


[2m[36m(launch_and_evaluate pid=14862)[0m Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.[32m [repeated 2x across cluster][0m
[2m[36m(launch_and_evaluate pid=14862)[0m Cause: Unknown node type <gast.gast.Import object at 0x7f8f769a36d0>[32m [repeated 2x across cluster][0m


  1/806 [..............................] - ETA: 21:04 - loss: 0.3642 - accuracy: 0.8125 - sensitivity: 0.8125 - specificity: 0.8125
 20/806 [..............................] - ETA: 4s - loss: 0.3659 - accuracy: 0.8266 - sensitivity: 0.8372 - specificity: 0.8291 
 37/806 [>.............................] - ETA: 4s - loss: 0.3904 - accuracy: 0.8032 - sensitivity: 0.8159 - specificity: 0.8009
 53/806 [>.............................] - ETA: 4s - loss: 0.3819 - accuracy: 0.8107 - sensitivity: 0.8220 - specificity: 0.8078
 72/806 [=>............................] - ETA: 4s - loss: 0.3841 - accuracy: 0.8047 - sensitivity: 0.8221 - specificity: 0.7958
 89/806 [==>...........................] - ETA: 4s - loss: 0.3849 - accuracy: 0.8027 - sensitivity: 0.8202 - specificity: 0.7932
107/806 [==>...........................] - ETA: 4s - loss: 0.3841 - accuracy: 0.8043 - sensitivity: 0.8255 - specificity: 0.7908
125/806 [===>..........................] - ETA: 4s - loss: 0.3805 - accuracy: 0.8060 - sensit





[2m[36m(launch_and_evaluate pid=14860)[0m 2023-08-20 15:28:11.258928: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


[2m[36m(launch_and_evaluate pid=14860)[0m Loading data for client 2
[2m[36m(launch_and_evaluate pid=14860)[0m Client 2 connecting to server 127.0.0.1:8080


[2m[36m(launch_and_evaluate pid=14860)[0m Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.[32m [repeated 2x across cluster][0m
[2m[36m(launch_and_evaluate pid=14860)[0m Cause: Unknown node type <gast.gast.Import object at 0x7fa8f8717940>[32m [repeated 2x across cluster][0m


  1/806 [..............................] - ETA: 15:21 - loss: 0.3272 - accuracy: 0.8750 - sensitivity: 0.8667 - specificity: 1.0000
 10/806 [..............................] - ETA: 4s - loss: 0.3341 - accuracy: 0.8687 - sensitivity: 0.8724 - specificity: 0.6917   
 18/806 [..............................] - ETA: 4s - loss: 0.3097 - accuracy: 0.8837 - sensitivity: 0.8874 - specificity: 0.6991
 37/806 [>.............................] - ETA: 4s - loss: 0.3075 - accuracy: 0.8826 - sensitivity: 0.8847 - specificity: 0.6568
 57/806 [=>............................] - ETA: 4s - loss: 0.3084 - accuracy: 0.8772 - sensitivity: 0.8801 - specificity: 0.6211
 78/806 [=>............................] - ETA: 3s - loss: 0.3079 - accuracy: 0.8754 - sensitivity: 0.8781 - specificity: 0.6216
 99/806 [==>...........................] - ETA: 3s - loss: 0.3081 - accuracy: 0.8741 - sensitivity: 0.8753 - specificity: 0.6648
121/806 [===>..........................] - ETA: 3s - loss: 0.3119 - accuracy: 0.8701 - sens

[2m[33m(raylet)[0m [2023-08-20 15:28:15,829 E 14798 14798] (raylet) node_manager.cc:3069: 1 Workers (tasks / actors) killed due to memory pressure (OOM), 0 Workers crashed due to other reasons at node (ID: a0f151e0d8561303a4858a1c424086dceadc03c2243975f744448516, IP: 172.19.27.35) over the last time period. To see more information about the Workers killed on this node, use `ray logs raylet.out -ip 172.19.27.35`
[2m[33m(raylet)[0m 
[2m[33m(raylet)[0m Refer to the documentation on how to address the out of memory issue: https://docs.ray.io/en/latest/ray-core/scheduling/ray-oom-prevention.html. Consider provisioning more memory on this node or reducing task parallelism by requesting more CPUs per task. To adjust the kill threshold, set the environment variable `RAY_memory_usage_threshold` when starting Ray. To disable worker killing, set the environment variable `RAY_memory_monitor_refresh_ms` to zero.




DEBUG flwr 2023-08-20 15:28:17,586 | server.py:182 | evaluate_round 1 received 2 results and 1 failures
2023-08-20 15:28:17.646382: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-20 15:28:17.865990: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-20 15:28:17.866057: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-20 15:28:17.868637: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_nod



2023-08-20 15:28:19.597326: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-08-20 15:28:19.598706: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-08-20 15:28:19.599907: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

[2m[36m(launch_and_fit pid=14862)[0m Loading data for client 2
[2m[36m(launch_and_fit pid=14862)[0m Client 2 connecting to server 127.0.0.1:8080


[2m[36m(raylet)[0m Spilled 2753 MiB, 7 objects, write throughput 541 MiB/s. Set RAY_verbose_spill_logs=0 to disable this message.


[2m[36m(launch_and_fit pid=14862)[0m Epoch 1/5


[2m[36m(launch_and_fit pid=14862)[0m Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.[32m [repeated 2x across cluster][0m
[2m[36m(launch_and_fit pid=14862)[0m Cause: Unknown node type <gast.gast.Import object at 0x7f8f381e02e0>[32m [repeated 2x across cluster][0m


   5/1879 [..............................] - ETA: 29s - loss: 0.0364 - accuracy: 0.9875 - sensitivity: 1.0000 - specificity: 0.8500    
  12/1879 [..............................] - ETA: 30s - loss: 0.0339 - accuracy: 0.9896 - sensitivity: 1.0000 - specificity: 0.7917
  16/1879 [..............................] - ETA: 29s - loss: 0.0636 - accuracy: 0.9824 - sensitivity: 1.0000 - specificity: 0.7500
   1/1879 [..............................] - ETA: 1:33:30 - loss: 0.6495 - accuracy: 0.7188 - sensitivity: 1.0000 - specificity: 0.3077
  23/1879 [..............................] - ETA: 30s - loss: 0.0606 - accuracy: 0.9837 - sensitivity: 1.0000 - specificity: 0.7978
   4/1879 [..............................] - ETA: 38s - loss: 0.3697 - accuracy: 0.8203 - sensitivity: 0.9484 - specificity: 0.6542    
  31/1879 [..............................] - ETA: 30s - loss: 0.0584 - accuracy: 0.9849 - sensitivity: 1.0000 - specificity: 0.7747
  64/1879 [>.............................] - ETA: 34s - loss: 0.

[2m[36m(launch_and_fit pid=14862)[0m Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.[32m [repeated 3x across cluster][0m
[2m[36m(launch_and_fit pid=14862)[0m Cause: Unknown node type <gast.gast.Import object at 0x7f8eeada9600>[32m [repeated 3x across cluster][0m


   1/1879 [..............................] - ETA: 1:37:37 - loss: 0.2787 - accuracy: 0.8438 - sensitivity: 1.0000 - specificity: 0.7368
   1/1879 [..............................] - ETA: 1:37:37 - loss: 0.2787 - accuracy: 0.8438 - sensitivity: 1.0000 - specificity: 0.7368
   1/1879 [..............................] - ETA: 1:37:37 - loss: 0.2787 - accuracy: 0.8438 - sensitivity: 1.0000 - specificity: 0.7368
   5/1879 [..............................] - ETA: 25s - loss: 0.2259 - accuracy: 0.8938 - sensitivity: 0.9346 - specificity: 0.8676    
  13/1879 [..............................] - ETA: 25s - loss: 0.2591 - accuracy: 0.8678 - sensitivity: 0.8037 - specificity: 0.9198
  21/1879 [..............................] - ETA: 26s - loss: 0.2268 - accuracy: 0.8854 - sensitivity: 0.8240 - specificity: 0.9300
  29/1879 [..............................] - ETA: 26s - loss: 0.2416 - accuracy: 0.8901 - sensitivity: 0.8467 - specificity: 0.9202
  37/1879 [..............................] - ETA: 26s - loss

DEBUG flwr 2023-08-20 15:33:19,655 | server.py:232 | fit_round 2 received 3 results and 0 failures
DEBUG flwr 2023-08-20 15:33:19,664 | server.py:168 | evaluate_round 2: strategy sampled 3 clients (out of 3)


[2m[36m(launch_and_evaluate pid=14860)[0m Loading data for client 3
[2m[36m(launch_and_evaluate pid=14860)[0m Client 3 connecting to server 127.0.0.1:8080


[2m[36m(launch_and_evaluate pid=14860)[0m Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.[32m [repeated 3x across cluster][0m
[2m[36m(launch_and_evaluate pid=14860)[0m Cause: Unknown node type <gast.gast.Import object at 0x7fa8b81fdf30>[32m [repeated 3x across cluster][0m


 10/806 [..............................] - ETA: 4s - loss: 0.3985 - accuracy: 0.8250 - sensitivity: 1.0000 - specificity: 0.6489   
 26/806 [..............................] - ETA: 4s - loss: 0.3557 - accuracy: 0.8353 - sensitivity: 0.9954 - specificity: 0.6561
 41/806 [>.............................] - ETA: 5s - loss: 0.3653 - accuracy: 0.8293 - sensitivity: 0.9943 - specificity: 0.6306
 49/806 [>.............................] - ETA: 5s - loss: 0.3604 - accuracy: 0.8278 - sensitivity: 0.9932 - specificity: 0.6279
 58/806 [=>............................] - ETA: 4s - loss: 0.2958 - accuracy: 0.8642 - sensitivity: 0.9387 - specificity: 0.8145
 66/806 [=>............................] - ETA: 4s - loss: 0.3575 - accuracy: 0.8319 - sensitivity: 0.9921 - specificity: 0.6409
 82/806 [==>...........................] - ETA: 4s - loss: 0.3548 - accuracy: 0.8300 - sensitivity: 0.9931 - specificity: 0.6323
 95/806 [==>...........................] - ETA: 4s - loss: 0.3421 - accuracy: 0.8362 - sensiti

[2m[36m(launch_and_evaluate pid=14860)[0m Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.[32m [repeated 4x across cluster][0m
[2m[36m(launch_and_evaluate pid=14860)[0m Cause: Unknown node type <gast.gast.Import object at 0x7fa8d83e0ee0>[32m [repeated 4x across cluster][0m


  1/806 [..............................] - ETA: 13:40 - loss: 0.0617 - accuracy: 0.9688 - sensitivity: 0.9667 - specificity: 1.0000
  1/806 [..............................] - ETA: 13:40 - loss: 0.0617 - accuracy: 0.9688 - sensitivity: 0.9667 - specificity: 1.0000
 19/806 [..............................] - ETA: 4s - loss: 0.0617 - accuracy: 0.9720 - sensitivity: 0.9796 - specificity: 0.7018 
 34/806 [>.............................] - ETA: 4s - loss: 0.0700 - accuracy: 0.9678 - sensitivity: 0.9786 - specificity: 0.6564
 34/806 [>.............................] - ETA: 4s - loss: 0.0700 - accuracy: 0.9678 - sensitivity: 0.9786 - specificity: 0.6564
 50/806 [>.............................] - ETA: 4s - loss: 0.0723 - accuracy: 0.9694 - sensitivity: 0.9796 - specificity: 0.5863
 70/806 [=>............................] - ETA: 4s - loss: 0.0740 - accuracy: 0.9674 - sensitivity: 0.9773 - specificity: 0.6195
 90/806 [==>...........................] - ETA: 4s - loss: 0.0735 - accuracy: 0.9684 - sen

DEBUG flwr 2023-08-20 15:33:33,685 | server.py:182 | evaluate_round 2 received 3 results and 0 failures




2023-08-20 15:33:33.884515: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-08-20 15:33:33.886312: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-08-20 15:33:33.887853: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

[2m[36m(launch_and_fit pid=14862)[0m Loading data for client 3
[2m[36m(launch_and_fit pid=14862)[0m Client 3 connecting to server 127.0.0.1:8080
[2m[36m(launch_and_evaluate pid=14860)[0m Client 3 connecting to server 127.0.0.1:8080
[2m[36m(launch_and_evaluate pid=14860)[0m Client 3 connecting to server 127.0.0.1:8080
[2m[36m(launch_and_evaluate pid=14860)[0m Client 3 connecting to server 127.0.0.1:8080
[2m[36m(launch_and_evaluate pid=14860)[0m Client 3 connecting to server 127.0.0.1:8080
[2m[36m(launch_and_evaluate pid=14860)[0m Client 3 connecting to server 127.0.0.1:8080
[2m[36m(launch_and_fit pid=14862)[0m Epoch 1/5
[2m[36m(launch_and_evaluate pid=14860)[0m Epoch 1/5


[2m[36m(launch_and_fit pid=14862)[0m Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.[32m [repeated 2x across cluster][0m
[2m[36m(launch_and_fit pid=14862)[0m Cause: Unknown node type <gast.gast.Import object at 0x7f8f3822d1b0>[32m [repeated 2x across cluster][0m


   4/1879 [..............................] - ETA: 33s - loss: 0.1905 - accuracy: 0.9062 - sensitivity: 0.9868 - specificity: 0.7816    
   4/1879 [..............................] - ETA: 33s - loss: 0.1905 - accuracy: 0.9062 - sensitivity: 0.9868 - specificity: 0.7816    
   4/1879 [..............................] - ETA: 33s - loss: 0.1905 - accuracy: 0.9062 - sensitivity: 0.9868 - specificity: 0.7816    
   4/1879 [..............................] - ETA: 33s - loss: 0.1905 - accuracy: 0.9062 - sensitivity: 0.9868 - specificity: 0.7816    
   4/1879 [..............................] - ETA: 33s - loss: 0.1905 - accuracy: 0.9062 - sensitivity: 0.9868 - specificity: 0.7816    
   4/1879 [..............................] - ETA: 33s - loss: 0.1905 - accuracy: 0.9062 - sensitivity: 0.9868 - specificity: 0.7816    
   4/1879 [..............................] - ETA: 33s - loss: 0.1905 - accuracy: 0.9062 - sensitivity: 0.9868 - specificity: 0.7816    
   4/1879 [..............................] - ETA

[2m[36m(launch_and_fit pid=14862)[0m Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.[32m [repeated 4x across cluster][0m
[2m[36m(launch_and_fit pid=14862)[0m Cause: Unknown node type <gast.gast.Import object at 0x7f8eead1ed10>[32m [repeated 4x across cluster][0m


   5/1879 [..............................] - ETA: 25s - loss: 0.2392 - accuracy: 0.9250 - sensitivity: 0.9229 - specificity: 0.9286    
   5/1879 [..............................] - ETA: 25s - loss: 0.2392 - accuracy: 0.9250 - sensitivity: 0.9229 - specificity: 0.9286    
   5/1879 [..............................] - ETA: 25s - loss: 0.2392 - accuracy: 0.9250 - sensitivity: 0.9229 - specificity: 0.9286    
  13/1879 [..............................] - ETA: 25s - loss: 0.2464 - accuracy: 0.9014 - sensitivity: 0.8591 - specificity: 0.9331
  21/1879 [..............................] - ETA: 25s - loss: 0.2162 - accuracy: 0.9003 - sensitivity: 0.8305 - specificity: 0.9509
  25/1879 [..............................] - ETA: 25s - loss: 0.2069 - accuracy: 0.9038 - sensitivity: 0.8413 - specificity: 0.9504
  25/1879 [..............................] - ETA: 25s - loss: 0.2069 - accuracy: 0.9038 - sensitivity: 0.8413 - specificity: 0.9504
  33/1879 [..............................] - ETA: 25s - loss: 0.

DEBUG flwr 2023-08-20 15:38:38,783 | server.py:232 | fit_round 3 received 3 results and 0 failures
DEBUG flwr 2023-08-20 15:38:38,788 | server.py:168 | evaluate_round 3: strategy sampled 3 clients (out of 3)


[2m[36m(launch_and_evaluate pid=14862)[0m Loading data for client 3
[2m[36m(launch_and_evaluate pid=14862)[0m Client 3 connecting to server 127.0.0.1:8080


[2m[36m(launch_and_evaluate pid=14860)[0m Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
[2m[36m(launch_and_evaluate pid=14860)[0m Cause: Unknown node type <gast.gast.Import object at 0x7fa90c88ca60>


  1/806 [..............................] - ETA: 57:17 - loss: 0.3553 - accuracy: 0.8125 - sensitivity: 1.0000 - specificity: 0.6842
 15/806 [..............................] - ETA: 6s - loss: 0.3256 - accuracy: 0.8292 - sensitivity: 1.0000 - specificity: 0.6475 
 29/806 [>.............................] - ETA: 6s - loss: 0.3311 - accuracy: 0.8287 - sensitivity: 0.9975 - specificity: 0.6332
 42/806 [>.............................] - ETA: 6s - loss: 0.3268 - accuracy: 0.8304 - sensitivity: 0.9970 - specificity: 0.6309
 56/806 [=>............................] - ETA: 5s - loss: 0.3219 - accuracy: 0.8337 - sensitivity: 0.9969 - specificity: 0.6396
 68/806 [=>............................] - ETA: 5s - loss: 0.3228 - accuracy: 0.8327 - sensitivity: 0.9959 - specificity: 0.6347
 68/806 [=>............................] - ETA: 5s - loss: 0.3228 - accuracy: 0.8327 - sensitivity: 0.9959 - specificity: 0.6347
 68/806 [=>............................] - ETA: 5s - loss: 0.3228 - accuracy: 0.8327 - sensit

[2m[36m(launch_and_evaluate pid=14860)[0m Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.[32m [repeated 4x across cluster][0m
[2m[36m(launch_and_evaluate pid=14860)[0m Cause: Unknown node type <gast.gast.Import object at 0x7fa8b8371150>[32m [repeated 4x across cluster][0m


 11/806 [..............................] - ETA: 4s - loss: 0.0523 - accuracy: 0.9801 - sensitivity: 0.9970 - specificity: 0.5530   
 11/806 [..............................] - ETA: 4s - loss: 0.0523 - accuracy: 0.9801 - sensitivity: 0.9970 - specificity: 0.5530   
 11/806 [..............................] - ETA: 4s - loss: 0.0523 - accuracy: 0.9801 - sensitivity: 0.9970 - specificity: 0.5530   
 11/806 [..............................] - ETA: 4s - loss: 0.0523 - accuracy: 0.9801 - sensitivity: 0.9970 - specificity: 0.5530   
 11/806 [..............................] - ETA: 4s - loss: 0.0523 - accuracy: 0.9801 - sensitivity: 0.9970 - specificity: 0.5530   
 11/806 [..............................] - ETA: 4s - loss: 0.0523 - accuracy: 0.9801 - sensitivity: 0.9970 - specificity: 0.5530   
 11/806 [..............................] - ETA: 4s - loss: 0.0523 - accuracy: 0.9801 - sensitivity: 0.9970 - specificity: 0.5530   
 30/806 [>.............................] - ETA: 4s - loss: 0.0572 - accuracy

DEBUG flwr 2023-08-20 15:38:56,479 | server.py:182 | evaluate_round 3 received 3 results and 0 failures




2023-08-20 15:38:57.517343: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-08-20 15:38:57.519209: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-08-20 15:38:57.520373: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

[2m[36m(launch_and_fit pid=14862)[0m Loading data for client 1
[2m[36m(launch_and_fit pid=14862)[0m Client 1 connecting to server 127.0.0.1:8080
[2m[36m(launch_and_evaluate pid=14860)[0m Client 1 connecting to server 127.0.0.1:8080
[2m[36m(launch_and_evaluate pid=14860)[0m Client 1 connecting to server 127.0.0.1:8080
[2m[36m(launch_and_fit pid=14860)[0m Epoch 1/5


[2m[36m(launch_and_fit pid=14862)[0m Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.[32m [repeated 2x across cluster][0m
[2m[36m(launch_and_fit pid=14862)[0m Cause: Unknown node type <gast.gast.Import object at 0x7f8ef021bb20>[32m [repeated 2x across cluster][0m
[2m[36m(raylet)[0m Spilled 4326 MiB, 12 objects, write throughput 404 MiB/s.


   1/1879 [..............................] - ETA: 2:05:15 - loss: 0.1756 - accuracy: 0.9062 - sensitivity: 0.8182 - specificity: 0.9524
   9/1879 [..............................] - ETA: 31s - loss: 0.1728 - accuracy: 0.9271 - sensitivity: 0.9276 - specificity: 0.9266   
  16/1879 [..............................] - ETA: 30s - loss: 0.1620 - accuracy: 0.9316 - sensitivity: 0.9047 - specificity: 0.9471
  19/1879 [..............................] - ETA: 31s - loss: 0.1660 - accuracy: 0.9309 - sensitivity: 0.8936 - specificity: 0.9530
  22/1879 [..............................] - ETA: 32s - loss: 0.1692 - accuracy: 0.9261 - sensitivity: 0.8711 - specificity: 0.9595
  25/1879 [..............................] - ETA: 32s - loss: 0.1641 - accuracy: 0.9237 - sensitivity: 0.8586 - specificity: 0.9602
  39/1879 [..............................] - ETA: 31s - loss: 0.1696 - accuracy: 0.9151 - sensitivity: 0.8531 - specificity: 0.9561
  39/1879 [..............................] - ETA: 31s - loss: 0.1696 

[2m[36m(launch_and_fit pid=14860)[0m Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.[32m [repeated 4x across cluster][0m
[2m[36m(launch_and_fit pid=14860)[0m Cause: Unknown node type <gast.gast.Import object at 0x7fa8943c6a10>[32m [repeated 4x across cluster][0m


   4/1879 [..............................] - ETA: 37s - loss: 0.0638 - accuracy: 0.9688 - sensitivity: 0.9917 - specificity: 0.6250    
   4/1879 [..............................] - ETA: 37s - loss: 0.0638 - accuracy: 0.9688 - sensitivity: 0.9917 - specificity: 0.6250    
   7/1879 [..............................] - ETA: 37s - loss: 0.0632 - accuracy: 0.9643 - sensitivity: 0.9857 - specificity: 0.6429
  13/1879 [..............................] - ETA: 36s - loss: 0.0723 - accuracy: 0.9736 - sensitivity: 0.9897 - specificity: 0.7051
  19/1879 [..............................] - ETA: 36s - loss: 0.0780 - accuracy: 0.9753 - sensitivity: 0.9930 - specificity: 0.6404
  25/1879 [..............................] - ETA: 36s - loss: 0.0699 - accuracy: 0.9762 - sensitivity: 0.9947 - specificity: 0.5967
  29/1879 [..............................] - ETA: 35s - loss: 0.0724 - accuracy: 0.9752 - sensitivity: 0.9954 - specificity: 0.5718
  37/1879 [..............................] - ETA: 33s - loss: 0.0682

DEBUG flwr 2023-08-20 15:44:05,121 | server.py:232 | fit_round 4 received 3 results and 0 failures
DEBUG flwr 2023-08-20 15:44:05,126 | server.py:168 | evaluate_round 4: strategy sampled 3 clients (out of 3)


[2m[36m(launch_and_evaluate pid=14862)[0m Loading data for client 1
[2m[36m(launch_and_evaluate pid=14862)[0m Client 1 connecting to server 127.0.0.1:8080


[2m[36m(launch_and_evaluate pid=14862)[0m Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.[32m [repeated 2x across cluster][0m
[2m[36m(launch_and_evaluate pid=14862)[0m Cause: Unknown node type <gast.gast.Import object at 0x7f8f285c9c90>[32m [repeated 2x across cluster][0m


  9/806 [..............................] - ETA: 5s - loss: 0.1856 - accuracy: 0.9062 - sensitivity: 0.9289 - specificity: 0.8969   
  1/806 [..............................] - ETA: 14:53 - loss: 0.0643 - accuracy: 0.9688 - sensitivity: 0.9667 - specificity: 1.0000
 17/806 [..............................] - ETA: 5s - loss: 0.2004 - accuracy: 0.9026 - sensitivity: 0.9380 - specificity: 0.8858
 16/806 [..............................] - ETA: 5s - loss: 0.0517 - accuracy: 0.9785 - sensitivity: 0.9838 - specificity: 0.6979 
 25/806 [..............................] - ETA: 5s - loss: 0.2009 - accuracy: 0.9038 - sensitivity: 0.9431 - specificity: 0.8864
 33/806 [>.............................] - ETA: 5s - loss: 0.2068 - accuracy: 0.8939 - sensitivity: 0.9300 - specificity: 0.8760
 32/806 [>.............................] - ETA: 5s - loss: 0.0636 - accuracy: 0.9736 - sensitivity: 0.9790 - specificity: 0.7245
 49/806 [>.............................] - ETA: 5s - loss: 0.2012 - accuracy: 0.8973 - sen

[2m[36m(launch_and_evaluate pid=14862)[0m Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.[32m [repeated 4x across cluster][0m
[2m[36m(launch_and_evaluate pid=14862)[0m Cause: Unknown node type <gast.gast.Import object at 0x7f8f287cf520>[32m [repeated 4x across cluster][0m


  5/806 [..............................] - ETA: 12s - loss: 0.2652 - accuracy: 0.8625 - sensitivity: 1.0000 - specificity: 0.7161  
  5/806 [..............................] - ETA: 12s - loss: 0.2652 - accuracy: 0.8625 - sensitivity: 1.0000 - specificity: 0.7161  
  5/806 [..............................] - ETA: 12s - loss: 0.2652 - accuracy: 0.8625 - sensitivity: 1.0000 - specificity: 0.7161  
  5/806 [..............................] - ETA: 12s - loss: 0.2652 - accuracy: 0.8625 - sensitivity: 1.0000 - specificity: 0.7161  
  5/806 [..............................] - ETA: 12s - loss: 0.2652 - accuracy: 0.8625 - sensitivity: 1.0000 - specificity: 0.7161  
  5/806 [..............................] - ETA: 12s - loss: 0.2652 - accuracy: 0.8625 - sensitivity: 1.0000 - specificity: 0.7161  
  5/806 [..............................] - ETA: 12s - loss: 0.2652 - accuracy: 0.8625 - sensitivity: 1.0000 - specificity: 0.7161  
  5/806 [..............................] - ETA: 12s - loss: 0.2652 - accurac

DEBUG flwr 2023-08-20 15:44:20,865 | server.py:182 | evaluate_round 4 received 3 results and 0 failures




2023-08-20 15:44:21.197098: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-08-20 15:44:21.201264: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-08-20 15:44:21.203278: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

History (loss, distributed):
	round 1: 0.34898512065410614
	round 2: 0.23147646337747574
	round 3: 0.1883642723162969
	round 4: 0.17881779621044794
History (metrics, distributed, evaluate):
{'accuracy': [(1, 0.8333398103713989), (2, 0.8920366366704305), (3, 0.9051795601844788), (4, 0.9137819608052572)]}

[2m[33m(raylet)[0m [2023-08-20 16:01:15,850 E 14798 14798] (raylet) node_manager.cc:3069: 2 Workers (tasks / actors) killed due to memory pressure (OOM), 0 Workers crashed due to other reasons at node (ID: a0f151e0d8561303a4858a1c424086dceadc03c2243975f744448516, IP: 172.19.27.35) over the last time period. To see more information about the Workers killed on this node, use `ray logs raylet.out -ip 172.19.27.35`
[2m[33m(raylet)[0m 
[2m[33m(raylet)[0m Refer to the documentation on how to address the out of memory issue: https://docs.ray.io/en/latest/ray-core/scheduling/ray-oom-prevention.html. Consider provisioning more memory on this node or reducing task parallelism by requesting more CPUs per task. To adjust the kill threshold, set the environment variable `RAY_memory_usage_threshold` when starting Ray. To disable worker killing, set the environment variable `RAY_memory_monitor_refresh_ms` to zero.


In [None]:
ipfs_client = ipfshttpclient.connect()

In [None]:
with open("../../model_ckpt/from_ipfs.keras", 'wb+') as f:
    f.write(ipfs_client.cat(f"/ipfs/{ipfs_cid['Hash']}"))
    f.flush()
    f.close()

client = client_fn('2')
client.set_parameters_from_file("../../model_ckpt/from_ipfs.keras")

# Evaluated result should be same as shown in simulation output

client.evaluate(None, {})

In [None]:
import pickle
import os

def load_model(path: str):
    try:
        with open(path, 'rb') as file:
            return pickle.load(file)
    except IOError:
        print(f"Error loading file at {path}")
        return None
    
# prm = load_model(os.path.abspath("../../model_ckpt/model_r3.ckpt"))


## Hashing

In [None]:
client_param = fl.common.parameters_to_ndarrays(param_storer)
bytes_client_param = b''.join(fl.common.parameter.ndarrays_to_parameters(client_param).tensors)
server_param = b''.join(param_storer.tensors)
# client_1.evaluate(, {})

In [None]:
print("Aggregated", hs.sha256(server_param).hexdigest())
print("Client received", hs.sha256(bytes_client_param).hexdigest())