# Federated deep model with Flex for time series anomaly detection

First, we import everything the notebook needs.

In [16]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

from flex.pool import FlexPool

from flexanomalies.datasets.preprocessing_utils import (
    create_windows,
    encode_and_bind,
    scaling,
    impute_lost_values,
)
from flexanomalies.models import DeepCNN_LSTM
from flexanomalies.pool.aggregators_favg import aggregate_ae
from flexanomalies.pool.aggregators_stats import aggregate_stats_mean
from flexanomalies.pool.primitives_deepmodel import (
    build_server_model_ae,
    copy_model_to_clients_ae,
    train_ae,
    set_aggregated_weights_ae,
    weights_collector_ae,
    evaluate_global_model,
    evaluate_global_model_clients,
    threshold_collector_ae,
)
from flexanomalies.utils.load_data import split_data, federate_data
from flexanomalies.utils.metrics import print_metrics
from flexanomalies.utils.process_scores import (
    process_scores_with_percentile,
    process_scores_with_threshold,
)
from flexanomalies.utils.save_results import save_experiments_results

# Wildcard import kept for compatibility; it stays last among the
# flexanomalies imports, as it was originally.
from flexanomalies.utils.metrics import *

In [2]:
# Location of the gzip-compressed CSV read by the data-loading cell.
# The path is relative to the directory the notebook is run from.
file_path = "../flex-anomalies/flexanomalies/datasets/data/corrected.gz"
# Presumably the fraction of samples reserved for the test split — confirm
# against the cell that calls split_data.
split_test = 0.3

In [3]:
# The file has no header row, so columns are addressed by integer position.
df = pd.read_csv(file_path, header=None)

# Column 41 holds the class name: "normal." becomes 0 and every other value
# becomes 1 (anomaly). Building the labels in a single vectorised step yields
# an integer Series rather than an object-dtype column patched in place.
labels = (df[41] != "normal.").astype(int)

# Columns 1, 2 and 3 are removed together with the label column.
# `features_to_encode` keeps its name from the alternative preprocessing in
# which each of these columns is passed through `encode_and_bind(df, feature)`
# instead of being dropped.
features_to_encode = [1, 2, 3]
df = df.drop(columns=[41, *features_to_encode])

In [4]:
# Hyperparameters forwarded as keyword arguments to DeepCNN_LSTM and reused by
# the windowing cell. Meanings marked "presumably" are inferred from the key
# names only — confirm against the DeepCNN_LSTM documentation.
model_params = {
    "epochs": 50,
    "input_dim": df.shape[1],  # number of feature columns left in df
    "batch_size": 32,
    "filters_cnn": [64, 64],  # presumably filters per Conv1D layer
    "units_lstm": [32],  # presumably units per LSTM layer
    "kernel_size": [9, 9],  # presumably kernel size per Conv1D layer
    "hidden_act": ["relu", "relu"],  # presumably activation per Conv1D layer
    "w_size": 30,  # passed to create_windows as its first argument
    "n_pred": 10,  # passed to create_windows as its second argument
    "contamination": 0.1,  # presumably the expected fraction of anomalies
}

In [5]:
# Cast every remaining column to float and scale the resulting matrix.
X = scaling(np.array(df.astype(float)))
y = np.array(labels)
# Use the test fraction declared in the configuration cell (`split_test`)
# instead of repeating the literal here.
X_train, X_test, l_train, l_test = split_data(X, y, split_size=split_test)

In [6]:
# Cut the train/test series into windows using the window length ("w_size")
# and prediction horizon ("n_pred") from the model hyperparameters.
X_train_windows, y_train_windows, X_test_windows, y_test_windows, l_test_windows = (
    create_windows(
        model_params["w_size"],
        model_params["n_pred"],
        X_train,
        X_test,
        l_train,
        l_test,
    )
)

# Report the shape of every windowed array, one per line.
print(
    "\n".join(
        f"{name} shape == {np.array(windows).shape}."
        for name, windows in (
            ("X_train", X_train_windows),
            ("y_train", y_train_windows),
            ("X_test", X_test_windows),
            ("y_test", y_test_windows),
            ("l_test", l_test_windows),
        )
    )
)


X_train shape == (21769, 30, 38).
y_train shape == (21769, 10, 38).
X_test shape == (9327, 30, 38).
y_test shape == (9327, 10, 38).
l_test shape == (93270,).


In [7]:
# Instantiate the CNN-LSTM model wrapper from the hyperparameter dictionary.
model = DeepCNN_LSTM(**model_params)

relu 64 9
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 22, 64)            21952     
                                                                 
 max_pooling1d (MaxPooling1D  (None, 11, 64)           0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 3, 64)             36928     
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 1, 64)            0         
 1D)                                                             
                                                                 
 lstm (LSTM)                 (None, 1, 32)             12416     
                                                                 
 flatten (Flatten)           (None, 32)       

2024-01-08 14:40:37.602862: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2024-01-08 14:40:37.602886: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2024-01-08 14:40:37.602906: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (beatriz-IdeaPad-3-15ITL6): /proc/driver/nvidia/version does not exist
2024-01-08 14:40:37.603216: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
# Distribute the training windows into a federated dataset. The leading 5 is
# presumably the number of clients — TODO confirm against federate_data.
flex_dataset = federate_data(5, X_train_windows, y_train_windows)
# Build a client/server pool over that dataset. The server is registered under
# the id "cnn_lstm_server" and initialised by build_server_model_ae, which
# receives the model created above.
pool = FlexPool.client_server_pool(
    fed_dataset=flex_dataset,
    server_id="cnn_lstm_server",
    init_func=build_server_model_ae,
    model=model,
)

In [13]:
def evaluate_global_model(
    model,
    X,
    y,
    labels,
    metrics=None,
    threshold=None,
    contamination=0.1,
):
    """Score the global model on windowed data and print detection metrics.

    This notebook-local definition shadows the function of the same name
    imported from ``flexanomalies.pool.primitives_deepmodel``.

    Parameters
    ----------
    model
        Wrapper object exposing the underlying network as ``model.model``.
        The computed metrics are stored on it as ``result_metrics_``.
    X
        Input windows passed to ``model.model.predict``.
    y
        Target windows as a 3-D array matching the prediction's shape; the
        residual is averaged over axis 2.
    labels
        Ground-truth labels, one per flattened score.
    metrics
        Names of the metrics forwarded to ``print_metrics``. Defaults to
        ``["Accuracy", "Precision", "F1", "Recall", "AUC_ROC"]``.
    threshold
        Score above which a point is flagged as anomalous. When ``None`` it is
        derived from the scores via ``process_scores_with_percentile``.
    contamination
        Value forwarded to ``process_scores_with_percentile`` when no
        threshold is supplied. Defaults to 0.1, the value that was previously
        hard-coded.

    Returns
    -------
    None
        Results are printed by ``print_metrics`` and stored on
        ``model.result_metrics_``.
    """
    # A fresh list per call avoids sharing one mutable default between calls.
    if metrics is None:
        metrics = ["Accuracy", "Precision", "F1", "Recall", "AUC_ROC"]

    prediction = model.model.predict(X)

    # One score per (window, predicted step): the residual averaged over axis 2.
    # NOTE(review): the residual is signed, so positive and negative errors can
    # cancel within a step — confirm this is intended rather than an absolute
    # or norm-based error.
    d_scores = np.mean(y - prediction, axis=2).flatten()

    if threshold is None:
        threshold = process_scores_with_percentile(d_scores, contamination)
        print(threshold)

    predicted_labels = (d_scores > threshold).astype("int").ravel()
    model.result_metrics_ = print_metrics(metrics, labels, predicted_labels)

In [10]:
def evaluate_global_model_clients(client_flex_model, client_data, *args, **kwargs):
    """Compute a client's local anomaly threshold and store it in its model.

    The client's data is converted to arrays, the client model predicts on
    them, and the residual averaged over axis 2 is flattened into one score
    vector. ``process_scores_with_percentile`` (called with 0.1) turns those
    scores into a threshold, which is written to
    ``client_flex_model["threshold"]`` and printed.

    This notebook-local definition shadows the function of the same name
    imported from ``flexanomalies.pool.primitives_deepmodel``.
    """
    inputs, targets = client_data.to_numpy()
    predicted = client_flex_model["model"].predict(inputs, targets)

    # Alternative score kept for reference: np.linalg.norm(targets - predicted, axis=2)
    residual_mean = np.mean((targets - predicted), axis=2)
    scores = residual_mean.flatten()

    client_flex_model["threshold"] = process_scores_with_percentile(scores, 0.1)
    print("map", client_flex_model["threshold"])


def threshold_collector_ae(client_model, client_data):
    """Print and return the threshold stored under ``client_model["threshold"]``.

    ``client_data`` is accepted but not used. This notebook-local definition
    shadows the function of the same name imported from
    ``flexanomalies.pool.primitives_deepmodel``.
    """
    collected = client_model["threshold"]
    print("colect", collected)
    return collected

In [11]:
# Run three federated rounds. The steps inside a round must stay in this
# order. Step descriptions below are inferred from the primitive names —
# confirm against flexanomalies.pool.primitives_deepmodel.
for i in range(3):
    print(f"\nRunning round: {i}\n")
    # Servers -> clients: hand the current server model to the clients.
    pool.servers.map(copy_model_to_clients_ae, pool.clients)
    # Each client trains on its own partition.
    pool.clients.map(train_ae)
    # Aggregators gather the weights from the clients...
    pool.aggregators.map(weights_collector_ae, pool.clients)
    # ...combine them...
    pool.aggregators.map(aggregate_ae)
    # ...and write the aggregated weights back to the servers.
    pool.aggregators.map(set_aggregated_weights_ae, pool.servers)
# Fetch the trained server model.
# NOTE(review): this reads the private `_models` mapping of the server pool.
output_model = pool.servers._models["cnn_lstm_server"]["model"]

# Disabled alternative: output_model.evaluate(X_test_windows, l_test)


Running round: 0

Keras weights file (<HDF5 file "variables.h5" (mode r+)>) saving:
...layers
......conv1d
.........vars
............0
............1
......conv1d_1
.........vars
............0
............1
......dense
.........vars
............0
............1
......flatten
.........vars
......lstm
.........cell
............vars
...............0
...............1
...............2
.........vars
......max_pooling1d
.........vars
......max_pooling1d_1
.........vars
......reshape
.........vars
...optimizer
......vars
.........0
...vars
Keras model archive saving:
File Name                                             Modified             Size
config.json                                    2024-01-08 14:40:44         3536
variables.h5                                   2024-01-08 14:40:44       361592
metadata.json                                  2024-01-08 14:40:44           64
Keras model archive loading:
File Name                                             Modified             Size
config

In [17]:
# Evaluate the aggregated model on the test windows. No threshold is passed,
# so the function derives one from the scores themselves.
evaluate_global_model(output_model, X_test_windows, y_test_windows, l_test_windows)

Acc: 80.231% 

Precision: 0.974 

F1score: 0.864 

Recall: 0.776 

AUC_ROC: 0.844 



In [18]:
# Partition the test windows with the same leading argument (5) used for the
# training data, then make the pool operate on them.
flex_dataset1 = federate_data(5, X_test_windows, y_test_windows)
# NOTE(review): this overwrites the pool's private `_data` attribute directly;
# confirm that FlexPool offers no public way to swap the dataset.
pool._data = flex_dataset1

In [19]:
# Each client computes a threshold on its own partition and stores it in its
# model; the thresholds are then collected from all clients.
pool.clients.map(evaluate_global_model_clients)
thresholds = pool.clients.map(threshold_collector_ae)
# Combine the per-client thresholds into one value (displayed as cell output).
aggregate_stats_mean(thresholds)



0.00036373324154635836

In [20]:
# Re-evaluate the aggregated model on the same test windows, this time with
# the threshold aggregated from the per-client thresholds instead of one
# derived from the test scores.
evaluate_global_model(
    output_model,
    X_test_windows,
    y_test_windows,
    l_test_windows,
    threshold=aggregate_stats_mean(thresholds),
)

Acc: 80.395% 

Precision: 0.973 

F1score: 0.865 

Recall: 0.778 

AUC_ROC: 0.845 



In [19]:
# Persist the experiment results. The arguments are positional; the meanings
# noted for the numeric literals are assumptions — TODO confirm against the
# save_experiments_results signature.
save_experiments_results(
    "cnn_lstm",
    output_model,
    "test_cnn_lstm_notebook",
    model_params,
    "kddcup",
    5,  # presumably the number of clients (same literal as in federate_data)
    3,  # presumably the number of training rounds (same literal as the loop)
    0.3,  # presumably the test split fraction
)