# Federated deep model with Flex for time series anomaly detection

First we do all the imports needed.

In [1]:
from flexanomalies.models import DeepCNN_LSTM
from flexanomalies.utils.load_data import split_data, federate_data
from flexanomalies.utils.metrics import print_metrics
from flexanomalies.utils.process_scores import process_scores_with_percentile
from sklearn.preprocessing import StandardScaler
from flexanomalies.pool.primitives_deepmodel import (
    build_server_model_ae,
    copy_model_to_clients_ae,
    train_ae,
    set_aggregated_weights_ae,
    weights_collector_ae,
)
from flexanomalies.pool.aggregators_favg import aggregate_ae
from flexanomalies.utils.save_results import save_experiments_results
from flex.pool import FlexPool
from flexanomalies.utils.metrics import *
import pandas as pd

2023-12-16 11:49:08.718153: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-12-16 11:49:08.954448: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-16 11:49:08.970181: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-12-16 11:49:08.970197: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore 

In [2]:
file_path = "../flex-anomalies/flexanomalies/datasets/data/corrected.gz"
split_test = 0.3

In [3]:
def encode_and_bind(original_dataframe, feature_to_encode):
    dummies = pd.get_dummies(original_dataframe[[feature_to_encode]])
    res = pd.concat([original_dataframe, dummies], axis=1)
    res = res.drop([feature_to_encode], axis=1)
    return res

In [4]:
df = pd.read_csv(file_path, header=None)
     
# process labels
df.loc[df[41] != "normal.", 41] = 1
df.loc[df[41] == "normal.", 41] = 0
labels = df[41]
df = df.drop([41], axis=1)

features_to_encode = [1, 2, 3]
df = df.drop(features_to_encode, axis=1)
# for feature in features_to_encode:
#     df = encode_and_bind(df, feature)

In [5]:
model_params = {
    "epochs": 5,
    "input_dim": df.shape[1],
    "batch_size": 32,
    "filters_cnn": [32, 32, 32],
    "units_lstm": [32, 32],
    "kernel_size": [11, 11],
    "hidden_act": ["relu", "relu"],
    "w_size": 100,
    "n_pred": 10,
    "contamination": 0.1,
}

In [6]:
X = np.array(df.iloc[:, :].astype(float))
y = np.array(labels)
scaler = StandardScaler()
X = scaler.fit_transform(X)
X_train, X_test, l_train, l_test = split_data(X, y, split_size=0.30)

In [7]:
def create_windows(w_size, n_pred):  # Number of predictions into the future
    X_train_windows = []
    y_train_windows = []
    X_test_windows = []
    y_test_windows = []
    l_test_windows = []

    for i in range(0, len(X_train), n_pred):
        temp_xtrain = X_train[i : w_size + i, :]
        temp_ytrain = X_train[w_size + i : w_size + i + n_pred, :]
        if len(temp_xtrain) < w_size or len(temp_ytrain) < n_pred:
            break
        X_train_windows.append(temp_xtrain)

        y_train_windows.append(temp_ytrain)

    # y_train_windows.append(y_train[i:i + n_pred].reshape(-1,1))

    # Test
    for i in range(0, len(X_test), n_pred):

        temp_xtest = X_test[i : w_size + i, :]
        temp_ytest = X_test[w_size + i : w_size + i + n_pred, :]
        temp_ltest = l_test[w_size + i : w_size + i + n_pred]
        if (
            len(temp_xtest) < w_size
            or len(temp_ytest) < n_pred
            or len(temp_ltest) < n_pred
        ):
            break

        X_test_windows.append(temp_xtest)

        y_test_windows.append(temp_ytest)

        l_test_windows.extend(temp_ltest)

    # X_train_windows = X_train_windows[:-1]
    # y_train_windows = y_train_windows[:-1]
    # X_test_windows = X_test_windows[:-1]
    # y_test_windows = y_test_windows[:-1]
    # l_te_windows = l_test_windows[:-1]

    print("X_train shape == {}.".format(np.array(X_train_windows).shape))

    print("y_train shape == {}.".format(np.array(y_train_windows).shape))

    print("X_test shape == {}.".format(np.array(X_test_windows).shape))

    print("y_test shape == {}.".format(np.array(y_test_windows).shape))
    print("l_test shape == {}.".format(np.array(l_test_windows).shape))

    return (
        np.array(X_train_windows),
        np.array(y_train_windows),
        np.array(X_test_windows),
        np.array(y_test_windows),
        np.array(l_test_windows),
    )

In [8]:
(
    X_train_windows,
    y_train_windows,
    X_test_windows,
    y_test_windows,
    l_test_windows,
) = create_windows(model_params["w_size"], model_params["n_pred"])


X_train shape == (21762, 100, 38).
y_train shape == (21762, 10, 38).
X_test shape == (9320, 100, 38).
y_test shape == (9320, 10, 38).
l_test shape == (93200,).


In [9]:
model = DeepCNN_LSTM(**model_params)


2023-12-16 11:49:20.454425: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-12-16 11:49:20.454779: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-12-16 11:49:20.454799: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (beatriz-IdeaPad-3-15ITL6): /proc/driver/nvidia/version does not exist
2023-12-16 11:49:20.455224: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


relu 32 11
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 90, 32)            13408     
                                                                 
 max_pooling1d (MaxPooling1D  (None, 45, 32)           0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 35, 32)            11296     
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 17, 32)           0         
 1D)                                                             
                                                                 
 lstm (LSTM)                 (None, 17, 32)            8320      
                                                                 
 lstm_1 (LSTM)               (None, 17, 32)  

In [19]:
flex_dataset = federate_data(5, X_train_windows, y_train_windows)

In [20]:
pool = FlexPool.client_server_pool(
    fed_dataset=flex_dataset,
    server_id="cnn_lstm_server",
    init_func=build_server_model_ae,
    model=model,
)

In [21]:
def evaluate_global_model(
    model,
    X,
    y,
    labels,
    metrics=["Accuracy", "Precision", "F1", "Recall", "AUC_ROC"],
    threshold=None,
):
    prediction = model.model.predict(X)
    print(prediction.shape)
    print(y.shape)
    print(np.mean((y - prediction), axis=2).shape)

    d_scores = np.mean((y - prediction), axis=2).flatten()
    if threshold is None:
        threshold = process_scores_with_percentile(d_scores, 0.1)

    l = (d_scores > threshold).astype("int").ravel()
    model.result_metrics_ = print_metrics(metrics, labels, l)

In [27]:
def evaluate_global_model_clients(client_flex_model, client_data, *args, **kwargs):

    X_test, y_test = client_data.to_numpy()
    p = client_flex_model["model"].predict(X_test, y_test)
    d_scores = np.linalg.norm(y_test - p, axis=2)
    threshold = process_scores_with_percentile(d_scores, 0.1)
    return threshold

In [24]:
for i in range(3):
    print(f"\nRunning round: {i}\n")
    pool.servers.map(copy_model_to_clients_ae, pool.clients)
    pool.clients.map(train_ae)
    pool.aggregators.map(weights_collector_ae, pool.clients)
    pool.aggregators.map(aggregate_ae)
    pool.aggregators.map(set_aggregated_weights_ae, pool.servers)
output_model = pool.servers._models["cnn_lstm_server"]["model"]

# output_model.evaluate(X_test_windows, l_test)7


Running round: 0

Keras weights file (<HDF5 file "variables.h5" (mode r+)>) saving:
...layers
......conv1d
.........vars
............0
............1
......conv1d_1
.........vars
............0
............1
......dense
.........vars
............0
............1
......flatten
.........vars
......lstm
.........cell
............vars
...............0
...............1
...............2
.........vars
......lstm_1
.........cell
............vars
...............0
...............1
...............2
.........vars
......max_pooling1d
.........vars
......max_pooling1d_1
.........vars
......reshape
.........vars
...optimizer
......vars
.........0
...vars
Keras model archive saving:
File Name                                             Modified             Size
config.json                                    2023-12-16 11:49:59         4371
variables.h5                                   2023-12-16 11:49:59      1024752
metadata.json                                  2023-12-16 11:49:59           64
Keras 

In [28]:
from flexanomalies.pool.aggregators_stats import aggregate_stats
pool.clients.map(func = evaluate_global_model_clients)



[3.97383911684389,
 3.952595096954968,
 3.9772746891336803,
 3.9271089534425476,
 3.9242868959288177]

In [26]:
evaluate_global_model(model, X_test_windows, y_test_windows, l_test_windows)

(9320, 10, 38)
(9320, 10, 38)
(9320, 10)
Acc: 80.123% 

Precision: 0.973 

F1score: 0.863 

Recall: 0.775 

AUC_ROC: 0.843 



In [None]:
save_experiments_results(
    "cnn_lstm",
    output_model,
    "test_cnn_lstm_notebook",
    model_params,
    "shuttle.mat",
    5,
    3,
    0.3,
)