<a href="https://colab.research.google.com/github/Cobra-de1/AES/blob/main/Federated_Privacy_AutoEncoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone https://github.com/fastforwardlabs/deepad.git

In [None]:
!pip3 install git+https://github.com/fastforwardlabs/cmlbootstrap#egg=cmlbootstrap

In [None]:
!pip3 install tensorflow-privacy
!pip3 install syft-tensorflow

In [None]:
!pip3 install syft[tensor-flow]

In [6]:
import os

# The repository is cloned into ./deepad by an earlier cell; the `deepad`
# imports below and the relative data/model paths used later presumably
# resolve from inside that directory.
# Only change directory when not already there, so re-running this cell does
# not try to descend into a nested `deepad` directory.
if os.path.basename(os.getcwd()) != 'deepad':
    os.chdir('deepad')

import argparse
from deepad.utils import data_utils, eval_utils
import numpy as np

import logging
logging.basicConfig(level=logging.INFO)

In [7]:
import tensorflow
from tensorflow.keras.layers import Lambda, Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.losses import mse
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
from tensorflow.keras import regularizers
import logging

from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasAdamOptimizer

import os
from deepad.utils import train_utils

import numpy as np
import random


# set random seed for reproducibility
# Seeds TensorFlow, NumPy's global generator and Python's `random` module
# with the same constant.
tensorflow.random.set_seed(2018)
np.random.seed(2018)
# NOTE(review): this line constructs a RandomState object and discards it, so
# it does not seed anything that is used afterwards - candidate for removal.
np.random.RandomState(2018)
random.seed(2018)

In [8]:
class AutoencoderModel():
    """Dense autoencoder compiled with a differentially private Adam optimizer.

    The encoder is a stack of ReLU `Dense` layers followed by a linear
    `latent_dim` bottleneck; the decoder mirrors the hidden layers and ends in
    a `Dense` layer with `output_activation`. The combined model is compiled
    with an MSE loss, and the per-row mean squared reconstruction error is
    exposed as the anomaly score.
    """

    # NOTE: the list defaults below are kept for interface compatibility; they
    # are never mutated because `create_model` always works on a copy.
    def __init__(self, n_features, hidden_layers=2, latent_dim=2, hidden_dim=[15, 7],
                 output_activation='sigmoid', learning_rate=0.01, epochs=15, batch_size=128, model_path=None):
        """ Build AE model.
        Arguments:
            - n_features (int): number of features in the data
            - hidden_layers (int): number of hidden layers used in encoder/decoder
            - latent_dim (int): dimension of latent variable
            - hidden_dim (list): list with dimension of each hidden layer
            - output_activation (str): activation type for last dense layer in the decoder
            - learning_rate (float): learning rate used during training
            - epochs (int): number of epochs run by each call to `train`
            - batch_size (int): batch size used by `train`
            - model_path (str): forwarded to `create_model`, which currently does not use it
        """

        self.epochs = epochs
        self.batch_size = batch_size
        self.model_name = "ae"

        self.create_model(n_features, hidden_layers=hidden_layers, latent_dim=latent_dim,
                          hidden_dim=hidden_dim, output_activation=output_activation,
                          learning_rate=learning_rate, model_path=model_path)

    @staticmethod
    def _resolve_hidden_dim(n_features, hidden_layers, hidden_dim):
        """Return the hidden layer widths as a new list, deriving them when none are given.

        When `hidden_dim` is empty, each layer gets half the width of the
        previous one (starting from `n_features`), floored at 2. The argument
        itself is never modified.
        """
        dims = list(hidden_dim)
        if dims:
            return dims
        dim = n_features
        for _ in range(hidden_layers):
            dims.append(int(np.max([dim/2, 2])))
            dim /= 2
        return dims

    @staticmethod
    def _log_summary(model):
        """Send a Keras model summary to `logging.info` as one multi-line record."""
        lines = []
        # `summary()` returns None, so logging its return value only logs
        # "None". Capture the rendered lines through `print_fn` instead; extra
        # keyword arguments are tolerated in case the installed Keras passes any.
        model.summary(print_fn=lambda line, **_: lines.append(line))
        logging.info("\n".join(lines))

    def create_model(self, n_features, hidden_layers=1, latent_dim=2, hidden_dim=[],
                     output_activation='sigmoid', learning_rate=0.001, model_path=None):
        """Build encoder, decoder and the compiled autoencoder stored in `self.model`.

        Arguments:
            - n_features (int): width of the input and of the reconstruction
            - hidden_layers (int): number of hidden layers in encoder and in decoder
            - latent_dim (int): width of the bottleneck layer
            - hidden_dim (list): hidden layer widths; when empty they are derived
              by repeatedly halving `n_features` (minimum 2). The encoder reads
              entries 0..hidden_layers-1, the decoder reads the last
              `hidden_layers` entries in reverse order.
            - output_activation (str): activation of the decoder output layer
            - learning_rate (float): learning rate passed to the optimizer
            - model_path (str): currently unused
        """

        # set dimensions hidden layers (on a copy, so neither the default list
        # nor a caller-supplied list is modified)
        hidden_dim = self._resolve_hidden_dim(n_features, hidden_layers, hidden_dim)

        # Optional: add regularization to minimize overfitting?
        # kernel_regularizer = regularizers.l1_l2(l1=0.01, l2=0.01)
        # kernel_regularizer = regularizers.l1(0.01)
        kernel_regularizer = None

        # AE = encoder + decoder
        # encoder
        inputs = Input(shape=(n_features,), name='encoder_input')
        # define hidden layers
        enc_hidden = Dense(hidden_dim[0], activation='relu', name='encoder_hidden_0',
                           kernel_regularizer=kernel_regularizer)(inputs)
        for i in range(1, hidden_layers):
            enc_hidden = Dense(hidden_dim[i], activation='relu', name='encoder_hidden_'+str(i),
                               kernel_regularizer=kernel_regularizer)(enc_hidden)

        # linear bottleneck (no activation)
        z_ = Dense(latent_dim, name='z_')(enc_hidden)

        encoder = Model(inputs, z_, name='encoder')
        self._log_summary(encoder)
        # plot_model(encoder, to_file='ae_mlp_encoder.png',
        #            show_shapes=True)

        # decoder
        latent_inputs = Input(shape=(latent_dim,), name='z_')
        # define hidden layers, walking hidden_dim from its last entry backwards
        dec_hidden = Dense(hidden_dim[-1], activation='relu', name='decoder_hidden_0',
                           kernel_regularizer=kernel_regularizer)(latent_inputs)

        for i in range(2, hidden_layers+1):
            dec_hidden = Dense(hidden_dim[-i], activation='relu', name='decoder_hidden_'+str(i-1),
                               kernel_regularizer=kernel_regularizer)(dec_hidden)

        outputs = Dense(n_features, activation=output_activation,
                        name='decoder_output')(dec_hidden)
        # instantiate decoder model
        decoder = Model(latent_inputs, outputs, name='decoder')
        self._log_summary(decoder)
        # plot_model(decoder, to_file='ae_mlp_decoder.png',
        #            show_shapes=True)

        # instantiate AE model
        outputs = decoder(encoder(inputs))
        self.model = Model(inputs, outputs, name='ae', )

        # Differential privacy parameters
        l2_norm_clip = 1.5
        noise_multiplier = 0.5  # more noise -> more privacy, less utility
        num_microbatches = 1  # how many batches to split a batch into

        # NOTE(review): TF Privacy's Keras DP optimizers are documented for use
        # with a per-example (unreduced) loss; confirm that loss="mse" combined
        # with num_microbatches=1 gives the intended privacy guarantee.
        optimizer = DPKerasAdamOptimizer(
            l2_norm_clip=l2_norm_clip,
            noise_multiplier=noise_multiplier,
            num_microbatches=num_microbatches,
            learning_rate=learning_rate)

        # Non-private alternative:
        #optimizer = Adam(lr=learning_rate)

        self.model.compile(optimizer=optimizer, loss="mse")

    def train(self, in_train, in_val):
        """Fit the autoencoder to reconstruct `in_train`, validating on `in_val`.

        Arguments:
            - in_train: training samples; used as both input and target
            - in_val: validation samples; used as both input and target
        Returns:
            The object returned by `self.model.fit` (earlier versions returned None).
        """

        X_train, X_val = in_train, in_val
        logging.info("Training with data of shape " + str(X_train.shape))

        kwargs = {
            'epochs': self.epochs,
            'batch_size': self.batch_size,
            'shuffle': True,
            'validation_data': (X_val, X_val),
            'verbose': 1,
            'callbacks': [train_utils.TimeHistory()],
        }

        history = self.model.fit(X_train, X_train, **kwargs)
        return history

    def compute_anomaly_score(self, df):
        """Return the mean squared reconstruction error of every row in `df`."""
        preds = self.model.predict(df)
        # Named differently from the imported keras `mse` loss to avoid shadowing it.
        reconstruction_error = np.mean(np.power(df - preds, 2), axis=1)
        return reconstruction_error

    def save_model(self, model_path="models/savedmodels/ae/"):
        """Save the model weights under the directory `model_path` with prefix "model"."""
        logging.info(">> Saving AE model to " + model_path)
        # os.path.join keeps the weights inside `model_path` whether or not the
        # path ends with a separator, matching the directory check in load_model.
        self.model.save_weights(os.path.join(model_path, "model"))

    def load_model(self, model_path="models/savedmodels/ae/"):
        """Load weights written by `save_model`; logs a warning when `model_path` does not exist."""
        if (os.path.exists(model_path)):
            logging.info(">> Loading saved model weights")
            self.model.load_weights(os.path.join(model_path, "model"))
        else:
            logging.warning(">> No saved model weights found at " + model_path)

In [9]:
# Load the KDD train and test splits for the "8020" partition.
# The scaler returned by the train load is passed into the test load so both
# splits share it (presumably it is fitted on the training data - confirm in
# data_utils.load_kdd). The `in_*` arrays are later scored as inliers and
# `out_test` as outliers.
test_data_partition = "8020"
in_train, out_train, scaler, _ = data_utils.load_kdd(
    data_path="data/kdd/", dataset_type="train", partition=test_data_partition)
in_test, out_test, _, _ = data_utils.load_kdd(
    data_path="data/kdd/", dataset_type="test", partition=test_data_partition, scaler=scaler)

INFO:root: >> KDD dataset loaded
INFO:root: >> KDD dataset loaded


In [12]:
def train_autoencoder():
    """Train a single (non-federated) autoencoder and evaluate it.

    Reads the module-level `in_train`, `in_test` and `out_test`, trains on
    `in_train` with `in_test` as validation data, saves the weights to the
    default location, prints the anomaly scores and metrics, and returns the
    metrics produced by `eval_utils.evaluate_model`.
    """
    # Instantiate and Train Autoencoder
    ae_kwargs = {
        "latent_dim": 2,
        "hidden_dim": [15, 7],
        "epochs": 14,
        "batch_size": 128,
    }
    autoencoder = AutoencoderModel(in_train.shape[1], **ae_kwargs)
    autoencoder.train(in_train, in_test)
    autoencoder.save_model()

    # Score both test populations before printing anything.
    inlier_scores = autoencoder.compute_anomaly_score(in_test)
    outlier_scores = autoencoder.compute_anomaly_score(out_test)
    for scores in (inlier_scores, outlier_scores):
        print(scores)

    metrics = eval_utils.evaluate_model(
        inlier_scores, outlier_scores, model_name="ae", show_plot=False)
    print(metrics)
    return metrics

In [13]:
# Train and evaluate the non-federated baseline; the returned metrics are the cell output.
train_autoencoder()

INFO:root:None
INFO:root:None
INFO:root:Training with data of shape (97278, 18)


Model: "encoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_input (InputLayer)   [(None, 18)]              0         
_________________________________________________________________
encoder_hidden_0 (Dense)     (None, 15)                285       
_________________________________________________________________
encoder_hidden_1 (Dense)     (None, 7)                 112       
_________________________________________________________________
z_ (Dense)                   (None, 2)                 16        
Total params: 413
Trainable params: 413
Non-trainable params: 0
_________________________________________________________________
Model: "decoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
z_ (InputLayer)              [(None, 2)]               0         
____________________________________________

INFO:root:>> Saving AE model to models/savedmodels/ae/
INFO:root:4693unique thresholds
INFO:root:Testing all thresholds to find best accuracy ...


[0.36571972 0.36151761 0.01650999 ... 0.10743056 0.09246905 0.00062915]
[0.3918786  0.16342898 0.16342898 ... 0.11322385 0.11336261 0.16342898]


INFO:root:Threshold testing complete ...
INFO:root:Best accuracy is .. {'acc': 0.9481, 'threshold': 0.113}


{'acc': 0.945, 'precision': 0.8059071729957806, 'recall': 0.955, 'f1': 0.8741418764302059, 'f2': 0.9209257473481195, 'roc': 0.9608531875, 'threshold': 0.113}


{'acc': 0.945,
 'f1': 0.8741418764302059,
 'f2': 0.9209257473481195,
 'precision': 0.8059071729957806,
 'recall': 0.955,
 'roc': 0.9608531875,
 'threshold': 0.113}

In [None]:
# Disabled PySyft worker setup, kept for reference only: the whole snippet sits
# inside a string literal, so none of it executes. Plain string worker names
# are defined in the next cell instead.
'''
import syft as sy

hook = sy.KerasHook(tf)
alice = sy.VirtualWorker(hook, id="alice")
bob = sy.VirtualWorker(hook, id="bob")
workers = [alice, bob]

# this is done to have the local worker (you on your notebook!) have a registry
# of objects like every other workers, which is disabled by default but needed here
sy.local_worker.is_client_worker = False
'''

In [14]:
# Worker identifiers for the simulated federation; plain strings, one per data shard/model.
workers = ['alice', 'bob']

In [15]:
def make_model(inputshape):
    """Create an `AutoencoderModel` for `inputshape` features with the notebook's fixed settings.

    Uses latent_dim=2, hidden_dim=[15, 7], epochs=14 and batch_size=128.
    """
    return AutoencoderModel(
        inputshape,
        latent_dim=2,
        hidden_dim=[15, 7],
        epochs=14,
        batch_size=128,
    )

In [16]:
def federate(data, workers):
    """Split `data` into one equally sized contiguous shard per worker.

    The shard length is `len(data) // len(workers)`; any trailing rows that do
    not fill a whole shard are left out. The shards are returned stacked in a
    single NumPy array whose first axis indexes the worker.
    """
    shard_count = len(workers)
    shard_size = len(data) // shard_count
    shards = [
        data[start:start + shard_size]
        for start in range(0, shard_size * shard_count, shard_size or 1)
    ][:shard_count]
    return np.array(shards)

In [None]:
# Disabled earlier version of send_new_models (uses `th` and remote parameter
# locations), kept for reference only: it is a string literal and never runs.
'''
def send_new_models(local_model, models):
    with th.no_grad():
        for remote_model in models:
            for new_param, remote_param in zip(local_model.parameters(), remote_model.parameters()):
                worker = remote_param.location
                remote_value = new_param.send(worker)
                remote_param.set_(remote_value)
'''

In [17]:
def send_new_models(local_model, models):
    """Copy the weights of `local_model` into every model in `models`.

    Arguments:
        - local_model: wrapper whose `.model` exposes `get_weights()`
        - models (list): wrappers whose `.model` exposes `set_weights()`

    Iterates over the `models` argument itself rather than the module-level
    `workers` list, so it works for any number of models.
    """
    # The source weights do not change during the loop; fetch them once.
    new_param = local_model.model.get_weights()
    for remote_model in models:
        remote_model.model.set_weights(new_param)

In [None]:
# Disabled earlier version of federated_aggregation (uses `th` and remote
# parameter copies), kept for reference only: it is a string literal and never runs.
'''
def federated_aggregation(local_model, models):
    with th.no_grad():
        for local_param, *remote_params in zip(*([local_model.parameters()] + [model.parameters() for model in models])):
            param_stack = th.zeros(*remote_params[0].shape)
            for remote_param in remote_params:
                param_stack += remote_param.copy().get()
            param_stack /= len(remote_params)
            local_param.set_(param_stack)
'''

In [18]:
def federated_aggregation(local_model, models):
    """Average the weights of `models` element-wise and load the result into `local_model`.

    Arguments:
        - local_model: wrapper whose `.model` exposes `set_weights()`
        - models (list): wrappers whose `.model` exposes `get_weights()`; all
          are expected to share the same weight shapes

    Raises:
        ValueError: if `models` is empty.

    The average is taken over the `models` argument itself rather than the
    module-level `workers` list, so the divisor always matches the number of
    models that were summed.
    """
    if not models:
        raise ValueError("federated_aggregation needs at least one model to average")

    # Fetch every model's weights exactly once instead of once per layer.
    per_model_weights = [remote_model.model.get_weights() for remote_model in models]

    # zip(*...) groups the i-th weight array of every model together.
    averaged = [
        np.mean(layer_weights, axis=0)
        for layer_weights in zip(*per_model_weights)
    ]
    local_model.model.set_weights(averaged)

In [19]:
# Keep unsharded copies of the inlier splits: `in_train` / `in_test` are
# rebound to per-worker shards in the next cell, and the final evaluation
# scores the full `save_in_test`.
save_in_train = in_train.copy()
save_in_test = in_test.copy()

In [20]:
# Shard from the pristine copies made in the previous cell so that re-running
# this cell is idempotent; sharding `in_train` itself would split the already
# federated array again on a second run.
in_train = federate(save_in_train, workers)
in_test = federate(save_in_test, workers)

In [21]:
# Build the aggregate model plus one model per worker. `in_train` is sharded
# at this point, so in_train[0].shape[1] is the feature count of one shard.
local_model = make_model(in_train[0].shape[1])
models = []
for i in range(len(workers)):
    model = make_model(in_train[0].shape[1])
    models.append(model)

INFO:root:None
INFO:root:None
INFO:root:None
INFO:root:None


Model: "encoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_input (InputLayer)   [(None, 18)]              0         
_________________________________________________________________
encoder_hidden_0 (Dense)     (None, 15)                285       
_________________________________________________________________
encoder_hidden_1 (Dense)     (None, 7)                 112       
_________________________________________________________________
z_ (Dense)                   (None, 2)                 16        
Total params: 413
Trainable params: 413
Non-trainable params: 0
_________________________________________________________________
Model: "decoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
z_ (InputLayer)              [(None, 2)]               0         
____________________________________________

INFO:root:None
INFO:root:None


Model: "encoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_input (InputLayer)   [(None, 18)]              0         
_________________________________________________________________
encoder_hidden_0 (Dense)     (None, 15)                285       
_________________________________________________________________
encoder_hidden_1 (Dense)     (None, 7)                 112       
_________________________________________________________________
z_ (Dense)                   (None, 2)                 16        
Total params: 413
Trainable params: 413
Non-trainable params: 0
_________________________________________________________________
Model: "decoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
z_ (InputLayer)              [(None, 2)]               0         
____________________________________________

In [22]:
def train(epoch, delta):
    """Run `epoch` rounds of federated training on the module-level models.

    Each round pushes the weights of `local_model` to every worker model,
    trains each worker model on its own shard of `in_train` (validating on the
    matching shard of `in_test`), and then averages the worker weights back
    into `local_model`.

    Arguments:
        - epoch (int): number of global rounds
        - delta: accepted but not used by this function
    """
    for round_index in range(epoch):
        print('Global epoch: ' + str(round_index + 1) + '/' + str(epoch))

        # 1. Send new version of the model
        send_new_models(local_model, models)

        # 2. Train remotely the models
        for shard_index, worker_name in enumerate(workers):
            print('Training in ' + worker_name)
            worker_model = models[shard_index]
            worker_model.train(in_train[shard_index], in_test[shard_index])

        # 3. Federated aggregation of the updated models
        federated_aggregation(local_model, models)

In [23]:
# Three global federated rounds; the second argument (delta) is not used by train().
train(3, 1e-5)

INFO:root:Training with data of shape (48639, 18)


Global epoch: 1/3
Training in alice
Epoch 1/14
Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14


INFO:root:Training with data of shape (48639, 18)


Training in bob
Epoch 1/14
Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14


INFO:root:Training with data of shape (48639, 18)


Global epoch: 2/3
Training in alice
Epoch 1/14
Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14


INFO:root:Training with data of shape (48639, 18)


Training in bob
Epoch 1/14
Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14


INFO:root:Training with data of shape (48639, 18)


Global epoch: 3/3
Training in alice
Epoch 1/14
Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14


INFO:root:Training with data of shape (48639, 18)


Training in bob
Epoch 1/14
Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14


In [24]:
# Evaluate the aggregated model on the full (unsharded) inlier test set and
# on the outlier test set, using the reconstruction error as anomaly score.
inlier_scores = local_model.compute_anomaly_score(save_in_test)
outlier_scores = local_model.compute_anomaly_score(out_test)
print(inlier_scores)
print(outlier_scores)
metrics = eval_utils.evaluate_model(
    inlier_scores, outlier_scores, model_name="ae", show_plot=False)
print(metrics)
# Saves to the default path, i.e. the same location train_autoencoder() saved to.
local_model.save_model()

INFO:root:4686unique thresholds
INFO:root:Testing all thresholds to find best accuracy ...


[4.68310857e-01 5.09136240e-01 1.00915558e-04 ... 1.09394045e-01
 1.03862758e-01 5.18827232e-02]
[0.38385925 0.14143097 0.14143097 ... 0.05893645 0.05921918 0.14143097]


INFO:root:Threshold testing complete ...
INFO:root:Best accuracy is .. {'acc': 0.9314, 'threshold': 0.14}
INFO:root:>> Saving AE model to models/savedmodels/ae/


{'acc': 0.9311, 'precision': 0.823065549531789, 'recall': 0.835, 'f1': 0.8289898237776122, 'f2': 0.8325855020440722, 'roc': 0.9383488749999999, 'threshold': 0.14}
