In [1]:
from TrainCelebAModels import get_datasets
from AAI_single_target import exp
from MIA2AIA import exp as exp_mia2aia
import numpy as np
import pandas as pd
import torch

# How to attack FL models?

   1) Train and save model (../celeba_models/ELU/federated_5009_i.pth)
   2) Select target sample (if using pandas select by index)
   3) Call exp
   
To obtain confidence intervals, repeat the experiment several times with different seeds.

## AAI:

Single-target

In [2]:
# Seed NumPy's global generator so the list of experiment seeds is reproducible.
# The global generator is also what later np.random.* calls in this notebook draw from.
np.random.seed(42)

# 32 integer seeds drawn from [0, 1234).
seeds = np.random.randint(0, 1234, 32)
seeds

array([1126,  860, 1130, 1095, 1044,  121,  466,  330,   87, 1123,  871,
        130,  769,  343,  805,  385, 1215,  955,  276, 1184,  459,   21,
        252,  747,  856,  474, 1082,  510,  699,  975,  189,  957])

In [3]:
# Build the train / test / shadow splits.
# The split seed is taken from `seeds` (seeds[0] == 1126) rather than repeated as a
# literal, so the data split follows the seed passed to the attack calls below.
# int(...) converts the NumPy integer into a plain Python int.
data_train, data_test, shadow_data = get_datasets(seed=int(seeds[0]), oversample=None, shadow=5009)

# Replace the index with a fresh 0..n-1 range; the target indexes drawn below are
# labels of this index and are looked up with .loc.
data_train = data_train.reset_index(drop=True)
data_test = data_test.reset_index(drop=True)

unknown = 515 # male feature

# Draw one target whose sensitive attribute is 0 and one whose sensitive attribute is 1.
# NOTE(review): np.random.choice uses NumPy's global RNG, so the rows picked here depend
# on the np.random.seed(...) call and on every global draw made before this cell runs.
neg_targets_idx = list(np.random.choice(data_train[data_train.iloc[:, unknown] == 0.0].index, size=1))
pos_targets_idx = list(np.random.choice(data_train[data_train.iloc[:, unknown] == 1.0].index, size=1))

neg_targets = data_train.loc[neg_targets_idx]
pos_targets = data_train.loc[pos_targets_idx]

# Path to the trained global model attacked in the AAI section.
model = '../celeba_models/ELU/federated_5009_0.pth'

In [4]:
# AAI attack on the single negative target (row whose sensitive attribute equals 0).
tp, fn, tn, fp, adv_model = exp(
    numneurons=1024,           # nb of neurons from the first fully connected layer
    n_input=518,               # input size of target samples
    seed=seeds[0],             # fix seed for reproducibility
    unknown=unknown,           # index of the sensitive/target attribute
    lr=0.01,                   # lr of the adversarial network
    shadow=shadow_data,        # shadow dataset to train the adversarial network
    epochs=1000,               # epochs to train adversarial network
    target=neg_targets,        # target samples
    indexes=neg_targets_idx,   # index of target samples
    model=model,               # path to trained global model
    data_train=data_train,     # training data to calculate FPR
    evaluation='theoretical',  # type of evaluation desired
    remove=1,                  # nb of features (counting from the end) to remove (main task label, ...)
    model_w=None,              # global model from the training round prior to the attack
    alpha=None,                # WADM threshold
    save_path=None,            # save malicious global model, useful for defenses
)

Train and test shapes:
torch.Size([4009, 518]) | torch.Size([4009, 1])
torch.Size([1002, 518]) | torch.Size([1002, 1])
Training (999 epochs): TNR = 1.0, FPR = 0.0
tensor([[398.8165]])
tensor([[-4.8604]])
Validation: TNR = 1.0, FPR = 0.0, TPR = 1.0, FNR = 0.0

Inference phase
Attack report:
TPR = 1.0 | FNR = 0.0
TNR = 0.9999936819076538 | FPR = 6.326310995063977e-06
TPR 1.0
FPR tensor(6.3263e-06)


In [6]:
# AAI attack on the single positive target (row whose sensitive attribute equals 1).
tp, fn, tn, fp, adv_model = exp(
    numneurons=1024,           # nb of neurons from the first fully connected layer
    n_input=518,               # input size of target samples
    seed=seeds[0],             # fix seed for reproducibility
    unknown=unknown,           # index of the sensitive/target attribute
    lr=0.01,                   # lr of the adversarial network
    shadow=shadow_data,        # shadow dataset to train the adversarial network
    epochs=1000,               # epochs to train adversarial network
    target=pos_targets,        # target samples
    indexes=pos_targets_idx,   # index of target samples
    model=model,               # path to trained global model
    data_train=data_train,     # training data to calculate FPR
    evaluation='theoretical',  # type of evaluation desired
    remove=1,                  # nb of trailing features to remove
    model_w=None,              # global model from the training round prior to the attack
    alpha=None,                # WADM threshold
    save_path=None,            # where to save the malicious global model (not saved here)
)

Train and test shapes:
torch.Size([4009, 518]) | torch.Size([4009, 1])
torch.Size([1002, 518]) | torch.Size([1002, 1])
Training (999 epochs): TNR = 1.0, FPR = 0.0
tensor([[416.4247]])
tensor([[13.8349]])
Validation: TNR = 1.0, FPR = 0.0, TPR = 0.5, FNR = 0.5

Inference phase
Attack report:
TPR = 0.0 | FNR = 1.0
TNR = 0.9999936819076538 | FPR = 6.326310995063977e-06


## MIA2AIA:

Single-target

In [4]:
# The MIA2AIA calls below are pointed at the model stored under the ReLU directory.
model = "../celeba_models/ReLU/federated_5009_0.pth"

In [5]:
# MIA2AIA attack on the single negative target (row whose sensitive attribute equals 0).
tp, fn, tn, fp, adv_model = exp_mia2aia(
    numneurons=1024,           # nb of neurons from the first fully connected layer
    n_input=518,               # input size of target samples
    seed=seeds[0],             # fix seed for reproducibility
    unknown=unknown,           # index of the sensitive/target attribute
    lr=0.0005,                 # lr of the adversarial network
    shadow=shadow_data,        # shadow dataset to train the adversarial network
    epochs=1000,               # epochs to train adversarial network
    target=neg_targets,        # target samples
    indexes=neg_targets_idx,   # index of target samples
    model=model,               # path to trained global model
    data_train=data_train,     # training data to calculate FPR
    type_attack='single',      # single or multi target attack
    remove=1,                  # nb of features (counting from the end) to remove (main task label, ...)
    evaluation='theoretical',  # type of evaluation desired
    imputation=True,           # the attack can fall back to data imputation (predict most frequent value)
    model_w=None,              # global model from the training round prior to the attack
    alpha=None,                # WADM threshold
    save_path=None,            # save malicious global model, useful for defenses
)

Train and test shapes:
torch.Size([4009, 518]) | torch.Size([4009, 3])
torch.Size([1004, 518]) | torch.Size([1004, 3])
Epoch 999: TPR 1.0 | FPR 0.00024950099800402636 | Accuracy 0.9997505612372163
Valiation: TPR 1.0 | FPR 0.0009970089730807485 | Accuracy 0.999003984063745
Inference phase
--------------------------------------
Inconclusive
Prediction = 0.0
Logits tensor([[-10.4412,  10.4156,   8.3968]], grad_fn=<AddmmBackward0>)
Truth: 0.0
--------------------------------------
Attack report:
TPR = 1.0 | FNR = 0.0
TNR = 0.999993673688872 | FPR = 6.326311127981274e-06


In [6]:
# MIA2AIA attack on the positive target (row whose sensitive attribute equals 1).
# NOTE(review): this section is headed "Single-target" and pos_targets holds one sample,
# yet type_attack='multi' is requested here — confirm this is intentional.
tp, fn, tn, fp, adv_model = exp_mia2aia(
    numneurons=1024,           # nb of neurons from the first fully connected layer
    n_input=518,               # input size of target samples
    seed=seeds[0],             # fix seed for reproducibility
    unknown=unknown,           # index of the sensitive/target attribute
    lr=0.0005,                 # lr of the adversarial network
    shadow=shadow_data,        # shadow dataset to train the adversarial network
    epochs=1000,               # epochs to train adversarial network
    target=pos_targets,        # target samples
    indexes=pos_targets_idx,   # index of target samples
    model=model,               # path to trained global model
    data_train=data_train,     # training data to calculate FPR
    type_attack='multi',       # single or multi target attack
    remove=1,                  # nb of trailing features to remove
    evaluation='theoretical',  # type of evaluation desired
    imputation=True,           # the attack can fall back to data imputation (predict most frequent value)
    model_w=None,              # global model from the training round prior to the attack
    alpha=None,                # WADM threshold
    save_path=None,            # where to save the malicious global model (not saved here)
)

Train and test shapes:
torch.Size([4009, 518]) | torch.Size([4009, 3])
torch.Size([1004, 518]) | torch.Size([1004, 3])
Epoch 999: TPR 1.0 | FPR 0.001122754491017952 | Accuracy 0.9988775255674731
Valiation: TPR 1.0 | FPR 0.0009970089730807485 | Accuracy 0.999003984063745
Inference phase
--------------------------------------
Logits tensor([[-9.0337,  9.1127, -2.3177]], grad_fn=<AddmmBackward0>)
Truth: 1.0
--------------------------------------
Attack report:
TPR = 1.0 | FNR = 0.0
TNR = 0.9989656481305751 | FPR = 0.0010343518694249383


# How to defend against novel active attacks?

   1) Monitor and save each client's performance (accuracy and ROC AUC) during the FL simulation and after it receives the malicious global model (../celeba_models/ELU_monitorization_accuracy_i.csv and ../celeba_models/ELU_monitorization_auc_i.csv)
   2) Save malicious global model using parameter save_path of exp (../celeba_models/ELU_attack_515/federated_5009.pth)
   3) Call automate_BADAcc, automate_BADAUC or automate_WADM 
       * Column `Round_2_515`: the attack occurred after 2 training rounds and targeted feature 515

In [1]:
from black_box_defense import automate_BADAcc, automate_BADAUC
from WADM import automate_WADM

In [13]:
# Load the saved per-client monitoring tables (accuracy and ROC AUC) and discard the
# 'Unnamed: 0' column that comes along with each CSV.
monitor_acc = pd.read_csv('../celeba_models/ELU_monitorization_accuracy_0.csv').drop(columns='Unnamed: 0')
monitor_auc = pd.read_csv('../celeba_models/ELU_monitorization_auc_0.csv').drop(columns='Unnamed: 0')

In [14]:
# Preview the first two rows of the accuracy monitoring table.
monitor_acc.head(n=2)

Unnamed: 0,Round_0,Round_1,Round_2_512,Round_2_515,Round_2_513,Round_2_514,Round_2_516,Round_2_517
0,0.513507,0.792054,0.513507,0.513507,0.513507,0.513507,0.513507,0.513507
1,0.514519,0.789713,0.514519,0.514519,0.514519,0.514519,0.514519,0.514519


In [15]:
# Preview the first two rows of the ROC AUC monitoring table.
monitor_auc.head(n=2)

Unnamed: 0,Round_0,Round_1,Round_2_512,Round_2_515,Round_2_513,Round_2_514,Round_2_516,Round_2_517
0,0.375256,0.87477,0.482931,0.482931,0.482931,0.482931,0.482931,0.482931
1,0.378529,0.873187,0.479201,0.479201,0.479201,0.479201,0.479201,0.479201


### BADAcc:

In [16]:
# Run the accuracy-based black-box defense (BADAcc) for the 'Male' feature.
automate_BADAcc(
    feature='Male',                  # feature name
    n=515,                           # feature index
    info_path='../celeba_models/',   # path containing monitorization .csv files
    # local training datasets size: 32 arrays, each holding 10 entries equal to 19759
    sizes=[19759*np.ones(10) for _ in range(32)],
    attack_round=2,                  # round in which the attack occurs, in this case celeba models are attacked after training for 2 rounds
    n_clients=10,                    # nb of clients
)

Feature Male:
MIA2AIA
TPR = (91.82, 101.93) | FPR = (0.0, 0.0) | Missed = (-1.93, 8.18)
AAI
TPR = (77.88, 97.12) | FPR = (0.0, 0.0) | Missed = (2.88, 22.12)


### BADAUC:

In [17]:
# Run the ROC-AUC-based black-box defense (BADAUC) for the 'Male' feature.
automate_BADAUC(
    feature='Male',                  # feature name
    n=515,                           # feature index
    info_path='../celeba_models/',   # path containing monitorization .csv files
    attack_round=2,                  # round in which the attack occurs
    n_clients=10,                    # nb of clients
)

Feature Male:
MIA2AIA
TPR (100.0, 100.0) | FPR = (0.0, 0.0) | Missed = (0.0, 0.0) alpha = 0.005
AAI
TPR (100.0, 100.0) | FPR = (0.0, 0.0) | Missed = (0.0, 0.0) alpha = 0.005


### WADM:

In [2]:
# Run the WADM defense on the models stored for attribute 515.
automate_WADM(
    dataset='../celeba_models',   # path containing models trained for this dataset
    attack='ELU',                 # type of activation function to execute each attack (AAI - ELU, MIA2AIA - ReLU)
    attribute='515',              # sensitive/target attribute
    model='federated_5009',       # typical file name of models (should be the same for benign and malicious models)
    total_neurons=512,            # nb of neurons from the second fully connected layer
    N=1,                          # nb of malicious neurons
)

TPR (100.0, 100.0)
FPR (-0.0, 0.02)
Missed Attacks (0.0, 0.0)
