Commit
add code for IEEE paper
mitrailer committed Jan 21, 2021
1 parent 0be5f35 commit f787e5e
Showing 8 changed files with 273 additions and 144 deletions.
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -70,7 +70,7 @@ nodes can be found at the [Utils folder](utils).

* **David Lopez** [mitrailer](https://github.com/mitrailer)
* **Bilal Farooq** [billjee](https://github.com/billjee/)
* **Ranwa Al Mallah**
* **Ranwa Al Mallah** [ranwaalmallah](https://github.com/ranwaalmallah)
* **Ali Yazdizadeh** [Ali-TRIPLab](https://github.com/Ali-TRIPLab)

## License
Expand Down
131 changes: 131 additions & 0 deletions use_cases/untargeted_poisoning/Attestedfl.py
@@ -0,0 +1,131 @@
import numpy as np
import tensorflow as tf
import math
import pandas as pd

from sklearn.metrics.pairwise import cosine_similarity
from scipy import stats


def attestedfl_1(step, worker, warm_up):
    """AttestedFL-1: assesses a worker from the history of Euclidean distances between
    its local model and the global model."""
    previous_step = step - 1
    n_matrix = np.load('data_test/' + worker + '/local_model_' + str(step) + '.npy', allow_pickle=True)
    global_m = np.load('data_test/global_model_' + str(previous_step) + '.npy', allow_pickle=True)
    # Load the Euclidean distances recorded up to the previous step, if they exist
    try:
        euclidean_distances = np.load('data_test/' + worker + '/euclidean_distances_' + str(previous_step) + '.npy',
                                      allow_pickle=True)
    except FileNotFoundError:
        print("step:", step)
    # Compute the Euclidean distance for the current step
    if step == 1:
        # First iteration: start a new distance history
        euclidean_distance = tf.norm(n_matrix - global_m, ord='euclidean')
        e_d_array = np.asarray([[euclidean_distance]])
        np.save('data_test/' + worker + '/euclidean_distances_' + str(step) + '.npy', e_d_array)
    else:
        euclidean_distance = tf.norm(n_matrix - global_m, ord='euclidean')
        e_d_array = np.asarray([[euclidean_distance]])
        euclidean_distances = np.append(euclidean_distances, e_d_array)
        np.save('data_test/' + worker + '/euclidean_distances_' + str(step) + '.npy', euclidean_distances)

    if step > warm_up:
        euclidean_distances = np.load('data_test/' + worker + '/euclidean_distances_' + str(step) + '.npy',
                                      allow_pickle=True)
        c = step - warm_up
        # Distances recorded after the warm-up period (the last one has no successor, so it is excluded)
        euclidean_distance_to_test = euclidean_distances[warm_up:-1]
        delta_array = []
        for idx, e_d in enumerate(euclidean_distance_to_test):
            delta = e_d
            delta_1 = euclidean_distances[warm_up + idx + 1]
            t = warm_up + idx
            delta_sum = 1 - math.exp(t / (c * (delta_1 + delta)))
            delta_array.append(delta_sum)
        delta_avg = np.sum(delta_array) / c
        delta_mean = np.mean(delta_array)
        delta_std = np.std(delta_array)

        if delta_avg <= delta_mean - 4 * delta_std:
            return True
        else:
            return False
    return True


def attestedfl_2(step, worker, warm_up):
    """
    The attestedFL_2 algorithm checks the cosine similarity on the last layer of the CNN model
    """
    if step > warm_up:
        previous_step = step - 1
        reliable = False
        n_1_matrix = np.load('data_test/' + worker + '/local_model_' + str(previous_step) + '.npy', allow_pickle=True)
        n_matrix = np.load('data_test/' + worker + '/local_model_' + str(step) + '.npy', allow_pickle=True)
        global_m = np.load('data_test/global_model_' + str(previous_step) + '.npy', allow_pickle=True)
        first = []
        second = []
        # Last layer (index 6) of the previous local, current local and global models
        n_1 = n_1_matrix[6].reshape(1, -1)
        n = n_matrix[6].reshape(1, -1)
        g = global_m[6].reshape(1, -1)
        # Cosine similarity of the previous and current local models with respect to the global model
        similarities = cosine_similarity(n_1, g)
        similarities_two = cosine_similarity(n, g)
        first.append(abs(similarities))
        second.append(abs(similarities_two))
        total = np.array([first, second])
        # print(total)
        chi2_stat, p_val, dof, ex = stats.chi2_contingency(total)
        logger = open('data_paper/logs/cosine_attacker_' + worker + '.csv', "a")
        logger.write("{},{},{},{}".format(step, worker, float(abs(similarities)), float(abs(similarities_two))) + '\n')
        logger.close()
        if p_val < 0.1:
            reliable = False
            print(str(worker) + ' is not reliable')
            return reliable
        else:
            reliable = True
            return reliable
    else:
        return True


def attestedfl_3(step, worker, warm_up):
    reliable = True
    # For the sake of the example we consider that a worker is training as follows:
    # 1. Get the errors at each iteration (epoch)
    # 2. Fit a logarithmic curve to the errors (y-axis) over the iterations (x-axis)
    # 3. If the slope of the logarithmic curve is negative or small (less than .4), the worker is training.
    #    A negative or small slope means that the errors are approaching a small number. In any other case,
    #    the worker is not training.
    if step > warm_up:
        reliable = False
        errors_table = pd.read_csv('data_paper/logs/attestedFL-3/errors_' + worker + '.csv', header=None)
        iteration = errors_table[0]
        errors = errors_table[2]
        # Fit the errors to a logarithmic curve: error ~ a * log(iteration) + b
        fittedParameters = np.polyfit(np.log(iteration), errors, 1)
        first_prediction = np.polyval(fittedParameters, 1)
        last_prediction = np.polyval(fittedParameters, step)
        # Average change of the fitted curve between the first and the current step
        slope = (last_prediction - first_prediction) / (step - 1)

        if slope <= 0:
            reliable = True
            return reliable
        else:
            if slope <= .4:
                reliable = True
                return reliable
            else:
                reliable = False
                return reliable
    return reliable


def attestedfl(step, worker):
    # For the sake of the example let us assume that the warm-up period is 30 epochs
    warm_up = 30
    reliable = False
    # A worker is reliable only if it passes the three lines of defense
    if attestedfl_1(step, worker, warm_up):
        if attestedfl_2(step, worker, warm_up):
            if attestedfl_3(step, worker, warm_up):
                reliable = True
                return reliable
    return reliable
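
For intuition about the AttestedFL-3 check above, the short sketch below reruns the same slope test on synthetic loss values instead of the CSV log written by the hook: a worker whose loss keeps decreasing yields a negative slope and passes, while a steadily growing loss produces a slope above the 0.4 tolerance and is flagged. The synthetic series and the helper are invented purely for illustration.

```python
import numpy as np


def log_fit_slope(iterations, errors, step):
    # Same test as attestedfl_3: fit errors ~ a * log(iteration) + b and
    # measure the average change of the fitted curve up to the current step
    fitted = np.polyfit(np.log(iterations), errors, 1)
    first_prediction = np.polyval(fitted, 1)
    last_prediction = np.polyval(fitted, step)
    return (last_prediction - first_prediction) / (step - 1)


iterations = np.arange(1, 41)
decreasing_loss = 2.0 / np.sqrt(iterations)      # a worker that is actually training
increasing_loss = 0.5 + 0.05 * iterations        # a worker drifting away from the goal

print(log_fit_slope(iterations, decreasing_loss, step=40))  # negative -> passes the check
print(log_fit_slope(iterations, increasing_loss, step=40))  # above 0.4 -> flagged as not training
```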
42 changes: 28 additions & 14 deletions use_cases/untargeted_poisoning/README.md
@@ -1,8 +1,24 @@
# Reference
Code of the paper submitted to the [USENIX summer call](https://www.usenix.org/conference/usenixsecurity21)
Code of the paper submitted to the [IEEE Transactions on Dependable and Secure Computing](https://www.computer.org/csdl/journal/tq)

**Untargeted Poisoning Attack Detection via Blockchain in Federated Learning** by Ranwa Al Mallah, David López,
Bilal Farooq and Ali Yazdizadeh
**Untargeted Poisoning Attack Detection in Federated Learning via Behavior Attestation** by
Ranwa Al Mallah, David López and Bilal Farooq

# Abstract
Federated Learning (FL) is a paradigm in Machine Learning (ML) that addresses critical issues such as data privacy,
security, access rights and access to heterogeneous information by training a global model using distributed nodes.
Despite its advantages, there is an increased potential for cyberattacks on FL-based ML techniques that can undermine
the benefits. Model-poisoning attacks on FL target the availability of the model. The adversarial objective is to
disrupt the training. We propose attestedFL, a defense mechanism that monitors the training of individual nodes
through state persistence in order to detect a malicious _worker_. A fine-grained assessment of the history of
the _worker_ permits the evaluation of its behavior in time and results in innovative detection strategies.
We present three lines of defense that aim at assessing if the _worker_ is reliable by observing if the node
is really training and advancing towards a goal. Our defense exposes an attacker's malicious behavior and removes
unreliable nodes from the aggregation process so that the FL process converges faster. We present promising results
on the impact of our defense on the accuracy that the model reaches under the adversarial setting. Through extensive
evaluations against various adversarial settings, attestedFL increased the accuracy of the model by
12% to 58% under different attack scenarios: attacks performed at different stages of convergence, colluding
attackers, and continuous attacks.

# Summary
This experiment runs targeted and untargeted attacks on a Federated Learning (FL) process
Expand All @@ -12,26 +28,25 @@ The experiment follows the next steps.
2. The *worker* nodes re-train the model with their local data and send the results to the *chief* node
3. At a designated EPOCH, malicious *workers* send targeted or untargeted attacks
4. The *chief* node averages the results (including the malicious model) and sends the average to all *workers*
4. A defense mechanism called REALLY!? algorithm may be used to defend the FL from being poisoned
5. A defense mechanism called the AttestedFL algorithm may be used to defend the FL process from being poisoned

All transactions are recorded in the BSMD and we use sockets for p2p data transfers.
You must have at least one Iroha node running.
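
Below is a minimal sketch of how the defense slots into the *chief* node's aggregation step: before averaging, each *worker*'s update is kept only if `attestedfl(step, worker)` from [Attestedfl.py](Attestedfl.py) reports the worker as reliable. This is an illustration rather than the actual [hook](hook.py) code; the `local_models` mapping of worker names to per-layer NumPy weight matrices and the helper name are assumptions made for the example.

```python
import numpy as np

from Attestedfl import attestedfl  # defense added in this commit


def aggregate_reliable_updates(step, local_models):
    """Average only the updates of workers that pass the AttestedFL checks.

    `local_models` maps a worker name to its local model (a sequence of per-layer
    NumPy arrays). Both the mapping and this helper are illustrative, not part of hook.py.
    """
    reliable_updates = []
    for worker, update in local_models.items():
        if attestedfl(step, worker):
            reliable_updates.append(update)
        else:
            print(worker + ' excluded from aggregation at step ' + str(step))
    if not reliable_updates:
        raise RuntimeError('no reliable updates received at step ' + str(step))
    # FedAvg-style average, computed layer by layer over the reliable updates only
    n_layers = len(reliable_updates[0])
    return [np.mean([update[i] for update in reliable_updates], axis=0) for i in range(n_layers)]
```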

# Setup
1. To set up the the Blockchain follow [this procedure](../../utils/README.md)
2. To set up the the Federated learning go to the 'setup'
1. To set up the Blockchain follow [this procedure](../../utils/README.md)
2. To set up the Federated learning go to the 'setup'
section of [this procedure](../federated_learning/README.md)
3. In the repository we provide a malicious matrix for the targeted attack, however you can creat your own
3. In the repository we provide a malicious matrix for the targeted attack, however you can create your own
by running the script [create_MM](create_MM.py). Note: you will need to run the experiment at least for
one EPOCH for this script to function. The thing is that it need an example 'good' matrix to recreate a
malicious one.
one EPOCH for this script to function.

# Run experiment
You can try targeted and untargeted at different stages. To do so go to line 565 in the [hook](hook.py) file
and follow the instructions. To defend the FL process from the attack go to line 492 in the [hook](hook.py)
You can try targeted and untargeted attacks at different stages. To do so, go to line 571 in the [hook](hook.py) file
and follow the instructions. To defend the FL process from the attack, go to line 493 in the [hook](hook.py)
file and follow the instructions.

You can run the experiment in up to 20 machines. However you can also run the experiment in different shells.
You can run the experiment in up to 20 machines. However, you can also run the experiment in different shells.

On the chief-computer (shell) run
```bash
Expand Down Expand Up @@ -63,7 +78,6 @@ python3 federated_classifier.py --is_chief=False --worker_name=worker19 --domain
```

## Authors
* **Ranwa Al Mallah**
* **Ranwa Al Mallah** [ranwaalmallah](https://github.com/ranwaalmallah)
* **David Lopez** [mitrailer](https://github.com/mitrailer)
* **Bilal Farooq** [billjee](https://github.com/billjee/)
* **Ali Yazdizadeh** [Ali-TRIPLab](https://github.com/Ali-TRIPLab)
10 changes: 8 additions & 2 deletions use_cases/untargeted_poisoning/attacker.py
@@ -1,5 +1,6 @@
import numpy as np
import tensorflow as tf
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)

def node_attacking_targeted(step_value):
    file = step_value - 1
Expand All @@ -9,10 +10,15 @@ def node_attacking_targeted(step_value):
    learning_rate = 0.001
    send_malicious = ((total_workers/learning_rate)*(malicious_model - global_model)) + global_model
    print('Attack!!!!!')
    # print(malicious_model)
    # print(global_model)
    # print(send_malicious)
    return send_malicious

def node_attacking_un_targeted(step_value):

def node_attacking_un_targeted():
    global_m = np.load('data_test/global_model_1.npy', allow_pickle=True)
    # Create the malicious matrix
    malicious_matrix = []
    for i in range(0, 7):
        shape = global_m[i].shape
Expand All @@ -21,4 +27,4 @@ def node_attacking_un_targeted(step_value):
        matrix = np.random.normal(mean, std, shape)
        malicious_matrix.append(matrix)
    print('Attack!!!!!')
    return malicious_matrix
    return malicious_matrix
1 change: 1 addition & 0 deletions use_cases/untargeted_poisoning/create_MM.py
Expand Up @@ -12,3 +12,4 @@
malicious_matrix.append(matrix)
np.save('data_test/malicious_matrix.npy', malicious_matrix, allow_pickle=True)
print('done')
#print(global_m)
16 changes: 12 additions & 4 deletions use_cases/untargeted_poisoning/federated_classifier.py
Expand Up @@ -11,7 +11,7 @@
from iroha_config import CHIEF_PRIVATE_IP, CHIEF_PUBLIC_IP, BATCH_SIZE, EPOCHS, INTERVAL_STEPS, WAIT_TIME
from time import time
import tensorflow as tf

np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)
np.set_printoptions(threshold=sys.maxsize)

flags = tf.app.flags
Expand Down Expand Up @@ -167,9 +167,14 @@ def after_run(self, run_context, run_values):
print("Epoch {}/{} - loss: {:.4f} - acc: {:.4f}".format(int(step_value / N_BATCHES), EPOCHS,
self._total_loss / N_BATCHES,
self._total_acc / N_BATCHES))
print("Epoch {}/{} - loss: {:.4f} - acc: {:.4f}".format(int(step_value / N_BATCHES), EPOCHS,
self._total_loss / N_BATCHES,
self._total_acc / N_BATCHES))
else:
logger = open('data_paper/logs/attestedFL-3/errors_' + FLAGS.worker_name + '.csv', "a")
logger.write("{},{},{}".format(step_value / N_BATCHES, FLAGS.worker_name,
self._total_loss / N_BATCHES) + '\n')
logger.close()
# print("Epoch {}/{} - loss: {:.4f} - acc: {:.4f}".format(int(step_value / N_BATCHES), EPOCHS,
# self._total_loss / N_BATCHES,
# self._total_acc / N_BATCHES))
self._total_loss = 0
self._total_acc = 0

Expand All @@ -191,3 +196,6 @@ def after_create_session(self, session, coord):
mon_sess.run(train_op)

end_time = time()

# logger.write('Total time: ' + str(end_time - start_time) + '\n')
# logger.close()
