# Importing Libraries

In [1]:
!pip install adversarial-robustness-toolbox

[0m

In [2]:
import torch
import numpy as np
from torchvision.datasets import MNIST, CIFAR10
from torch.utils.data import random_split, DataLoader
import torchvision.transforms as transforms
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import warnings

from art.estimators.classification import PyTorchClassifier

warnings.filterwarnings('ignore')




In [3]:
# Fix the PyTorch and NumPy random seeds (both 0) so random draws repeat across runs.
for seed_everything in (torch.manual_seed, np.random.seed):
    seed_everything(0)

In [4]:
%matplotlib inline
plt.rcParams['figure.figsize'] = [2, 2]

# Loading MNIST dataset

### Change the directory storing data!

In [5]:
# Load (downloading on first use) the MNIST train and test splits from ./data.
# Every image is passed through transforms.ToTensor().
train_dataset, test_dataset = (
    MNIST('data', train=is_train_split, download=True, transform=transforms.ToTensor())
    for is_train_split in (True, False)
)

Size of a single sample

In [6]:
train_dataset[1000][0].size()

torch.Size([1, 28, 28])

Class related to this data

In [7]:
# apply_transform = transforms.Compose(
#                     [transforms.Resize(224),
#                      transforms.ToTensor(),
#                      transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

# train_dataset = CIFAR10('data/CIFAR10', train=True, download=True, transform=apply_transform)
# test_dataset = CIFAR10('data/CIFAR10', train=False, download=True, transform=apply_transform)

# GPU Support

In [8]:
def get_device():
    """Pick the compute device: Apple MPS first, then CUDA, otherwise CPU.

    Returns:
        torch.device: one of ``mps``, ``cuda`` or ``cpu``.
    """
    # Older PyTorch builds have no torch.backends.mps attribute at all;
    # treat a missing backend as "not available" instead of raising AttributeError.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return torch.device("mps")
    if torch.cuda.is_available():
        return torch.device('cuda')
    return torch.device('cpu')

def to_device(data, device):
    """Move a tensor, or a (possibly nested) list/tuple of tensors, onto ``device``.

    Sequences are rebuilt as plain lists, so a tuple input comes back as a list.
    Anything that is not a list/tuple must provide ``.to(device, non_blocking=True)``.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)
    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

class DeviceDataLoader(DataLoader):
    """Wrap an existing loader so every batch it yields is moved to ``device``.

    NOTE(review): ``DataLoader.__init__`` is never called, so only ``dl``,
    ``device``, iteration and ``len()`` are set up by this class.
    """

    def __init__(self, dl, device):
        # Only the wrapped loader and the target device are stored.
        self.dl, self.device = dl, device

    def __len__(self):
        # Number of batches is whatever the wrapped loader reports.
        return len(self.dl)

    def __iter__(self):
        # Transfer batches lazily, one per iteration step.
        for raw_batch in self.dl:
            yield to_device(raw_batch, self.device)

device = get_device()
device

device(type='mps')

# Network

Define the network architecture and useful helper functions

In [9]:
class FederatedNet(torch.nn.Module):
    """Small CNN (two 5x5 convolutions, two linear layers) used as global and client model.

    ``fc1`` expects 1024 flattened features, which is what the two conv + 2x2 max-pool
    stages produce for a 28x28 input (64 channels x 4 x 4).
    ``predict``, ``fit`` and ``evaluate`` read the notebook-level ``device`` at call time;
    ``fit`` additionally reads the notebook-level ``beta`` (momentum).
    """

    def __init__(self, num_channels=1, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(num_channels, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, num_classes)

        # Layers whose weight/bias are exchanged between server and clients
        self.track_layers = {'conv1': self.conv1, 'conv2': self.conv2, 'fc1': self.fc1, 'fc2': self.fc2}

    def forward(self, x):
        """Return per-class log-probabilities (``log_softmax`` over dim 1)."""
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2))
        x = x.view(-1, x.shape[1]*x.shape[2]*x.shape[3])
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

    def get_track_layers(self):
        """Return the ``{name: layer}`` dict of layers shared in federation."""
        return self.track_layers

    def apply_parameters(self, parameters_dict):
        """Overwrite the weights and biases of the named tracked layers.

        Args:
            parameters_dict: ``{layer_name: {'weight': value, 'bias': value}}``. Only the
                layers present in the dict are touched. Values may be tensors or scalars
                that broadcast to the layer's parameter shape.
        """
        with torch.no_grad():
            for layer_name, values in parameters_dict.items():
                layer = self.track_layers[layer_name]
                for attr in ('weight', 'bias'):
                    target = getattr(layer, attr).data
                    # copy_ (rather than zeroing and then adding) stays correct when the
                    # incoming tensor aliases this model's own parameter, and it also
                    # replaces NaN/inf entries (NaN * 0 would stay NaN).
                    target.copy_(torch.as_tensor(values[attr], dtype=target.dtype, device=target.device))

    def get_parameters(self):
        """Return a snapshot ``{layer_name: {'weight': tensor, 'bias': tensor}}``.

        The tensors are clones, so later training of this model does not change a
        previously returned snapshot.
        """
        return {
            layer_name: {
                'weight': layer.weight.data.clone(),
                'bias': layer.bias.data.clone()
            }
            for layer_name, layer in self.track_layers.items()
        }

    def batch_accuracy(self, outputs, labels):
        """Fraction of rows in ``outputs`` whose arg-max equals ``labels`` (0-dim tensor)."""
        with torch.no_grad():
            _, predictions = torch.max(outputs, dim=1)
            return torch.tensor(torch.sum(predictions == labels).item() / len(predictions))

    def predict(self, dataset, batch_size=128):
        """Run the model over ``dataset`` and return a list of per-batch output tensors."""
        dataloader = DeviceDataLoader(DataLoader(dataset, batch_size), device)
        preds = []
        with torch.no_grad():
            for batch in dataloader:
                images, _labels = batch
                preds.append(self(images))
        return preds

    def _process_batch(self, batch):
        """Return ``(loss, accuracy)`` for one ``(images, labels)`` batch."""
        images, labels = batch
        outputs = self(images)
        # forward() already returns log-probabilities; cross_entropy applies log_softmax
        # again, which leaves normalised log-probabilities unchanged.
        loss = torch.nn.functional.cross_entropy(outputs, labels)
        accuracy = self.batch_accuracy(outputs, labels)
        return (loss, accuracy)

    def fit(self, dataset, epochs, lr, batch_size=128, opt=torch.optim.SGD):
        """Train on ``dataset`` and return ``[(avg_loss, avg_acc), ...]``, one entry per epoch.

        ``opt`` is called as ``opt(params, lr, momentum=beta)``, so it must accept a
        ``momentum`` keyword.
        """
        dataloader = DeviceDataLoader(DataLoader(dataset, batch_size, shuffle=True), device)
        optimizer = opt(self.parameters(), lr, momentum=beta)
        history = []
        for _ in range(epochs):
            losses = []
            accs = []
            for batch in dataloader:
                loss, acc = self._process_batch(batch)
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
                # detach() returns a new tensor; store that one so the bookkeeping
                # list does not hold on to autograd history.
                losses.append(loss.detach())
                accs.append(acc)
            avg_loss = torch.stack(losses).mean().item()
            avg_acc = torch.stack(accs).mean().item()
            history.append((avg_loss, avg_acc))
        return history

    def evaluate(self, dataset, batch_size=128):
        """Return ``(avg_loss, avg_acc)`` over ``dataset`` without tracking gradients.

        Both values are unweighted means of the per-batch values, so a smaller final
        batch counts as much as a full one.
        """
        dataloader = DeviceDataLoader(DataLoader(dataset, batch_size), device)
        losses = []
        accs = []
        with torch.no_grad():
            for batch in dataloader:
                loss, acc = self._process_batch(batch)
                losses.append(loss)
                accs.append(acc)
        avg_loss = torch.stack(losses).mean().item()
        avg_acc = torch.stack(accs).mean().item()
        return (avg_loss, avg_acc)


# Client

Defining the client class

In [10]:
class Client:
    """A federated participant: an identifier plus the local dataset it trains on."""

    def __init__(self, client_id, dataset):
        self.dataset = dataset
        self.client_id = client_id

    def get_client_id(self):
        """Return the identifier given at construction."""
        return self.client_id

    def get_dataset(self):
        """Return the client's local dataset object."""
        return self.dataset

    def get_dataset_size(self):
        """Return the number of samples in the local dataset."""
        return len(self.dataset)

    def train(self, parameters_dict):
        """Train a fresh local model starting from ``parameters_dict``.

        A new FederatedNet is placed on the notebook-level ``device``, loaded with the
        given parameters and fitted on this client's dataset using the notebook-level
        ``epochs_per_client``, ``learning_rate`` and ``batch_size``. The last epoch's
        loss and accuracy are printed and the trained parameters are returned.
        """
        local_net = to_device(FederatedNet(), device)
        local_net.apply_parameters(parameters_dict)
        train_history = local_net.fit(self.dataset, epochs_per_client, learning_rate, batch_size)
        final_loss = train_history[-1][0]
        final_acc = train_history[-1][1]
        print('{}: Loss = {}, Accuracy = {}'.format(self.client_id, round(final_loss, 4), round(final_acc, 4)))
        return local_net.get_parameters()

# Implementing FedAvg

(All clients contribute equally and the target client does not use backdoor attack)


Client setup

### Remove samples from the train and test datasets that have the target label.

In [11]:
# Drop every sample labelled `classidx_to_remove` from both MNIST splits by replacing
# each dataset's `data` and `targets` tensors with their filtered versions.
classidx_to_remove = 9

idx = train_dataset.targets != classidx_to_remove
train_dataset.data, train_dataset.targets = train_dataset.data[idx], train_dataset.targets[idx]

test_idx = test_dataset.targets != classidx_to_remove
test_dataset.data, test_dataset.targets = test_dataset.data[test_idx], test_dataset.targets[test_idx]

## Setting Hyperparameters

In [12]:
# Dataset sizes, measured on the datasets as they are when this cell runs
total_train_size, total_test_size = len(train_dataset), len(test_dataset)

# Problem dimensions
classes = 10
input_dim = 28 * 28

# Federated-learning schedule
num_clients = 10 # (N = 5)
rounds = 15
epochs_per_client = 1

# Local optimiser settings read by Client.train / FederatedNet.fit
batch_size = 128
learning_rate = 0.01
beta = 0.9 # Momentum

In [18]:
train_dataset.targets

tensor([5, 0, 4,  ..., 5, 6, 8])

In [13]:
total_train_size

54051

In [16]:
# Per-client shard size. Computed inline because `examples_per_client` is only assigned
# in the following cell, so referencing it here fails on a top-to-bottom run.
total_train_size // num_clients

5405

In [15]:
# Split the training set into exactly `num_clients` shards that together cover every
# sample. The first `total_train_size % num_clients` shards receive one extra example, so
# no remainder shard is created and discarded, and the FedAvg weights
# (shard size / total_train_size) add up to 1.
examples_per_client = total_train_size // num_clients
leftover_examples = total_train_size % num_clients
shard_sizes = [examples_per_client + (1 if shard < leftover_examples else 0)
               for shard in range(num_clients)]

client_datasets = random_split(train_dataset, shard_sizes)

clients = [Client('client_' + str(i), client_datasets[i]) for i in range(num_clients)]

In [1]:
# Baseline FedAvg: in every round each client trains from the current global weights and
# the server sums the client weights, each scaled by that client's share of the data.
# The model class is FederatedNet; `CNNCifar` is not defined anywhere in this notebook.
global_net = to_device(FederatedNet(), device)
history = []
for i in range(rounds):
    print('Start Round {} ...'.format(i + 1))
    curr_parameters = global_net.get_parameters()
    # Accumulators start at scalar 0 and become tensors on the first addition
    new_parameters = {layer_name: {'weight': 0, 'bias': 0} for layer_name in curr_parameters}
    for client in clients:
        client_parameters = client.train(curr_parameters)
        fraction = client.get_dataset_size() / total_train_size
        for layer_name in client_parameters:
            new_parameters[layer_name]['weight'] += fraction * client_parameters[layer_name]['weight']
            new_parameters[layer_name]['bias'] += fraction * client_parameters[layer_name]['bias']
    global_net.apply_parameters(new_parameters)

    train_loss, train_acc = global_net.evaluate(train_dataset)
    test_loss, test_acc = global_net.evaluate(test_dataset)
    print('After round {}, train_loss = {}, train_acc = {}\n'.format(i + 1, round(train_loss, 4), round(train_acc, 4)))
    history.append((train_loss, train_acc))

NameError: name 'to_device' is not defined

## Separate data and label to add trigger (pattern)

In [None]:
# Collect the test images (multiplied by 255) and their labels as NumPy arrays.
test_data, test_label = [], []
for i in range(len(test_dataset)):
    image, label = test_dataset[i]
    # extend() iterates over the leading axis of the image array and appends each slice
    test_data.extend(np.array(image * 255))
    test_label.append(label)
test_data, test_label = np.array(test_data), np.array(test_label)


In [None]:
# Collect the last client's images (multiplied by 255) and labels as NumPy arrays.
requesting_client_num = len(clients)-1
requesting_data, requesting_label = [], []

for i in range(clients[-1].get_dataset_size()):
    image, label = clients[-1].get_dataset()[i]
    # extend() iterates over the leading axis of the image array and appends each slice
    requesting_data.extend(np.array(image * 255))
    requesting_label.append(label)
requesting_label = np.array(requesting_label)
requesting_data = np.array(requesting_data)


# Refer to the adversarial-robust-toolbox

https://nbviewer.org/github/Trusted-AI/adversarial-robustness-toolbox/blob/main/notebooks/hidden_trigger_backdoor/poisoning_attack_hidden_trigger_pytorch.ipynb

https://nbviewer.org/github/Trusted-AI/adversarial-robustness-toolbox/blob/main/notebooks/poisoning_defense_activation_clustering.ipynb

In [None]:
# Raw (x255) images and labels of the last client, used as the source for poisoning.
requesting_client_num = len(clients)-1
x_raw, y_raw = [], []

for i in range(clients[-1].get_dataset_size()):
    image, label = clients[-1].get_dataset()[i]
    x_raw.extend(np.array(image * 255))
    y_raw.append(label)
x_raw, y_raw = np.array(x_raw), np.array(y_raw)

# Same extraction for the test split.
x_raw_test, y_raw_test = [], []
for i in range(len(test_dataset)):
    image, label = test_dataset[i]
    x_raw_test.extend(np.array(image * 255))
    y_raw_test.append(label)
x_raw_test, y_raw_test = np.array(x_raw_test), np.array(y_raw_test)


In [None]:
percent_poison = 0.8 # 0.8 when #clients=10, 0.66 when 5

# Keep a random `percent_poison` share of the last client's samples.
# replace=False draws distinct indices; the default (with replacement) would put
# duplicate copies of some samples into the subset.
n_train = clients[-1].get_dataset_size()
num_poison = round(percent_poison * n_train)
random_poison_indices = np.random.choice(n_train, num_poison, replace=False)
x_raw = x_raw[random_poison_indices]
y_raw = y_raw[random_poison_indices]

# Same subsampling for the test split.
n_test = len(test_dataset)
num_poison_test = round(percent_poison * n_test)
random_poison_indices = np.random.choice(n_test, num_poison_test, replace=False)
x_raw_test = x_raw_test[random_poison_indices]
y_raw_test = y_raw_test[random_poison_indices]


In [None]:
from art.attacks.poisoning.backdoor_attack import PoisoningAttackBackdoor
from art.attacks.poisoning.perturbations import add_pattern_bd, add_single_bd, insert_image
from art.utils import load_mnist, preprocess

BACKDOOR_TYPE = "pattern"

max_val = np.max(requesting_data[0])

def add_modification(x):
    """Apply the backdoor trigger selected by the notebook-level ``BACKDOOR_TYPE`` to ``x``.

    Only ``'pattern'`` is supported; it delegates to ART's ``add_pattern_bd`` with
    ``pixel_value=max_val``.

    Raises:
        ValueError: if ``BACKDOOR_TYPE`` is anything other than ``'pattern'``.
    """
    if BACKDOOR_TYPE == 'pattern':
        return add_pattern_bd(x, pixel_value=max_val)
    # Raising a plain string is itself a TypeError; raise a real exception instead.
    raise ValueError("Unknown backdoor type")


In [None]:
def poison_dataset(x_clean, y_clean, percent_poison, poison_func, target_label=9):
    """Return copies of the data in which a share of the samples carry the backdoor.

    Args:
        x_clean: array of samples, indexed along axis 0.
        y_clean: array of labels, one per sample.
        percent_poison: fraction (0..1) of the samples to poison.
        poison_func: perturbation handed to ``PoisoningAttackBackdoor``.
        target_label: label written onto every poisoned sample (default 9).

    Returns:
        ``(is_poison, x_poison, y_poison)``: a boolean mask marking the poisoned
        positions, and the poisoned copies of the samples and labels.
    """
    x_poison = np.copy(x_clean)
    y_poison = np.copy(y_clean)

    n_points_in_src = np.shape(x_clean)[0]
    num_poison = round(percent_poison * np.size(y_clean))

    # replace=False: every chosen index is distinct, so exactly num_poison samples end up
    # poisoned (sampling with replacement repeats indices and poisons fewer).
    indices_to_be_poisoned = np.random.choice(n_points_in_src, num_poison, replace=False)

    backdoor_attack = PoisoningAttackBackdoor(poison_func)
    imgs_to_be_poisoned, poison_labels = backdoor_attack.poison(
        np.copy(x_clean[indices_to_be_poisoned]), y=np.ones(num_poison) * target_label)

    # Poisoned samples replace the originals in place (the dataset size is unchanged)
    x_poison[indices_to_be_poisoned] = imgs_to_be_poisoned
    y_poison[indices_to_be_poisoned] = poison_labels

    is_poison = np.zeros(np.shape(y_clean), dtype=bool)
    is_poison[indices_to_be_poisoned] = True

    return is_poison, x_poison, y_poison


In [None]:
percent_poison = 0.8 # 0.8 with 10 clients, 0.66 with 5 clients

# Poison the training arrays, then run ART's preprocess on the result
is_poison_train, x_poisoned_raw, y_poisoned_raw = poison_dataset(x_raw, y_raw, percent_poison, add_modification)
x_train, y_train = preprocess(x_poisoned_raw, y_poisoned_raw)

# Same steps for the test arrays
is_poison_test, x_poisoned_raw_test, y_poisoned_raw_test = poison_dataset(x_raw_test, y_raw_test, percent_poison, add_modification)
x_test, y_test = preprocess(x_poisoned_raw_test, y_poisoned_raw_test)

# Insert a channel axis at position 3 in both image arrays
x_train, x_test = np.expand_dims(x_train, axis=3), np.expand_dims(x_test, axis=3)

# Shuffle the training samples, labels and poison mask with one shared permutation
n_train = np.shape(y_train)[0]
shuffled_indices = np.arange(n_train)
np.random.shuffle(shuffled_indices)
x_train, y_train, is_poison_train = (
    arr[shuffled_indices] for arr in (x_train, y_train, is_poison_train)
)

is_poison_train

# Integrate images and labels to torch.utils.data.Dataset

In [None]:
# Pair every image (permuted with (2, 0, 1)) with the arg-max of its label row,
# giving lists of (tensor, label) tuples that the DataLoader can consume.
target_train_dataset = []
for i, (image, onehot) in enumerate(zip(x_train, y_train)):
    target_train_dataset.append((torch.tensor(image).permute(2,0,1), np.argmax(onehot)))

target_test_dataset = []
for i, (image, onehot) in enumerate(zip(x_test, y_test)):
    target_test_dataset.append((torch.tensor(image).permute(2,0,1), np.argmax(onehot)))


In [None]:
# Split the poisoned test set into its untouched part and its backdoored part,
# each stored as a list of (tensor, label) tuples.
clean_x_test, clean_y_test = x_test[is_poison_test == 0], y_test[is_poison_test == 0]
clean_test_dataset = []
for i, (image, onehot) in enumerate(zip(clean_x_test, clean_y_test)):
    clean_test_dataset.append((torch.tensor(image).permute(2,0,1), np.argmax(onehot)))

backdoor_x_test, backdoor_y_test = x_test[is_poison_test == 1], y_test[is_poison_test == 1]
backdoor_test_dataset = []
for i, (image, onehot) in enumerate(zip(backdoor_x_test, backdoor_y_test)):
    backdoor_test_dataset.append((torch.tensor(image).permute(2,0,1), np.argmax(onehot)))


In [None]:
# Preview a handful of backdoored test images side by side. Calling imshow once per
# sample on the same axes redraws it for the whole set and leaves only the last image visible.
n_preview = min(8, len(backdoor_test_dataset))
if n_preview:
    fig, axes = plt.subplots(1, n_preview, figsize=(2 * n_preview, 2), squeeze=False)
    for ax, (image, _label) in zip(axes[0], backdoor_test_dataset[:n_preview]):
        ax.imshow(image.squeeze())
        ax.axis('off')
    plt.show()

In [None]:
plt.imshow(backdoor_test_dataset[0][0].squeeze())

In [None]:
global_net.evaluate(clean_test_dataset)

In [None]:
global_net.evaluate(backdoor_test_dataset)

## poison 80% of client_9's data

In [None]:
# Swap the last client's clean shard for the poisoned dataset. The id comes from the
# client's position; the removed class index only coincides with it for 10 clients.
clients[-1] = Client('client_' + str(len(clients) - 1), target_train_dataset)

## FL (client_9's data and the test dataset are 80% poisoned)

In [None]:
learning_rate = 0.01

In [None]:
# Federated training with one malicious participant (the last client), which trains for
# more local epochs and is aggregated with twice the weight of the others.
# NOTE(review): the weights used below add up to (num_clients + 1) / num_clients, not 1,
# so the aggregate is not a normalised average - confirm this is intended.
global_net = to_device(FederatedNet(), device)
backdoored_history = []

for i in range(15):
    print('Start Poisoning Round {} ...'.format(i + 1))
    curr_parameters = global_net.get_parameters()
    new_parameters = {layer_name: {'weight': 0, 'bias': 0} for layer_name in curr_parameters}
    for idx, client in enumerate(clients):
        is_target = idx == num_clients-1

        # Client.train reads these notebook-level settings, so they are set per client
        learning_rate = 0.01
        epochs_per_client = 6 if is_target else 2
        fraction = 1 / num_clients
        if is_target:
            fraction = fraction * 2

        client_parameters = client.train(curr_parameters)

        for layer_name in client_parameters:
            new_parameters[layer_name]['weight'] += fraction * client_parameters[layer_name]['weight']
            new_parameters[layer_name]['bias'] += fraction * client_parameters[layer_name]['bias']

        # Keep the malicious client's latest local model for the unlearning step
        if is_target:
            client_target_param = client_parameters

    global_net.apply_parameters(new_parameters)

    train_loss, train_acc = global_net.evaluate(train_dataset)
    clean_test_loss, clean_test_acc = global_net.evaluate(clean_test_dataset)
    backdoor_test_loss, backdoor_test_acc = global_net.evaluate(backdoor_test_dataset)
    test_loss, test_acc = global_net.evaluate(test_dataset)

    round_no = i + 1
    print('After round {}, train_loss = {}, train_acc = {}, test_loss = {}, test_acc = {}'.format(round_no, round(train_loss, 4),
            round(train_acc,4), round(test_loss, 4), round(test_acc, 4)))
    print('After round {}, clean_test_loss = {}, clean_test_acc = {}'.format(round_no, round(clean_test_loss, 4), round(clean_test_acc, 4)))
    print('After round {}, backdoor_test_loss = {}, backdoor_test_acc = {}'.format(round_no, round(backdoor_test_loss, 4), round(backdoor_test_acc, 4)))

    backdoored_history.append((test_acc, clean_test_acc, backdoor_test_acc))

Backdoor Accuracy

In [None]:
# Write the backdoored global model to disk together with a freshly created SGD
# optimizer state and two bookkeeping constants.
PATH = "backdoored_model.pt"
EPOCH, LOSS = 15, 0.1
net = global_net
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

torch.save(
    dict(
        epoch=EPOCH,
        model_state_dict=net.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
        loss=LOSS,
    ),
    PATH,
)


In [None]:
# Accuracy of the backdoored global model on the poisoned train/test sets and on the
# clean-only and backdoor-only parts of the test set (evaluate returns (loss, acc)).
target_train_acc = global_net.evaluate(target_train_dataset)[1]
print("backdoor train acc:", target_train_acc)

target_test_acc = global_net.evaluate(target_test_dataset)[1]
print("backdoor test acc:", target_test_acc)

acc = global_net.evaluate(clean_test_dataset)[1]
print("Clean Accuracy:", acc)

acc = global_net.evaluate(backdoor_test_dataset)[1]
print("Backdoor Accuracy:", acc)

In [None]:
# Split the per-round (test, clean test, backdoor test) accuracy tuples into three lists.
history_test_acc, history_clean_test_acc, history_backdoor_test_acc = (
    [round_accs[column] for round_accs in backdoored_history] for column in range(3)
)


In [None]:
import matplotlib.pyplot as plt
import numpy as np

y0 = np.array(history_test_acc)
y1 = np.array(history_clean_test_acc)
y2 = np.array(history_backdoor_test_acc)
# One x position per recorded round, so the plot still works when the number of
# recorded rounds is not 15.
x = np.arange(len(y0))

plt.figure(figsize=(7,5))

plt.plot(x, y0, label = "test_acc")
plt.plot(x, y1, label = "clean_test_acc")
plt.plot(x, y2, label = "backdoor_test_acc")

plt.xticks(range(0, len(x)))

plt.xlabel("epoch")  # add X-axis label
plt.ylabel("testing accuracy")  # add Y-axis label
plt.title("Global model performance w. one malicious client")  # add title
plt.legend()
plt.show()


# RETRAIN (NOT YET REVISED)

# Implementing Retrain

Excluding the target client

We considered the last client as a target client

In [None]:
# # @title
# examples_per_client = total_train_size // num_clients
# client_datasets = random_split(train_dataset, [min(i + examples_per_client,
#            total_train_size) - i for i in range(0, total_train_size, examples_per_client)])

# ####################################
# # We should add the backdoor triggers on the last client dataset
# # client_datasets[-1] = ...

# ####################################

# clients = [Client('client_' + str(i), client_datasets[i]) for i in range(num_clients - 1)]

In [None]:
plt.imshow(backdoor_test_dataset[0][0].squeeze())
backdoor_test_dataset[0][1]

In [None]:
plt.imshow(clean_test_dataset[35][0].squeeze())
clean_test_dataset[35][1]

In [None]:
# Learning params for retraining. These are re-declared here because the poisoning loop
# earlier in the notebook reassigns epochs_per_client and learning_rate.
num_clients, rounds = 10, 15 # (N = 5)
batch_size = 128
epochs_per_client, learning_rate = 1, 0.01
beta = 0.9 # Momentum

In [None]:
# Retrain from scratch with every client except the last (revoked) one; the remaining
# clients are averaged with equal weight 1 / (num_clients - 1).
retrained_net = to_device(FederatedNet(), device)
history = []
for i in range(rounds):
    print('Start Round {} ...'.format(i + 1))
    curr_parameters = retrained_net.get_parameters()
    new_parameters = dict([(layer_name, {'weight': 0, 'bias': 0}) for layer_name in curr_parameters])
    for client in clients[:-1]:

        client_parameters = client.train(curr_parameters)
        fraction = 1 / (num_clients - 1)

        for layer_name in client_parameters:
            new_parameters[layer_name]['weight'] += fraction * client_parameters[layer_name]['weight']
            new_parameters[layer_name]['bias'] += fraction * client_parameters[layer_name]['bias']
    retrained_net.apply_parameters(new_parameters)

    train_loss, train_acc = retrained_net.evaluate(train_dataset)
    target_train_loss, target_train_acc = retrained_net.evaluate(target_train_dataset)
    test_loss, test_acc = retrained_net.evaluate(test_dataset)
    target_test_loss, target_test_acc = retrained_net.evaluate(target_test_dataset)
    clean_test_loss, clean_test_acc = retrained_net.evaluate(clean_test_dataset)
    backdoor_test_loss, backdoor_test_acc = retrained_net.evaluate(backdoor_test_dataset)
    print('After round {}, train_loss = {}, train_acc = {}'.format(i + 1, round(train_loss, 4), round(train_acc,4)))
    print('After round {}, target_train_loss = {}, target_train_acc = {}'.format(i + 1, round(target_train_loss, 4), round(target_train_acc, 4)))
    # Pass the test metrics themselves: with the train values listed first, the three
    # placeholders printed train numbers under the test labels.
    print('After round {}, test_loss = {}, test_acc = {}'.format(i + 1, round(test_loss, 4), round(test_acc, 4)))
    print('After round {}, target_test_loss = {}, target_test_acc = {}'.format(i + 1, round(target_test_loss, 4), round(target_test_acc, 4)))
    print('After round {}, clean_test_loss = {}, clean_test_acc = {}'.format(i + 1, round(clean_test_loss, 4), round(clean_test_acc, 4)))
    print('After round {}, backdoor_test_loss = {}, backdoor_test_acc = {}'.format(i + 1, round(backdoor_test_loss, 4), round(backdoor_test_acc, 4)))

    history.append((target_test_acc, clean_test_acc, backdoor_test_acc))

In [None]:
# Final metrics of the retrained model (`i` still holds the last round index of the loop above).
train_loss, train_acc = retrained_net.evaluate(train_dataset)
target_train_loss, target_train_acc = retrained_net.evaluate(target_train_dataset)
test_loss, test_acc = retrained_net.evaluate(test_dataset)
target_test_loss, target_test_acc = retrained_net.evaluate(target_test_dataset)
clean_test_loss, clean_test_acc = retrained_net.evaluate(clean_test_dataset)
backdoor_test_loss, backdoor_test_acc = retrained_net.evaluate(backdoor_test_dataset)
print('After round {}, train_loss = {}, train_acc = {}'.format(i + 1, round(train_loss, 4), round(train_acc,4)))
print('After round {}, target_train_loss = {}, target_train_acc = {}'.format(i + 1, round(target_train_loss, 4), round(target_train_acc, 4)))
# Pass the test metrics themselves; previously the train values filled these placeholders.
print('After round {}, test_loss = {}, test_acc = {}'.format(i + 1, round(test_loss, 4), round(test_acc, 4)))
print('After round {}, target_test_loss = {}, target_test_acc = {}'.format(i + 1, round(target_test_loss, 4), round(target_test_acc, 4)))
print('After round {}, clean_test_loss = {}, clean_test_acc = {}'.format(i + 1, round(clean_test_loss, 4), round(clean_test_acc, 4)))
print('After round {}, backdoor_test_loss = {}, backdoor_test_acc = {}'.format(i + 1, round(backdoor_test_loss, 4), round(backdoor_test_acc, 4)))


In [None]:
# Split the per-round accuracy tuples recorded during retraining into three lists.
retrained_test_acc, retrained_clean_test_acc, retrained_backdoor_test_acc = (
    [round_accs[column] for round_accs in history] for column in range(3)
)


In [None]:
import matplotlib.pyplot as plt
import numpy as np

y0 = np.array(retrained_test_acc)
y1 = np.array(retrained_clean_test_acc)
y2 = np.array(retrained_backdoor_test_acc)
# One x position per recorded round instead of a fixed 15
x = np.arange(len(y0))

plt.figure(figsize=(5,3))

plt.plot(x, y0, label = "test_acc")
plt.plot(x, y1, label = "clean_test_acc")
plt.plot(x, y2, label = "backdoor_test_acc")

plt.xticks(range(0, len(x)))

plt.xlabel("epoch")  # add X-axis label
plt.ylabel("testing accuracy")  # add Y-axis label
plt.title("Retrained global model without the revoked client")  # add title
plt.legend()
plt.show()


# Implementing the Unlearning Algorithm

Calculating W_ref

In [None]:
# Reference model: the global weights with the target client's contribution removed,
# w_ref = (N * w - w_target) / (N - 1), computed per layer for weight and bias.
# NOTE(review): this formula treats all N clients as equally weighted (1/N each) -
# confirm it matches the weights used when the global model was aggregated.
w = global_net.get_parameters()
w_target = client_target_param

net_target_client = to_device(FederatedNet(), device)
net_target_client.apply_parameters(w_target)

w_ref = {layer_name: {'weight': 0, 'bias': 0} for layer_name in client_target_param}
for layer_name in w:
    for tensor_key in ('weight', 'bias'):
        w_ref[layer_name][tensor_key] = (1 / (num_clients - 1)) * (
            num_clients * w[layer_name][tensor_key] - w_target[layer_name][tensor_key])

Setting Hyperparameters

In [None]:
# Unlearning settings: optimiser (learning rate, momentum), batch size, number of
# epochs and the early-stopping threshold.
lr_u, beta_u = 0.01, 0.9
batch_size_u = 1024
epochs_u = 15
tau_early_stopping = 10

Defining the target dataset and unlearning model (net_u) and reference model (net_ref)

In [None]:
# Model to be unlearned: a fresh network restored from the checkpoint saved at PATH.
net_u = to_device(FederatedNet(), device)
optimizer = optim.SGD(net_u.parameters(), lr=0.01, momentum=0.9)

checkpoint = torch.load(PATH)
net_u.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch, loss = checkpoint['epoch'], checkpoint['loss']

# Reference model: a fresh network carrying the w_ref parameters.
net_ref = to_device(FederatedNet(), device)
net_ref.apply_parameters(w_ref)

In [None]:
# Metrics of the freshly loaded model. The train metrics are evaluated here as well;
# otherwise the first print would show values left over from a different model.
train_loss, train_acc = net_u.evaluate(train_dataset)
target_train_loss, target_train_acc = net_u.evaluate(target_train_dataset)
test_loss, test_acc = net_u.evaluate(test_dataset)
target_test_loss, target_test_acc = net_u.evaluate(target_test_dataset)
clean_test_loss, clean_test_acc = net_u.evaluate(clean_test_dataset)
backdoor_test_loss, backdoor_test_acc = net_u.evaluate(backdoor_test_dataset)
print('After round {}, train_loss = {}, train_acc = {}'.format(i + 1, round(train_loss, 4), round(train_acc,4)))
print('After round {}, target_train_loss = {}, target_train_acc = {}'.format(i + 1, round(target_train_loss, 4), round(target_train_acc, 4)))
# Pass the test metrics themselves; previously the train values filled these placeholders.
print('After round {}, test_loss = {}, test_acc = {}'.format(i + 1, round(test_loss, 4), round(test_acc, 4)))
print('After round {}, target_test_loss = {}, target_test_acc = {}'.format(i + 1, round(target_test_loss, 4), round(target_test_acc, 4)))
print('After round {}, clean_test_loss = {}, clean_test_acc = {}'.format(i + 1, round(clean_test_loss, 4), round(clean_test_acc, 4)))
print('After round {}, backdoor_test_loss = {}, backdoor_test_acc = {}'.format(i + 1, round(backdoor_test_loss, 4), round(backdoor_test_acc, 4)))

## Load backdoored model

In [None]:
# Reload the backdoored model into net_u. The checkpoint is read from PATH, the file
# written when the backdoored model was saved; `PATH_2` is not defined in this notebook.
net_u = to_device(FederatedNet(), device)
optimizer = optim.SGD(net_u.parameters(), lr=0.01, momentum=0.9)

checkpoint = torch.load(PATH)
net_u.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

net_ref = to_device(FederatedNet(), device)
net_ref.apply_parameters(w_ref)

Calculating delta

The radius of the $\ell_2$-norm ball, $\delta$, is set to one third of the average Euclidean distance between $w_{ref}$ and a random model, where
the average is computed over 10 random models.

In [None]:
def calculate_diff(parameters, ref_model):
    """Mean, over parameter tensors, of the L2 norm of ``param - ref_param``.

    Args:
        parameters: iterable of parameters, paired in order with ``ref_model.parameters()``.
        ref_model: model providing the reference parameters.

    Returns:
        The NumPy mean of the per-tensor norms.
    """
    per_tensor_norms = [
        torch.norm(param.data - ref_param.data).item()
        for param, ref_param in zip(parameters, ref_model.parameters())
    ]
    return np.mean(np.array(per_tensor_norms))


In [None]:
# Average distance between the reference model and 10 randomly initialised models.
# NOTE(review): the factor applied below is 1/30, while the text above says delta is
# one third of the average distance - confirm which one is intended.
random_model_diffs = []
for i in range(10):
    new_random_model = to_device(FederatedNet(), device)
    random_model_diffs.append(calculate_diff(new_random_model.parameters(), net_ref))
diffs = sum(random_model_diffs)

delta = (1/30) * (diffs / 10)
print("Delta: ", delta)

Using this to implement gradient ascent

In [None]:
def objective_function(model, x, label):
    """Return the negated cross-entropy of ``model.forward(x)`` against ``label``.

    Minimising this value is gradient ascent on the cross-entropy loss.
    """
    return -torch.nn.functional.cross_entropy(model.forward(x), label)

Implementing optimization constraint (projection)

In [None]:
class L2Constraint:
    """Keeps each parameter tensor within an L2 distance ``delta`` of a reference model."""

    def __init__(self, ref_model, delta):
        self.ref_model = ref_model
        self.delta = delta

    def project(self, parameters):
        """Pull every parameter that drifted too far back onto the ball around its reference.

        Parameters are paired in order with ``ref_model.parameters()``. For each pair
        whose difference has an L2 norm above ``delta``, the parameter is replaced by
        the point at distance ``delta`` from the reference in the same direction.
        """
        for param, ref_param in zip(parameters, self.ref_model.parameters()):
            diff = param.data - ref_param.data
            l2_norm = torch.norm(diff)
            # The check has to run for every tensor; outside the loop it would only
            # ever see (and project) the last parameter.
            if l2_norm > self.delta:
                param.data = ref_param.data + (diff / l2_norm) * self.delta



Early stopping

In [None]:
def Early_stopping(tau, parameters, net_target_client):
    """Return True when the mean per-tensor L2 distance to the target client's model is below ``tau``.

    Args:
        tau: distance threshold.
        parameters: iterable of parameters, paired in order with
            ``net_target_client.parameters()``.
        net_target_client: model whose parameters are the comparison point.
    """
    per_tensor_norms = np.array([
        torch.norm(param.data - ref_param.data).item()
        for param, ref_param in zip(parameters, net_target_client.parameters())
    ])
    return bool(np.mean(per_tensor_norms) < tau)


In [None]:
# Unlearning runs on the last client's dataset D_i.
D_i = clients[-1].get_dataset()

# Projection onto the ball of radius delta around the reference model
constraint = L2Constraint(net_ref, delta)
# SGD with momentum over the unlearning model's parameters
optimizer = torch.optim.SGD(net_u.parameters(), lr=lr_u, momentum=beta_u)
# Shuffled batches of D_i, moved to the compute device
dataloader = DeviceDataLoader(DataLoader(D_i, batch_size=batch_size_u, shuffle=True), device)

In [None]:
# Projected gradient ascent on the target client's data: maximise the loss while keeping
# the model close to the reference model, stopping early when Early_stopping fires.
return_to_server = False
unlearned_history = []

for i in range(epochs_u):
    if return_to_server:
        break

    losses = []
    for batch in dataloader:
        images, labels = batch
        optimizer.zero_grad()
        loss = objective_function(net_u, images, labels)
        loss.backward()

        # Apply gradient clipping
        torch.nn.utils.clip_grad_norm_(net_u.parameters(), max_norm=5)

        optimizer.step()

        # Apply the constraint
        constraint.project(net_u.parameters())

        # Early stopping
        if Early_stopping(tau_early_stopping, net_u.parameters(), net_target_client):
            return_to_server = True
            break
        # detach() returns a new tensor; keep that one so no autograd history is stored
        losses.append(-loss.detach())
    # If early stopping fired on the first batch nothing was recorded, and stacking an
    # empty list would raise.
    avg_loss = torch.stack(losses).mean().item() if losses else float('nan')

    train_loss, train_acc = net_u.evaluate(train_dataset)
    target_train_loss, target_train_acc = net_u.evaluate(target_train_dataset)
    test_loss, test_acc = net_u.evaluate(test_dataset)
    target_test_loss, target_test_acc = net_u.evaluate(target_test_dataset)
    clean_test_loss, clean_test_acc = net_u.evaluate(clean_test_dataset)
    backdoor_test_loss, backdoor_test_acc = net_u.evaluate(backdoor_test_dataset)
    print('After round {}, train_loss = {}, train_acc = {}'.format(i + 1, round(train_loss, 4), round(train_acc,4)))
    print('After round {}, target_train_loss = {}, target_train_acc = {}'.format(i + 1, round(target_train_loss, 4), round(target_train_acc, 4)))
    # Pass the test metrics themselves; previously the train values filled these placeholders.
    print('After round {}, test_loss = {}, test_acc = {}'.format(i + 1, round(test_loss, 4), round(test_acc, 4)))
    print('After round {}, target_test_loss = {}, target_test_acc = {}'.format(i + 1, round(target_test_loss, 4), round(target_test_acc, 4)))
    print('After round {}, clean_test_loss = {}, clean_test_acc = {}'.format(i + 1, round(clean_test_loss, 4), round(clean_test_acc, 4)))
    print('After round {}, backdoor_test_loss = {}, backdoor_test_acc = {}'.format(i + 1, round(backdoor_test_loss, 4), round(backdoor_test_acc, 4)))

    unlearned_history.append((target_test_acc, clean_test_acc, backdoor_test_acc))


In [None]:
# Write the unlearned model to disk together with a freshly created SGD optimizer
# state and two bookkeeping constants.
UNLEARN_PATH = "unlearned_model.pt"
EPOCH, LOSS = 15, 0.1
net = net_u
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

torch.save(
    dict(
        epoch=EPOCH,
        model_state_dict=net.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
        loss=LOSS,
    ),
    UNLEARN_PATH,
)


In [None]:
# Final metrics of the unlearned model. The train metrics are re-evaluated here so the
# first print reflects the model's final state rather than a value left over from the loop.
train_loss, train_acc = net_u.evaluate(train_dataset)
target_train_loss, target_train_acc = net_u.evaluate(target_train_dataset)
test_loss, test_acc = net_u.evaluate(test_dataset)
target_test_loss, target_test_acc = net_u.evaluate(target_test_dataset)
clean_test_loss, clean_test_acc = net_u.evaluate(clean_test_dataset)
backdoor_test_loss, backdoor_test_acc = net_u.evaluate(backdoor_test_dataset)
print('After round {}, train_loss = {}, train_acc = {}'.format(i + 1, round(train_loss, 4), round(train_acc,4)))
print('After round {}, target_train_loss = {}, target_train_acc = {}'.format(i + 1, round(target_train_loss, 4), round(target_train_acc, 4)))
# Pass the test metrics themselves; previously the train values filled these placeholders.
print('After round {}, test_loss = {}, test_acc = {}'.format(i + 1, round(test_loss, 4), round(test_acc, 4)))
print('After round {}, target_test_loss = {}, target_test_acc = {}'.format(i + 1, round(target_test_loss, 4), round(target_test_acc, 4)))
print('After round {}, clean_test_loss = {}, clean_test_acc = {}'.format(i + 1, round(clean_test_loss, 4), round(clean_test_acc, 4)))
print('After round {}, backdoor_test_loss = {}, backdoor_test_acc = {}'.format(i + 1, round(backdoor_test_loss, 4), round(backdoor_test_acc, 4)))

In [None]:
# Split the per-epoch accuracy tuples recorded during unlearning into three lists.
unlearned_test_acc, unlearned_clean_test_acc, unlearned_backdoor_test_acc = (
    [epoch_accs[column] for epoch_accs in unlearned_history] for column in range(3)
)


In [None]:
import matplotlib.pyplot as plt 
import numpy as np 

# Accuracy curves recorded during unlearning, one value per recorded round.
y0 = np.array(unlearned_test_acc)
y1 = np.array(unlearned_clean_test_acc)
y2 = np.array(unlearned_backdoor_test_acc)

# Derive the x-axis from the recorded history instead of hard-coding 15 points, so the
# plot keeps working when the number of recorded rounds differs.
x = np.arange(len(y0))

plt.figure(figsize=(5,3))

plt.plot(x, y0, label = "test_acc") 
plt.plot(x, y1, label = "clean_test_acc") 
plt.plot(x, y2, label = "backdoor_test_acc") 

# One tick per recorded round.
plt.xticks(range(0, len(x)))

plt.xlabel("epoch")  # add X-axis label 
plt.ylabel("testing accuracy")  # add Y-axis label 
plt.title("Unlearned global model")  # add title 
plt.legend() 
plt.show() 


# FL post training

In [None]:
# Values left commented out in this cell, kept for reference only:
#   num_clients = 10 (N = 5), rounds = 15, batch_size = 128,
#   epochs_per_client = 1, learning_rate = 0.01, beta = 0.9 (momentum)

# Hyper-parameters with the `_u` suffix; `epochs_u` is the number of post-training rounds.
epochs_u = 15
batch_size_u = 256
lr_u, beta_u = 0.005, 0.9  # beta_u is presumably the momentum, like `beta` above — confirm
tau_early_stopping = 10

In [None]:
# Rebuild a fresh FederatedNet on `device` and restore the state saved at UNLEARN_PATH.
net_u = to_device(FederatedNet(), device)
optimizer = optim.SGD(net_u.parameters(), lr=0.01, momentum=0.9)

# map_location remaps the stored tensors onto the active device, so the checkpoint also
# loads when it was written on a different device than the one currently in use.
checkpoint = torch.load(UNLEARN_PATH, map_location=device)
net_u.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# Bookkeeping values stored alongside the weights.
epoch = checkpoint['epoch']
loss = checkpoint['loss']


In [None]:

# Post-training: run `epochs_u` federated-averaging rounds starting from the reloaded
# `net_u`, using every client except the one at index `num_clients - 1`.
# After each round the (target_test_acc, clean_test_acc, backdoor_test_acc) triple is
# appended to `post_history`.
post_history = []

# Number of training samples held by the participating clients; each client's update is
# weighted by its share of this total. Computed once: nothing in the loop below touches
# `total_train_size` or `clients[-1]`.
remaining_train_size = total_train_size - clients[-1].get_dataset_size()

for i in range(epochs_u):
    print('Start Round {} ...'.format(i + 1))
    curr_parameters = net_u.get_parameters()
    # Accumulators for the weighted average of the client parameters, one entry per layer.
    new_parameters = {layer_name: {'weight': 0, 'bias': 0} for layer_name in curr_parameters}
    for idx, client in enumerate(clients):
        if idx == num_clients - 1:
            # Excluded client (presumably the one being unlearned — confirm).
            continue
        client_parameters = client.train(curr_parameters)
        # Weight by dataset size. The former `1 / (num_clients - 1)` assignment was dead
        # code (immediately overwritten) and has been removed.
        fraction = client.get_dataset_size() / remaining_train_size
        for layer_name in client_parameters:
            new_parameters[layer_name]['weight'] += fraction * client_parameters[layer_name]['weight']
            new_parameters[layer_name]['bias'] += fraction * client_parameters[layer_name]['bias']

    net_u.apply_parameters(new_parameters)

    train_loss, train_acc = net_u.evaluate(train_dataset)
    target_train_loss, target_train_acc = net_u.evaluate(target_train_dataset)
    test_loss, test_acc = net_u.evaluate(test_dataset)
    target_test_loss, target_test_acc = net_u.evaluate(target_test_dataset)
    clean_test_loss, clean_test_acc = net_u.evaluate(clean_test_dataset)
    backdoor_test_loss, backdoor_test_acc = net_u.evaluate(backdoor_test_dataset)
    print('After round {}, train_loss = {}, train_acc = {}'.format(i + 1, round(train_loss, 4), round(train_acc,4)))
    print('After round {}, target_train_loss = {}, target_train_acc = {}'.format(i + 1, round(target_train_loss, 4), round(target_train_acc, 4)))
    # Fixed: the training metrics used to be passed first here, so the line labelled
    # "test_loss / test_acc" actually displayed train_loss / train_acc.
    print('After round {}, test_loss = {}, test_acc = {}'.format(i + 1, round(test_loss, 4), round(test_acc, 4)))
    print('After round {}, target_test_loss = {}, target_test_acc = {}'.format(i + 1, round(target_test_loss, 4), round(target_test_acc, 4)))
    print('After round {}, clean_test_loss = {}, clean_test_acc = {}'.format(i + 1, round(clean_test_loss, 4), round(clean_test_acc, 4)))
    print('After round {}, backdoor_test_loss = {}, backdoor_test_acc = {}'.format(i + 1, round(backdoor_test_loss, 4), round(backdoor_test_acc, 4)))

    post_history.append((target_test_acc, clean_test_acc, backdoor_test_acc))


In [None]:
# Split the post-training history into one list per metric.
# Each entry is the (target_test_acc, clean_test_acc, backdoor_test_acc) tuple appended
# by the post-training loop, so `post_test_acc` holds the target-test accuracy.
post_test_acc = [target_acc for target_acc, _, _ in post_history]
post_clean_test_acc = [clean_acc for _, clean_acc, _ in post_history]
post_backdoor_test_acc = [backdoor_acc for _, _, backdoor_acc in post_history]


In [None]:
import matplotlib.pyplot as plt 
import numpy as np 

# Accuracy curves recorded during post-training, one value per recorded round.
y0 = np.array(post_test_acc)
y1 = np.array(post_clean_test_acc)
y2 = np.array(post_backdoor_test_acc)

# Derive the x-axis from the recorded history instead of hard-coding 15 points, so the
# plot keeps working when the number of recorded rounds differs.
x = np.arange(len(y0))

plt.figure(figsize=(5,3))

plt.plot(x, y0, label = "test_acc") 
plt.plot(x, y1, label = "clean_test_acc") 
plt.plot(x, y2, label = "backdoor_test_acc") 

# One tick per recorded round.
plt.xticks(range(0, len(x)))

plt.xlabel("epoch")  # add X-axis label 
plt.ylabel("testing accuracy")  # add Y-axis label 
plt.title("Unlearned global model + post training")  # add title 
plt.legend() 
plt.show() 


In [None]:
def flatten_extend(matrix):
    """Concatenate the rows of `matrix` into one new list (flattens a single level).

    Args:
        matrix: iterable whose items are themselves iterable.

    Returns:
        A list with the items of every row, in row order.
    """
    return [item for row in matrix for item in row]

In [None]:
# Number of weight arrays collected in `retrained_param`.
# NOTE(review): `retrained_param` is assigned in the cell that follows; unless it is also
# defined earlier in the notebook, this cell raises NameError on a fresh "Run All" — confirm.
len(retrained_param)

In [None]:
# Collect the weight tensors (parameters whose name contains 'weight') of the retrained
# model and of the unlearned model as NumPy arrays.
#   *_param : one array per weight tensor, original shape preserved
#   flat_*  : flat list of every scalar weight, in named_parameters() order
retrained_param = []
flat_retrained = []
for name, param in retrained_net.named_parameters():
    if 'weight' in name:
        weights = param.cpu().detach().numpy()
        retrained_param.append(weights)
        # ravel() first: extending with the raw array would append its first-axis slices
        # (rows / sub-tensors), whose shapes can differ between layers and then cannot be
        # stacked into one numeric array.
        flat_retrained.extend(weights.ravel())


unlearned_param = []
flat_unlearned = []
for name, param in net_u.named_parameters():
    if 'weight' in name:
        weights = param.cpu().detach().numpy()
        unlearned_param.append(weights)
        flat_unlearned.extend(weights.ravel())
    

In [None]:
# Number of entries accumulated in `flat_retrained`.
len(flat_retrained)

In [None]:
# Norm (np.linalg.norm default) of the difference between the retrained and unlearned weights.
# NOTE(review): requires both lists to convert to numeric arrays of identical shape — confirm.
np.linalg.norm(np.array(flat_retrained)-np.array(flat_unlearned))

In [None]:
# Second-to-last collected weight array of each model
# ("plr" presumably stands for the penultimate layer — confirm).
retrained_plr = np.array(retrained_param[-2])
unlearned_plr = np.array(unlearned_param[-2])

In [None]:
# Norm (np.linalg.norm default) of the element-wise difference between the two arrays above.
np.linalg.norm(retrained_plr-unlearned_plr)