# Imports

In [16]:
import os
import random
from collections import defaultdict

import numpy as np
import pandas as pd
import torch
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, roc_auc_score

from model import *
from utils import get_gradient, gradient_inversion
from rectorch.data import DataProcessing, DatasetManager

# Set Random Seed

In [17]:
def init_seed(seed, reproducibility):
    r""" Seed the Python, numpy and torch (CPU + CUDA) random generators and
    configure cudnn accordingly.

    Args:
        seed (int): value handed to every seeding routine
        reproducibility (bool): when truthy, cudnn is put in deterministic mode
            with benchmarking off; otherwise benchmarking is turned on and
            determinism is not requested
    """
    # Exported for hash randomisation; Python reads this variable at interpreter
    # start-up, so it matters for child processes rather than the running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)

    random.seed(seed)
    np.random.seed(seed)
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed)

    # The two cudnn switches are always set to opposite values.
    deterministic = bool(reproducibility)
    torch.backends.cudnn.benchmark = not deterministic
    torch.backends.cudnn.deterministic = deterministic

In [18]:
# Fix all RNGs up front and request deterministic cudnn so the attack runs below are repeatable.
init_seed(seed=2022, reproducibility=True)

# Model and Dataset Settings

In [19]:
# Dataset whose JSON config is loaded below; alternatives listed by the author: ml-1m, steam, lastfm
dataset_name = 'lastfm'
# Number of users (rows of the first training batch) whose data the attacks try to reconstruct.
NUM_USER_TO_ATTACK = 2
# Reconstructed scores at or above this value are counted as positive interactions during evaluation.
POS_THRESHOLD = 0.6

dproc = DataProcessing(f'./config/dataset/{dataset_name}.json')
# Run the preprocessing step only when its output path does not exist yet.
needs_processing = not os.path.exists(dproc.cfg.proc_path)
if needs_processing:
    dproc.process()

dataset = DatasetManager(dproc.cfg)
print(dataset)

n_users: 1877
n_item: 16423
n_users_train: 1677
n_users_val: 100
n_users_test: 100
size_dataset: 91576


## Attack Untrained FedVAE

In [20]:
# Victim model and data source for the attack. Dropout is disabled
# (presumably so the observed gradient is a deterministic function of the
# input -- TODO confirm against FedMultiVAE).
fed_vae = FedMultiVAE(dataset, dropout=0.)
fed_sampler = Federated_Sampler(dataset.training_set[0], dataset.validation_set, batch_size=128)


# build inversefed library specific config for the reconstruction attack
config = dict(signed=True,
            boxed=True,
            cost_fn='sim',
            indices='def',
            weights='equal',
            lr=0.01,
            optim='adam',
            restarts=1,
            max_iterations=3000,
            init='randn',
            filter='none',
            lr_decay=True,
            scoring_choice='loss',
            )

# Per-user results, (re)initialised here so re-running the cell starts clean.
input_origin = []
reconstructed_input_base = []
reconstructed_input_with_noise = []

# NOTE(review): these two dicts are never filled in this cell; they are kept
# because later cells may rely on the names.
metric_dict_base = defaultdict(list)
metric_dict_vb = defaultdict(list)

# Only the first batch produced by the sampler is used.
data_tensor, _ = next(iter(fed_sampler))
input_shape = (dataset.n_items,)

# Bound the loop up front: zip(range(n), ...) yields at most NUM_USER_TO_ATTACK
# rows and yields nothing when the constant is 0 or negative. The previous
# `if i == NUM_USER_TO_ATTACK-1: break` guard never fired in that case and the
# whole batch was attacked.
for i, data_u in zip(range(NUM_USER_TO_ATTACK), data_tensor):
    print(f"Reconstructing User {i}...")
    data_u = data_u.view(1, -1).to(fed_vae.device)
    # NOTE(review): meaning of the third positional argument (True) is defined
    # in utils.get_gradient -- confirm there.
    gradient = get_gradient(fed_vae.network, data_u, True)
    data_np = data_u.cpu().numpy()
    input_origin.append(data_np[0])
    # IGA
    # NOTE(review): the positional arguments `0, 1` are defined by
    # utils.gradient_inversion -- confirm their meaning there.
    base_output_reconstruction, _, _ = gradient_inversion(gradient, fed_vae.network, input_shape, 0, 1, config)
    data_rc_base = base_output_reconstruction.cpu().numpy().squeeze(0)
    reconstructed_input_base.append(data_rc_base)
    # NRA
    # NOTE(review): same call with two extra positional arguments (False, 200)
    # whose semantics live in utils.gradient_inversion; `dp` and `eps` are not
    # used in this cell.
    vb_output_reconstruction, dp, eps = gradient_inversion(gradient, fed_vae.network, input_shape, 0, 1, config, False, 200)
    image_rc_vb = vb_output_reconstruction.cpu().numpy().squeeze(0)
    reconstructed_input_with_noise.append(image_rc_vb)

Reconstructing User 0...
It: 0. Rec. loss: 0.9770.
It: 500. Rec. loss: 0.1552.
It: 1000. Rec. loss: 0.1552.
It: 1500. Rec. loss: 0.1550.
It: 2000. Rec. loss: 0.1550.
It: 2500. Rec. loss: 0.1550.
Choosing optimal result ...
Optimal result score: 0.1550
Total time: 15.401290893554688.
It: 0. Rec. loss: 0.9886.
It: 500. Rec. loss: 0.1552.
It: 1000. Rec. loss: 0.1552.
It: 1500. Rec. loss: 0.1550.
It: 2000. Rec. loss: 0.1550.
It: 2500. Rec. loss: 0.1550.
Choosing optimal result ...
Optimal result score: 0.1550
Total time: 15.419992208480835.
Reconstructing User 1...
It: 0. Rec. loss: 0.9740.
It: 500. Rec. loss: 0.1491.
It: 1000. Rec. loss: 0.1491.
It: 1500. Rec. loss: 0.1489.
It: 2000. Rec. loss: 0.1489.
It: 2500. Rec. loss: 0.1489.
Choosing optimal result ...
Optimal result score: 0.1489
Total time: 15.453086614608765.
It: 0. Rec. loss: 0.9778.
It: 500. Rec. loss: 0.1491.
It: 1000. Rec. loss: 0.1491.
It: 1500. Rec. loss: 0.1489.
It: 2000. Rec. loss: 0.1489.
It: 2500. Rec. loss: 0.1489.
Cho

## Summary of Attack Results

In [21]:
def evaluate(y_true, y_pred, threshold=0.6):
    """Binarise the predicted scores and compare them with the true labels.

    Args:
        y_true: ground-truth labels passed unchanged to the sklearn metrics.
        y_pred: numpy array of predicted scores; entries ``>= threshold``
            become 1, all others 0 (the array's dtype is preserved).
        threshold: cut-off used for the binarisation.

    Returns:
        tuple: ``(precision, recall, f1)`` as computed by
        ``precision_score``, ``recall_score`` and ``f1_score``.
    """
    binarized = (y_pred >= threshold).astype(y_pred.dtype)
    return tuple(
        metric(y_true, binarized)
        for metric in (precision_score, recall_score, f1_score)
    )

In [22]:
# Mean (precision, recall, F1) over the attacked users, one row per attack.
res_iga = np.array([
    evaluate(x_true, x_pred, POS_THRESHOLD)
    for x_true, x_pred in zip(input_origin, reconstructed_input_base)
]).mean(axis=0)
res_nra = np.array([
    evaluate(x_true, x_pred, POS_THRESHOLD)
    for x_true, x_pred in zip(input_origin, reconstructed_input_with_noise)
]).mean(axis=0)

metric_columns = ['Precision', 'Recall', 'F1']
df = pd.DataFrame(data=[res_iga, res_nra], columns=metric_columns, index=['IGA', 'NRA'])
df

Unnamed: 0,Precision,Recall,F1
IGA,1.0,1.0,1.0
NRA,1.0,1.0,1.0
