In [1]:
# Extend the module search path before the project-local imports in the next cell.
# NOTE(review): 'Desktop/src' is a relative path, so Python resolves it against
# the kernel's current working directory — confirm it points at the intended folder.
import sys
sys.path.append('Desktop/src')

# Print the working directory so the resolution of the relative path above can be checked.
import os
print(os.getcwd())

/home/mahsa/Desktop/FL/src


In [2]:
import copy
import time
import pickle
import numpy as np
import torch
from torchsummary import summary
import torch.nn as nn
from options import args_parser
from update_s3_gradient_matching import LocalUpdate
from utils import test_inference
from models import CNNMnistRelu, CNNMnistTanh
from models import CNNFashion_MnistRelu, CNNFashion_MnistTanh
from models import CNNCifar10Relu, CNNCifar10Tanh
from utils import average_weights, exp_details
from datasets import get_dataset
from torchvision import models
from logging_results import logging
from opacus.dp_model_inspector import DPModelInspector
from opacus.utils import module_modification
from opacus import PrivacyEngine

# NOTE(review): set_seed is a project-local helper called with no arguments; it
# presumably seeds the random number generators used later in this notebook
# (e.g. np.random.choice in the training loop) — confirm which libraries it covers.
from seed_manager import set_seed
set_seed()


2024-06-11 13:39:59,213 - INFO - Note: NumExpr detected 24 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
2024-06-11 13:39:59,213 - INFO - NumExpr defaulting to 8 threads.


In [3]:
def calculate_noise_multiplier(epsilon, delta=1e-4, sensitivity=1.0):
    """
    Calculate the noise multiplier of the classical Gaussian mechanism.

    The multiplier is ``sqrt(2 * ln(1.25 / delta)) / epsilon``, i.e. the noise
    standard deviation expressed in units of the sensitivity.

    NOTE: the classical analysis behind this formula is stated for epsilon < 1;
    values returned for larger epsilon are an extrapolation of that bound.

    Parameters:
        epsilon (float): Privacy budget epsilon. Must be > 0.
        delta (float): Privacy parameter delta, strictly between 0 and 1.
            Default is 1e-4.
        sensitivity (float): Sensitivity of the function. Default is 1.0.
            Accepted for interface compatibility only: it does not change the
            returned multiplier (the absolute noise scale would be
            ``multiplier * sensitivity``).

    Returns:
        float: Noise multiplier for Gaussian mechanism.

    Raises:
        ValueError: If epsilon is not positive or delta is outside (0, 1).
    """
    # `not x > 0` (rather than `x <= 0`) also rejects NaN.
    if not epsilon > 0:
        raise ValueError(f"epsilon must be positive, got {epsilon}")
    # Outside (0, 1) the log term is meaningless (and turns negative past 1.25,
    # which would make the square root NaN).
    if not 0 < delta < 1:
        raise ValueError(f"delta must be in (0, 1), got {delta}")
    return np.sqrt(2 * np.log(1.25 / delta)) / epsilon

# Privacy budgets to tabulate
epsilon_values = [0.50, 1.0, 1.50, 2.0, 2.50, 3.0, 10.0]

# Pair every budget with its multiplier (insertion order follows epsilon_values)
noise_multipliers = dict(zip(epsilon_values, map(calculate_noise_multiplier, epsilon_values)))

# Report one line per budget, multiplier rounded to two decimals
for epsilon in noise_multipliers:
    noise_multiplier = noise_multipliers[epsilon]
    print(f"Epsilon: {epsilon}, Noise Multiplier: {noise_multiplier:.2f}")


Epsilon: 0.5, Noise Multiplier: 8.69
Epsilon: 1.0, Noise Multiplier: 4.34
Epsilon: 1.5, Noise Multiplier: 2.90
Epsilon: 2.0, Noise Multiplier: 2.17
Epsilon: 2.5, Noise Multiplier: 1.74
Epsilon: 3.0, Noise Multiplier: 1.45
Epsilon: 10.0, Noise Multiplier: 0.43


In [3]:
def save_results(model, train_loss, test_log, all_output_gradients,
                 save_directory=None, noise_multiplier=None):
    """
    Saves the training results, including the model state, losses, test metrics, and output gradients,
    using torch.save for better compatibility with PyTorch objects.

    The file is named ``Gaussian_noise_multiplier_<value>.pth`` where ``<value>``
    is the noise multiplier formatted with two decimals and '.' replaced by '_'.
    Because the name depends only on the noise multiplier, repeated calls with
    the same multiplier overwrite the same file.

    Parameters:
        model (torch.nn.Module): Trained model.
        train_loss (list): List of training losses.
        test_log (list): List containing tuples/logs of test accuracy and loss
            (index 0 is stored as accuracy, index 1 as loss).
        all_output_gradients (list): List of output gradients collected during training.
        save_directory (str, optional): Target directory. Defaults to
            '~/Desktop/FL/src/Inversion_Attack_Results/100 epoch' (user-expanded).
        noise_multiplier (float, optional): Value used in the file name.
            Defaults to the notebook-level ``args.noise_multiplier``.

    Returns:
        None. A failure inside torch.save is reported via print and not re-raised.
    """
    if save_directory is None:
        save_directory = os.path.expanduser('~/Desktop/FL/src/Inversion_Attack_Results/100 epoch')
    if noise_multiplier is None:
        # Fall back to the notebook-global argparse namespace, as the original did.
        noise_multiplier = args.noise_multiplier

    # exist_ok=True replaces the exists()/makedirs() pair, which could race with
    # another process creating the directory in between.
    os.makedirs(save_directory, exist_ok=True)

    noise_multiplier_str = f'{noise_multiplier:.2f}'.replace('.', '_')
    file_path = os.path.join(save_directory, f'Gaussian_noise_multiplier_{noise_multiplier_str}.pth')

    results = {
        'model_state_dict': model.state_dict(),  # Save model parameters
        'train_loss': train_loss,
        'test_accuracy': [log[0] for log in test_log],
        'test_loss': [log[1] for log in test_log],
        'all_output_gradients': all_output_gradients  # Save output gradients
    }

    # Deliberately best-effort: a failed save is reported but must not abort
    # the training loop that calls this every round.
    try:
        torch.save(results, file_path)
        print(f"Results saved to {file_path}")
    except Exception as e:
        print(f"Failed to save results: {e}")


In [4]:
import argparse
import sys


# Initialize the parser
parser = argparse.ArgumentParser(description='Federated Learning with Differential Privacy')

# federated arguments
parser.add_argument('--epochs', type=int, default=100, help="number of rounds of training")
parser.add_argument('--num_users', type=int, default=10, help="number of users: K")
parser.add_argument('--frac', type=float, default=0.5, help='the fraction of clients')
parser.add_argument('--local_ep', type=int, default=5, help="the number of local epochs: E")
parser.add_argument('--local_bs', type=int, default=50, help="local batch size: B")

# optimizer arguments
parser.add_argument('--optimizer', type=str, default='sgd', help="type of optimizer")
parser.add_argument('--lr', type=float, default=0.002, help='learning rate')
parser.add_argument('--momentum', type=float, default=0.9, help='SGD momentum (default: 0.9)')

# model arguments
parser.add_argument('--model', type=str, default='cnn', help='model name')
parser.add_argument('--activation', type=str, default="relu", help='activation function (e.g. relu, tanh)')

## DP arguments
parser.add_argument('--withDP', type=int, default=1, help='WithDP')
parser.add_argument('--max_grad_norm', type=float, default=1, help='DP MAX_GRAD_NORM')
parser.add_argument('--delta', type=float, default=1e-4, help='DP DELTA')
parser.add_argument('--virtual_batch_size', type=int, default=50, help='DP VIRTUAL_BATCH_SIZE')
# Must be float: with type=int, any fractional value given on the command line
# (e.g. --sampling_prob 0.01) is rejected by argparse.
parser.add_argument('--sampling_prob', type=float, default=0.001, help='sampling_prob')
parser.add_argument('--noise_multiplier', type=float, default=0.43, help='DP NOISE_MULTIPLIER')

# dataset arguments
parser.add_argument('--dataset', type=str, default='dr', help="name of dataset")
parser.add_argument('--num_classes', type=int, default=5, help="number of classes")
parser.add_argument('--device', default='cuda:0', help="Device to use, e.g. 'cuda:0' or 'cpu' (default: cuda:0).")
parser.add_argument('--iid', type=int, default=1, help='Default set to IID. Set to 0 for non-IID.')
parser.add_argument('--unequal', type=int, default=0, help='whether to use unequal data splits for non-i.i.d setting (use 0 for equal splits)')
parser.add_argument('--sub_dataset_size', type=int, default=-1, help='To reduce original data to a smaller dataset. For experimental purposes.')
parser.add_argument('--local_test_split', type=float, default=0.3, help='local_test_split')
parser.add_argument('--dr_from_np', type=float, default=1, help='for diabetic_retinopathy dataset')
parser.add_argument('--exp_name', type=str, default="exp_results", help="The name of current experiment for logging.")

# Parse the arguments, ignoring unknown ones (inside a notebook sys.argv holds
# the kernel's own arguments, which this parser does not define).
args, unknown = parser.parse_known_args()

# Notebook-level device: the default CUDA device when CUDA is available, else CPU.
# NOTE(review): this does not consult args.device; project code that receives
# `args` may select its device from args.device instead — confirm they agree.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device:", device)


Device: cuda


In [5]:
def _report_first_sample(kind, features, label):
    """Print the shape and dtype of a sample's features and the Python type of its label."""
    print(f"First {kind} image shape:", features.shape)
    print(f"First {kind} image type:", features.dtype)
    print(f"First {kind} label type:", type(label))


# Fetch both splits plus the per-user index groups, then report split sizes
train_dataset, test_dataset, user_groups = get_dataset(args)
print("Train dataset length:", len(train_dataset))
print("Test dataset length:", len(test_dataset))


# Sample 0 of the training split: features and label
train_features, train_labels = train_dataset[0]
_report_first_sample("training", train_features, train_labels)

# Sample 0 of the testing split: features and label
test_features, test_labels = test_dataset[0]
_report_first_sample("testing", test_features, test_labels)


0
Train dataset length: 2931
Test dataset length: 731
First training image shape: torch.Size([3, 224, 224])
First training image type: torch.float32
First training label type: <class 'torch.Tensor'>
First testing image shape: torch.Size([3, 224, 224])
First testing image type: torch.float32
First testing label type: <class 'torch.Tensor'>


In [6]:
from torchvision.models import squeezenet1_1, SqueezeNet1_1_Weights
import torch
import torch.nn as nn
from torchsummary import summary

# DEVICE SETUP
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# BUILD MODEL
weights = SqueezeNet1_1_Weights.IMAGENET1K_V1  # Load the predefined weights
global_model = squeezenet1_1(weights=weights)  # Initialize the model with weights

# Replace the pretrained classification conv with one sized from --num_classes
# (default 5), so the head and the parsed configuration cannot drift apart.
# The input width is read from the layer being replaced instead of hard-coding it.
classifier_in_channels = global_model.classifier[1].in_channels
global_model.classifier[1] = nn.Conv2d(classifier_in_channels, args.num_classes,
                                       kernel_size=(1, 1), stride=(1, 1))
global_model.num_classes = args.num_classes
global_model.to(device)  # Move the model to the appropriate device
summary(global_model, input_size=(3, 224, 224), device=device.type)  # Display the model summary


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 111, 111]           1,792
              ReLU-2         [-1, 64, 111, 111]               0
         MaxPool2d-3           [-1, 64, 55, 55]               0
            Conv2d-4           [-1, 16, 55, 55]           1,040
              ReLU-5           [-1, 16, 55, 55]               0
            Conv2d-6           [-1, 64, 55, 55]           1,088
              ReLU-7           [-1, 64, 55, 55]               0
            Conv2d-8           [-1, 64, 55, 55]           9,280
              ReLU-9           [-1, 64, 55, 55]               0
             Fire-10          [-1, 128, 55, 55]               0
           Conv2d-11           [-1, 16, 55, 55]           2,064
             ReLU-12           [-1, 16, 55, 55]               0
           Conv2d-13           [-1, 64, 55, 55]           1,088
             ReLU-14           [-1, 64,

In [7]:
# Local Models and Optimizers
# Builds one deep copy of the global model per user, with its own optimizer
# and, when --withDP is set, its own PrivacyEngine attached to that optimizer.
import warnings
# NOTE: this hides every UserWarning raised from here on, not only those
# produced in this cell.
warnings.filterwarnings("ignore", category=UserWarning)
local_models = []
local_optimizers = []
local_privacy_engine = []

for u in range(args.num_users):
    local_models.append(copy.deepcopy(global_model))

    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(local_models[u].parameters(), lr=args.lr,
                                    momentum=args.momentum)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(local_models[u].parameters(), lr=args.lr)
    else:
        # Without this branch an unknown name left `optimizer` undefined and
        # surfaced as a NameError further down.
        raise ValueError(
            f"Unsupported optimizer: {args.optimizer!r} (expected 'sgd' or 'adam')"
        )

    if args.withDP:
        privacy_engine = PrivacyEngine(
            local_models[u],
            batch_size=args.virtual_batch_size,
            sample_size=len(user_groups[u]),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.noise_multiplier,
            max_grad_norm=args.max_grad_norm,
        )

        privacy_engine.attach(optimizer)
        local_privacy_engine.append(privacy_engine)

    local_optimizers.append(optimizer)


In [8]:
# DP Model Compatibility
# Validate the global model with Opacus' DPModelInspector; if validation
# raises, convert its batch-norm modules and validate again.
# NOTE(review): the per-user models built in the previous cell were deep-copied
# from global_model before this point, so a conversion here does not reach them.

if args.withDP:
    try:
        inspector = DPModelInspector()
        inspector.validate(global_model)
        print("Model's already Valid!\n")
    except Exception:
        # `except Exception` instead of a bare `except:` so that
        # KeyboardInterrupt / SystemExit are not mistaken for a validation failure.
        global_model = module_modification.convert_batchnorm_modules(global_model)
        inspector = DPModelInspector()
        print(f"Is the model valid? {inspector.validate(global_model)}")
        print("Model is convereted to be Valid!\n")


Model's already Valid!



In [9]:
# Per-user bookkeeping, indexed by user id (one zero-initialised slot per user):
#   u_steps  - step counter handed to and returned by each user's local update
#   epsilons - latest epsilon reported by each user's local update
u_steps = np.zeros(args.num_users)
epsilons = np.zeros_like(u_steps)


In [10]:
# Training
# Runs args.epochs federated rounds. Each round: sample a subset of users,
# run a local update per sampled user on a copy of the global model, average
# the returned weights into the global model, evaluate on the test set, log,
# and save the results.
train_loss = []   # one averaged local loss per round
test_log = []     # one [accuracy, loss] pair per round
epsilon_log = []  # one snapshot of `epsilons` per round (set to None when DP is off)

for epoch in range(args.epochs):
    # Sample the users
    # (without replacement; at least one user even if frac * num_users < 1)
    idxs_users = np.random.choice(range(args.num_users),
                                  max(int(args.frac * args.num_users), 1),
                                  replace=False)

    local_weights, local_losses = [], []
    # Reset every round: only the current round's gradients reach save_results below.
    all_output_gradients = []
    
    for u in idxs_users:
        local_model = LocalUpdate(args=args, dataset=train_dataset, u_id=u, idxs=user_groups[u])
        # Each user receives its own deep copy of the current global model.
        w, loss, u_step, epsilon, user_output_gradients = local_model.update_weights(
                                                model=copy.deepcopy(global_model),
                                                global_round=epoch,
                                                u_step=u_steps[u])
        local_weights.append(copy.deepcopy(w))
        local_losses.append(copy.deepcopy(loss))
        all_output_gradients.extend(user_output_gradients)  # Collect output gradients from each user
        # Persist the per-user values returned by this update for later rounds / logging.
        u_steps[u] = u_step
        epsilons[u] = epsilon

    # Update global weights
    global_weights = average_weights(local_weights)

    # Load updated global weights
    global_model.load_state_dict(global_weights)

    # Mean of the losses reported by this round's sampled users.
    loss_avg = sum(local_losses) / len(local_losses)
    train_loss.append(loss_avg)

    _acc, _loss = test_inference(args, global_model, test_dataset)
    test_log.append([_acc, _loss])

    if args.withDP:
        # list(...) takes a snapshot; `epsilons` itself keeps being updated in place.
        # Users not sampled this round keep their previously stored value.
        epsilon_log.append(list(epsilons))
    else:
        epsilon_log = None

    logging(args, epoch, train_loss, test_log, epsilon_log)

    # Save results including gradients
    # Called every round; save_results derives the file name from
    # args.noise_multiplier only, so each round overwrites the previous file.
    save_results(global_model, train_loss, test_log, all_output_gradients)



Epoch: 1
Average train loss: 1.5676665776968002
Test Accuracy: 60.88%
epsilons: max 29.68,  mean 14.84, std 14.84
Results saved to /home/mahsa/Desktop/FL/src/Inversion_Attack_Results/100 epoch/Gaussian_noise_multiplier_0_43.pth

Epoch: 2
Average train loss: 1.0838954734802246
Test Accuracy: 55.13%
epsilons: max 41.57,  mean 22.56, std 18.93
Results saved to /home/mahsa/Desktop/FL/src/Inversion_Attack_Results/100 epoch/Gaussian_noise_multiplier_0_43.pth

Epoch: 3
Average train loss: 1.191592583656311
Test Accuracy: 62.38%
epsilons: max 51.89,  mean 33.53, std 14.15
Results saved to /home/mahsa/Desktop/FL/src/Inversion_Attack_Results/100 epoch/Gaussian_noise_multiplier_0_43.pth

Epoch: 4
Average train loss: 1.0615926972031593
Test Accuracy: 66.07%
epsilons: max 60.86,  mean 39.02, std 15.91
Results saved to /home/mahsa/Desktop/FL/src/Inversion_Attack_Results/100 epoch/Gaussian_noise_multiplier_0_43.pth

Epoch: 5
Average train loss: 1.003853984475136
Test Accuracy: 67.31%
epsilons: max 6