In [None]:
# Install into the kernel's own environment (%pip rather than !pip). The extras
# are quoted so the shell never treats the square brackets as a glob pattern.
%pip install -q "flwr[simulation]" "flwr_datasets[vision]"
%pip install -q torch torchvision tensorflow scikit-learn matplotlib

In [None]:
import json
import os
import time
from typing import Callable, Dict, List, Optional, Tuple, Union

import flwr as fl
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from flwr.common import (
    Context,
    EvaluateIns,
    EvaluateRes,
    FitIns,
    FitRes,
    MetricsAggregationFn,
    NDArrays,
    Parameters,
    Scalar,
    ndarrays_to_parameters,
    parameters_to_ndarrays,
)
from flwr.server.client_manager import ClientManager
from flwr.server.client_proxy import ClientProxy
from flwr_datasets import FederatedDataset
from flwr_datasets.partitioner import DirichletPartitioner
from flwr_datasets.visualization import plot_label_distributions
from sklearn.feature_selection import mutual_info_regression
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets, transforms

In [None]:
# --- Federation layout -------------------------------------------------------
NUM_CLIENTS = 10   # number of simulated clients / data partitions
NUM_ROUNDS = 15    # number of federated rounds passed to the server config

# --- Local training ----------------------------------------------------------
BATCH_SIZE = 16
LEARNING_RATE = 1e-3

# --- Privacy knobs -----------------------------------------------------------
NOISE_SCALE = 1e-2     # std-dev of the Gaussian noise added by the mechanism/strategy
PRIVACY_WEIGHT = 1e-3  # NOTE(review): not referenced by any code visible in this notebook

# Prefer the GPU when one is visible to torch, otherwise stay on the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:


# Folder that receives the per-metric text logs; created up front so that
# appending to a log file never fails on a missing directory.
RESULTS_DIR = "results"
os.makedirs(RESULTS_DIR, exist_ok=True)

# Tensor conversion followed by normalisation (presumably the MNIST mean/std).
# NOTE(review): no code visible in this notebook reads this module-level
# pipeline; get_dataloader builds its own ToTensor-only transform.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)
def save_metric_to_txt(round_number, metric_name, metric_value, phase="train"):
    """Append one metric reading to that metric's own text file.

    The file lives in RESULTS_DIR and is named ``{metric_name}_{phase}.txt``.
    Each call adds a single line of the form ``Round {round_number}: {metric_value}``.

    Args:
        round_number: Round index written at the start of the line.
        metric_name: Metric identifier; becomes the first part of the file name.
        metric_value: Value to record (formatted with its default string form).
        phase: Suffix of the file name, "train" by default.
    """
    log_path = os.path.join(RESULTS_DIR, f"{metric_name}_{phase}.txt")
    entry = f"Round {round_number}: {metric_value}\n"
    # Append mode so readings from earlier rounds are preserved.
    with open(log_path, "a") as log_file:
        log_file.write(entry)



# Function to split MNIST using Dirichlet
def split_mnist_dirichlet_flwr(num_clients=NUM_CLIENTS, alpha=0.5, seed=42):
    """Partition the MNIST train split across clients with a Dirichlet partitioner.

    Args:
        num_clients: Number of partitions to create (one per client).
        alpha: Concentration parameter forwarded to DirichletPartitioner.
        seed: Seed forwarded to DirichletPartitioner.

    Returns:
        A tuple ``(fds, federated_data)`` where ``fds`` is the FederatedDataset
        and ``federated_data`` maps ``"client_{i}"`` to partition ``i``.
    """
    label_partitioner = DirichletPartitioner(
        num_partitions=num_clients,
        partition_by="label",
        alpha=alpha,
        seed=seed,
    )
    fds = FederatedDataset(dataset="mnist", partitioners={"train": label_partitioner})

    # Load every partition eagerly so clients can look theirs up by key later.
    federated_data = {}
    for client_idx in range(num_clients):
        federated_data[f"client_{client_idx}"] = fds.load_partition(client_idx)

    return fds, federated_data

# Load and visualize data
# Build the partitions once at module level; both objects are reused further down.
fds, federated_data = split_mnist_dirichlet_flwr(num_clients=NUM_CLIENTS)
# Show how the labels are spread over the partitions of the "train" split.
plot_label_distributions(fds.partitioners["train"], label_name="label")

# Function to create DataLoader for each client
def get_dataloader(client_data, batch_size=BATCH_SIZE, shuffle=True):
    """Build a DataLoader over one client's partition.

    Every image is converted with ``transforms.ToTensor`` (no normalisation is
    applied here) and the whole partition is materialised as two tensors.

    Args:
        client_data: Partition exposing an "image" column (images accepted by
            ToTensor, e.g. PIL images) and a "label" column of integer labels.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader reshuffles the samples on every pass.
            Defaults to True, which was the previously hard-coded behaviour;
            pass False for evaluation loaders.

    Returns:
        A DataLoader yielding ``(image_tensor, label_tensor)`` batches.
    """
    # ToTensor turns each image into a float tensor (values in [0, 1] for PIL input).
    to_tensor = transforms.ToTensor()

    x_tensor = torch.stack([to_tensor(img) for img in client_data["image"]])
    y_tensor = torch.tensor(client_data["label"], dtype=torch.long)

    dataset = TensorDataset(x_tensor, y_tensor)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

# Define Neural Network Model
class MNISTModel(nn.Module):
    """Small CNN classifier producing 10 logits.

    Two 3x3 convolutions (padding 1), each followed by ReLU and a 2x2 max-pool,
    feed two linear layers. ``fc1`` expects 32 * 7 * 7 features, which matches
    single-channel 28x28 inputs (the two poolings reduce 28 -> 14 -> 7).
    """

    def __init__(self):
        super().__init__()
        # Layer names and creation order are kept stable: they define the
        # state_dict keys and the order of the exchanged parameter list.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(in_features=32 * 7 * 7, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        """Return raw class logits of shape (batch, 10)."""
        feats = F.max_pool2d(F.relu(self.conv1(x)), 2)
        feats = F.max_pool2d(F.relu(self.conv2(feats)), 2)
        # Flatten everything except the batch dimension.
        flat = feats.view(feats.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

# Define PPAN Encoder and Adversary
class PPAN_Encoder(nn.Module):
    """Two-layer MLP mapping a vector to another vector of the same size.

    Args:
        input_dim: Size of the input (and output) vector.
        hidden_dim: Width of the hidden layer, 32 by default.
    """

    def __init__(self, input_dim, hidden_dim=32):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, input_dim)

    def forward(self, x):
        """Return the encoded vector, detached from the autograd graph."""
        hidden = F.relu(self.fc1(x))
        encoded = self.fc2(hidden)
        # The result is detached, so no gradient flows back into the encoder
        # through the value returned here.
        return encoded.detach()

class PPAN_Adversary(nn.Module):
    """Two-layer MLP that maps an encoded vector back to the input dimension.

    Args:
        input_dim: Size of the input (and output) vector.
        hidden_dim: Width of the hidden layer.
    """

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, input_dim)

    def forward(self, x):
        """Return the reconstruction; unlike the encoder it stays in the graph."""
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)

# Define PrivacyMechanism with encrypt and decrypt functions
class PrivacyMechanism(nn.Module):
    """Encoder/adversary pair with optional Gaussian noise.

    "Encrypting" here means passing the vector through ``PPAN_Encoder`` and,
    in training mode only, adding zero-mean Gaussian noise scaled by
    ``noise_scale``. "Decrypting" means passing a vector through
    ``PPAN_Adversary``. Both sub-networks use a hidden width of 64.

    Args:
        input_dim: Length of the vectors being transformed.
        noise_scale: Multiplier applied to the standard-normal noise.
    """

    def __init__(self, input_dim, noise_scale=NOISE_SCALE):
        super().__init__()
        self.encoder = PPAN_Encoder(input_dim, hidden_dim=64)
        self.adversary = PPAN_Adversary(input_dim, hidden_dim=64)
        self.noise_scale = noise_scale

    def encrypt(self, x):
        """Encode ``x``; add noise when the module is in training mode."""
        encoded = self.encoder(x)
        if not self.training:
            return encoded
        return encoded + torch.randn_like(encoded) * self.noise_scale

    def decrypt(self, encrypted):
        """Run the adversary network on an encoded vector."""
        return self.adversary(encrypted)

    def forward(self, x):
        """Return ``(encrypted, decoded)`` for the input ``x``."""
        encrypted = self.encrypt(x)
        return encrypted, self.decrypt(encrypted)

# Compute Privacy Leakage using mutual_info_regression
def compute_privacy_leakage(encrypted_weights, original_weights):
    """Estimate mutual information between transformed and original weights.

    Both inputs are converted to NumPy arrays and flattened; if their lengths
    differ, the longer one is truncated to the shorter length. The transformed
    weights act as the single feature column and the original weights as the
    regression target of ``mutual_info_regression``.

    Returns:
        The mutual-information score, or 0.0 when the estimator raises
        ``ValueError``.
    """
    feature_col = np.array(encrypted_weights).reshape(-1, 1)
    target = np.array(original_weights).reshape(-1)

    # Align the two series on their common prefix.
    n_common = min(len(feature_col), len(target))
    feature_col = feature_col[:n_common]
    target = target[:n_common]

    try:
        return mutual_info_regression(feature_col, target)[0]
    except ValueError:
        # Best effort: report "no leakage measured" instead of aborting the round.
        return 0.0

# Compute Distortion
def compute_distortion(original_weights, encrypted_weights):
    """Return the mean squared difference between the two weight collections.

    Both arguments are converted to NumPy arrays first, so lists and arrays
    are accepted; the usual NumPy broadcasting rules apply to the subtraction.
    """
    reference = np.array(original_weights)
    perturbed = np.array(encrypted_weights)
    squared_error = np.square(reference - perturbed)
    return squared_error.mean()

# Evaluate model accuracy on test set
def evaluate_model(model, test_loader):
    """Return the model's accuracy on ``test_loader`` as a percentage (0-100).

    The model is switched to eval mode and run without gradient tracking;
    every batch is moved to DEVICE before the forward pass.
    """
    model.eval()
    n_correct, n_seen = 0, 0
    with torch.no_grad():
        for batch_images, batch_labels in test_loader:
            batch_images = batch_images.to(DEVICE)
            batch_labels = batch_labels.to(DEVICE)
            logits = model(batch_images)
            # Index of the largest logit along the class dimension.
            predicted = torch.max(logits, 1)[1]
            n_correct += (predicted == batch_labels).sum().item()
            n_seen += batch_labels.size(0)
    return 100 * n_correct / n_seen

# Define PrivacyClient for Federated Learning
class PrivacyClient(fl.client.NumPyClient):
    """Flower NumPy client that trains a classifier locally and reports privacy metrics.

    Besides loss and accuracy, every ``fit`` and ``evaluate`` call passes the
    flattened model weights through a ``PrivacyMechanism`` and reports the
    mutual information ("privacy_leakage") and the mean squared error
    ("distortion") between the original and the transformed weights. The
    weights sent back to the server are the plain, untransformed ones.

    Args:
        model: The torch module to train; it is moved to DEVICE.
        train_loader: Loader yielding ``(images, labels)`` batches for training.
        test_loader: Loader yielding ``(images, labels)`` batches for evaluation.
    """

    def __init__(self, model, train_loader, test_loader):
        self.model = model.to(DEVICE)
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.param_shapes = [p.shape for p in self.model.parameters()]
        self.total_params = sum(p.numel() for p in self.model.parameters())
        self.privacy_mech = PrivacyMechanism(self.total_params, noise_scale=NOISE_SCALE).to(DEVICE)
        # The mechanism's parameters are registered with the optimizer, but the
        # only loss optimised in this class is the classifier's cross-entropy,
        # so they receive no gradients here.
        self.optimizer = optim.Adam(
            list(self.model.parameters()) + list(self.privacy_mech.parameters()),
            lr=LEARNING_RATE,
        )

    def get_parameters(self, config=None):
        """Return the model state as a list of NumPy arrays.

        The list follows ``state_dict()`` order so that it lines up with the
        keys used by ``set_parameters`` (including any buffers).
        """
        return [tensor.cpu().numpy() for tensor in self.model.state_dict().values()]

    def set_parameters(self, parameters):
        """Load a list of NumPy arrays (in ``state_dict()`` order) into the model."""
        tensors = [torch.tensor(p, dtype=torch.float32, device=DEVICE) for p in parameters]
        state_dict = dict(zip(self.model.state_dict().keys(), tensors))
        self.model.load_state_dict(state_dict, strict=True)

    def _privacy_metrics(self) -> Tuple[float, float]:
        """Return ``(privacy_leakage, distortion)`` for the current model weights.

        The trainable parameters are flattened into one row vector, passed
        through ``privacy_mech.encrypt`` and compared with the original vector.
        """
        with torch.no_grad():
            flat_params = torch.cat(
                [p.detach().reshape(-1) for p in self.model.parameters()]
            ).unsqueeze(0)
            encrypted_params = self.privacy_mech.encrypt(flat_params)

        original_np = flat_params.cpu().numpy().ravel()
        encrypted_np = encrypted_params.cpu().numpy().ravel()

        privacy_leakage = float(compute_privacy_leakage(encrypted_np, original_np))
        distortion = float(compute_distortion(original_np, encrypted_np))
        return privacy_leakage, distortion

    def fit(self, parameters: NDArrays, config: Dict[str, Scalar]) -> Tuple[NDArrays, int, Dict[str, Scalar]]:
        """Train for one pass over ``train_loader`` starting from ``parameters``.

        Returns:
            The updated weights, the number of training examples, and a metrics
            dict with "loss", "accuracy", "privacy_leakage" and "distortion".
        """
        self.set_parameters(parameters)
        self.model.train()
        self.privacy_mech.train()

        total_loss = 0.0
        correct = 0
        total = 0  # number of samples actually seen

        for images, labels in self.train_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            self.optimizer.zero_grad()

            outputs = self.model(images)
            loss = F.cross_entropy(outputs, labels)

            loss.backward()
            self.optimizer.step()

            # cross_entropy returns the batch mean; weight it by the batch size
            # so the final average is per sample rather than per batch.
            total_loss += loss.item() * labels.size(0)
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)

        # Guard against an empty loader instead of dividing by zero.
        avg_loss = total_loss / total if total else 0.0
        accuracy = correct / total if total else 0.0

        privacy_leakage, distortion = self._privacy_metrics()

        return self.get_parameters(), len(self.train_loader.dataset), {
            "loss": float(avg_loss),
            "accuracy": float(accuracy),
            "privacy_leakage": privacy_leakage,
            "distortion": distortion,
        }

    def evaluate(self, parameters: NDArrays, config: Dict[str, Scalar]) -> Tuple[float, int, Dict[str, Scalar]]:
        """Evaluate ``parameters`` on ``test_loader`` and log the metrics to disk.

        Returns:
            The per-sample average loss, the number of evaluation examples, and
            a metrics dict with "accuracy", "privacy_leakage" and "distortion".
        """
        self.set_parameters(parameters)
        self.model.eval()

        total_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in self.test_loader:
                images, labels = images.to(DEVICE), labels.to(DEVICE)
                outputs = self.model(images)
                loss = F.cross_entropy(outputs, labels)
                # Sample-weighted, consistent with fit(): a smaller last batch
                # must not count as much as a full one.
                total_loss += loss.item() * labels.size(0)
                correct += (outputs.argmax(dim=1) == labels).sum().item()
                total += labels.size(0)

        avg_loss = total_loss / total if total else 0.0
        accuracy = correct / total if total else 0.0

        privacy_leakage, distortion = self._privacy_metrics()

        # Persist the readings with the logging helper defined in this notebook.
        round_number = int(config.get("round", 0))
        save_metric_to_txt(round_number, "loss", avg_loss, phase="eval")
        save_metric_to_txt(round_number, "accuracy", accuracy, phase="eval")
        save_metric_to_txt(round_number, "privacy_leakage", privacy_leakage, phase="eval")
        save_metric_to_txt(round_number, "distortion", distortion, phase="eval")

        return float(avg_loss), len(self.test_loader.dataset), {
            "accuracy": float(accuracy),
            "privacy_leakage": float(privacy_leakage),
            "distortion": float(distortion),
        }
def aggregate_weighted_parameters(results: List[Tuple[NDArrays, int]]) -> Optional[NDArrays]:
    """Aggregate model parameters with an example-count weighted average.

    Args:
        results: One ``(parameters, num_examples)`` pair per client, where
            ``parameters`` is a list of NumPy arrays with matching shapes
            across clients.

    Returns:
        The list of averaged arrays, or ``None`` when the total number of
        examples is zero (which includes an empty ``results`` list).
        Floating-point inputs keep their dtype; other dtypes are averaged in
        float64, because an in-place float add into an integer array fails.
    """
    total_examples = sum(num_examples for _, num_examples in results)
    if total_examples == 0:
        return None

    # One zero-filled accumulator per layer, shaped like the first client's arrays.
    weighted_params = []
    for param in results[0][0]:
        param = np.asarray(param)
        acc_dtype = param.dtype if np.issubdtype(param.dtype, np.floating) else np.float64
        weighted_params.append(np.zeros(param.shape, dtype=acc_dtype))

    for parameters, num_examples in results:
        weight = num_examples / total_examples
        for accumulator, param in zip(weighted_params, parameters):
            accumulator += np.asarray(param) * weight

    return weighted_params

class FedAvg_Privacy(fl.server.strategy.FedAvg):
    """FedAvg strategy that adds Gaussian noise to the aggregated weights.

    After the parent class has averaged the client updates, zero-mean Gaussian
    noise with standard deviation ``noise_scale`` is added to every aggregated
    array. The unweighted mean of the clients' "loss", "accuracy",
    "privacy_leakage" and "distortion" metrics is appended to the per-metric
    text logs for each round.
    """

    def __init__(
        self,
        fraction_fit: float = 1.0,
        fraction_evaluate: float = 1.0,
        min_fit_clients: int = 2,
        min_evaluate_clients: int = 2,
        min_available_clients: int = 2,
        evaluate_fn: Optional[
            Callable[[int, NDArrays, Dict[str, Scalar]],
                    Optional[Tuple[float, Dict[str, Scalar]]]]
        ] = None,
        on_fit_config_fn: Optional[Callable[[int], Dict[str, Scalar]]] = None,
        on_evaluate_config_fn: Optional[Callable[[int], Dict[str, Scalar]]] = None,
        accept_failures: bool = True,
        initial_parameters: Optional[Parameters] = None,
        fit_metrics_aggregation_fn: Optional[MetricsAggregationFn] = None,
        evaluate_metrics_aggregation_fn: Optional[MetricsAggregationFn] = None,
        noise_scale: float = NOISE_SCALE,
    ) -> None:
        """Forward the standard FedAvg options and remember ``noise_scale``."""
        super().__init__(
            fraction_fit=fraction_fit,
            fraction_evaluate=fraction_evaluate,
            min_fit_clients=min_fit_clients,
            min_evaluate_clients=min_evaluate_clients,
            min_available_clients=min_available_clients,
            evaluate_fn=evaluate_fn,
            on_fit_config_fn=on_fit_config_fn,
            on_evaluate_config_fn=on_evaluate_config_fn,
            accept_failures=accept_failures,
            initial_parameters=initial_parameters,
            fit_metrics_aggregation_fn=fit_metrics_aggregation_fn,
            evaluate_metrics_aggregation_fn=evaluate_metrics_aggregation_fn,
        )
        self.noise_scale = noise_scale

    def _add_noise(self, array):
        """Return ``array`` plus N(0, noise_scale) noise.

        Floating-point arrays keep their dtype (NumPy's noise is float64 and
        would otherwise silently upcast float32 weights); other dtypes are
        returned as the float64 sum.
        """
        noised = array + np.random.normal(0.0, self.noise_scale, array.shape)
        if np.issubdtype(array.dtype, np.floating):
            return noised.astype(array.dtype, copy=False)
        return noised

    def aggregate_fit(
        self,
        server_round: int,
        results: list[tuple[ClientProxy, FitRes]],
        failures: list[BaseException],
    ) -> tuple[Optional[Parameters], dict[str, Scalar]]:
        """Aggregate fit results with FedAvg, then perturb the result with noise.

        Returns:
            ``(parameters, metrics)`` where ``parameters`` is ``None`` when
            nothing could be aggregated, and ``metrics`` is whatever the parent
            class produced via ``fit_metrics_aggregation_fn`` (empty if unset).
        """
        if not results:
            return None, {}

        # Do not aggregate if there are failures and failures are not accepted.
        if not self.accept_failures and failures:
            return None, {}

        # The parent returns a (parameters, metrics) tuple; the parameters
        # element, not the tuple, is what may be None. The metrics element
        # already holds the output of fit_metrics_aggregation_fn, so it is not
        # computed a second time here.
        parameters_aggregated, metrics_aggregated = super().aggregate_fit(
            server_round, results, failures
        )
        if parameters_aggregated is None:
            return None, {}

        ndarrays = parameters_to_ndarrays(parameters_aggregated)
        parameters_noised = [self._add_noise(p) for p in ndarrays]

        # Log the plain (unweighted) mean of each client-reported metric.
        for metric_name in ("loss", "accuracy", "privacy_leakage", "distortion"):
            mean_value = float(np.mean([res.metrics[metric_name] for _, res in results]))
            save_metric_to_txt(server_round, metric_name, mean_value, phase="train")

        return ndarrays_to_parameters(parameters_noised), metrics_aggregated


# Config functions for fit and evaluate
def fit_config(server_round: int) -> Dict[str, Scalar]:
    """Build the configuration sent to clients before each training round.

    The values are numeric (float learning rate, int batch size and round
    number), hence the ``Scalar`` value type rather than ``str``.
    """
    return {
        "learning_rate": LEARNING_RATE,
        "batch_size": BATCH_SIZE,
        "round": server_round,
    }

def evaluate_config(server_round: int) -> Dict[str, Scalar]:
    """Build the configuration sent to clients before each evaluation round.

    The values are integers (batch size and round number), hence the
    ``Scalar`` value type rather than ``str``.
    """
    return {
        "batch_size": BATCH_SIZE,
        "round": server_round,
    }

def aggregate_fit_metrics(metrics: List[Tuple[int, Dict[str, Scalar]]]) -> Dict[str, Scalar]:
    """Average every reported metric over the clients, ignoring example counts.

    Each metric's sum is divided by the total number of clients in ``metrics``,
    including clients that did not report that particular metric.
    """
    sums = {}
    for _num_examples, client_metrics in metrics:
        for name in client_metrics:
            sums[name] = sums.get(name, 0) + client_metrics[name]

    n_clients = len(metrics)
    return {name: total / n_clients for name, total in sums.items()}

# Federated Learning Simulation
def client_fn(context: Context) -> fl.client.Client:
    """Create the Flower client for the partition named in ``context``.

    The partition id is read from ``context.node_config["partition-id"]`` and
    looked up in ``federated_data``. The partition is split 80/20 into train
    and held-out samples so that evaluation does not run on the very samples
    the client trained on.

    Raises:
        ValueError: If no partition exists for the requested id.
    """
    partition_id = context.node_config["partition-id"]

    # Reject ids that have no matching partition.
    client_key = f"client_{partition_id}"
    if client_key not in federated_data:
        raise ValueError(f"Client ID {partition_id} không tồn tại trong dữ liệu!")
    client_data = federated_data[client_key]

    # Fixed seed keeps the same split every time this client is re-created.
    split = client_data.train_test_split(test_size=0.2, seed=42)
    train_loader = get_dataloader(split["train"])
    test_loader = get_dataloader(split["test"])
    model = MNISTModel()

    return PrivacyClient(model, train_loader, test_loader).to_client()

def main():
    """Run the federated simulation with the noise-adding FedAvg strategy.

    Every round requires all NUM_CLIENTS clients for both fit and evaluate.
    """
    server_config = fl.server.ServerConfig(num_rounds=NUM_ROUNDS)

    privacy_strategy = FedAvg_Privacy(
        # Participation: use every client, every round.
        fraction_fit=1.0,
        fraction_evaluate=1.0,
        min_fit_clients=NUM_CLIENTS,
        min_evaluate_clients=NUM_CLIENTS,
        min_available_clients=NUM_CLIENTS,
        # Per-round configuration and metric aggregation hooks.
        on_fit_config_fn=fit_config,
        on_evaluate_config_fn=evaluate_config,
        fit_metrics_aggregation_fn=aggregate_fit_metrics,
        noise_scale=NOISE_SCALE,
    )

    fl.simulation.start_simulation(
        client_fn=client_fn,
        num_clients=NUM_CLIENTS,
        config=server_config,
        strategy=privacy_strategy,
    )


if __name__ == "__main__":
    main()