# Setting up the experiment

In [4]:
!python -m pip install https://github.com/Nuullll/intel-extension-for-pytorch/releases/download/v2.1.10%2Bxpu/intel_extension_for_pytorch-2.1.10+xpu-cp310-cp310-win_amd64.whl

Collecting intel-extension-for-pytorch==2.1.10+xpu
  Downloading https://github.com/Nuullll/intel-extension-for-pytorch/releases/download/v2.1.10%2Bxpu/intel_extension_for_pytorch-2.1.10+xpu-cp310-cp310-win_amd64.whl (367.2 MB)
Installing collected packages: intel-extension-for-pytorch
Successfully installed intel-extension-for-pytorch-2.1.10+xpu


You should consider upgrading via the 'c:\Users\leon1\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


# Centralized training

In [14]:
import torch
import intel_extension_for_pytorch as ipex
import datetime
import partition_scripts
from neural_nets import get_parameters, set_parameters, train, test, Net, centralized_training, VGG7


# Cache for partitioned datasets, one slot per dataset/distribution pair.
# Every slot starts out empty (None); the experiment cells fill the slot they
# need right before running.
DATA_STORE = dict.fromkeys(
    (
        "CIFAR10_IID",
        "CIFAR10_NonIID",
        "CIFAR100_IID",
        "CIFAR100_NonIID",
        "FedFaces_IID",
        "FedFaces_NonIID",
    )
)

In [15]:
experiments = ["CIFAR10", "CIFAR100", "CelebA", "FedFaces"]
epochs = 400

def run_centralized(experiment):
    match experiment:
        case "CIFAR10":
            DATA_STORE["CIFAR10"] = partition_scripts.partition_CIFAR_IID(2)
            dataloaders, valloaders, testloaders = DATA_STORE["CIFAR10"]
            net = VGG7(classes=10)
            centralized_training(trainloader=dataloaders[0], valloader=valloaders[0], testloader=testloaders, net=net, epochs=epochs, classes=10, DEVICE="cpu")
        case "CIFAR100":
            DATA_STORE["CIFAR100"] = partition_scripts.partition_CIFAR_IID(2, "CIFAR100")
            dataloaders, valloaders, testloaders = DATA_STORE["CIFAR100"]
            net = VGG7(classes=100)
            centralized_training(trainloader=dataloaders[0], valloader=valloaders[0], testloader=testloaders, epochs=epochs, classes=100)
        case "CelebA":
            DATA_STORE["CelebA"] = partition_scripts.partition_CelebA_IID(2)
            dataloaders, valloaders, testloaders = DATA_STORE["CelebA"]
            net = VGG7(classes=2, shape=(64, 64))
            centralized_training(trainloader=dataloaders[0], valloader=valloaders[0], testloader=testloaders, epochs=epochs, net=net)
        case "FedFaces":
            DATA_STORE["FedFaces"] = partition_scripts.partition_FedFaces_IID(2)
            dataloaders, valloaders, testloaders = DATA_STORE["FedFaces"]
            net = VGG7(classes=4, shape=(64,64))
            centralized_training(trainloader=dataloaders[0], valloader=valloaders[0], net=net, testloader=testloaders, epochs=epochs, classes=3)
        case _:
            pass



In [5]:
# Centralized baseline for experiments[0] ("CIFAR10").
run_centralized(experiments[0])

Epoch 1: validation loss 0.02746488166809082, accuracy 0.344
Epoch 2: validation loss 0.024026091976165773, accuracy 0.4264
Epoch 3: validation loss 0.022680024433135985, accuracy 0.4616
Epoch 4: validation loss 0.021432508878707887, accuracy 0.52056
Epoch 5: validation loss 0.020805716314315797, accuracy 0.538
Epoch 6: validation loss 0.018972875962257385, accuracy 0.59696
Epoch 7: validation loss 0.01824114131450653, accuracy 0.59728
Epoch 8: validation loss 0.01795127950668335, accuracy 0.61104
Epoch 9: validation loss 0.016901257915496827, accuracy 0.61424
Epoch 10: validation loss 0.017062027134895325, accuracy 0.6008
Epoch 11: validation loss 0.017588741207122803, accuracy 0.60216
Epoch 12: validation loss 0.01673342333316803, accuracy 0.63192
Epoch 13: validation loss 0.014955187039375305, accuracy 0.67016
Epoch 14: validation loss 0.017368359627723692, accuracy 0.62392
Epoch 15: validation loss 0.015764442863464356, accuracy 0.66288
Epoch 16: validation loss 0.01915823311328888

In [6]:
# Centralized baseline for experiments[1] ("CIFAR100").
run_centralized(experiments[1])

Epoch 1: validation loss 0.06441693063735962, accuracy 0.06976
Epoch 2: validation loss 0.05947630434036255, accuracy 0.12296
Epoch 3: validation loss 0.05646490665435791, accuracy 0.1512
Epoch 4: validation loss 0.053201660423278806, accuracy 0.19216
Epoch 5: validation loss 0.05117382019042969, accuracy 0.21968
Epoch 6: validation loss 0.050081351013183596, accuracy 0.24328
Epoch 7: validation loss 0.049269660911560056, accuracy 0.25688
Epoch 8: validation loss 0.05062286931991577, accuracy 0.25696
Epoch 9: validation loss 0.05255579975128174, accuracy 0.25688
Epoch 10: validation loss 0.05546677583694458, accuracy 0.25712
Epoch 11: validation loss 0.061887386436462404, accuracy 0.2492
Epoch 12: validation loss 0.06774099569320678, accuracy 0.24688
Epoch 13: validation loss 0.07612566272735596, accuracy 0.24784
Epoch 14: validation loss 0.08541844493865967, accuracy 0.24088
Epoch 15: validation loss 0.09222926691055298, accuracy 0.234
Epoch 16: validation loss 0.09543277019500733, ac

In [16]:

# Centralized baseline for experiments[2] ("CelebA").
run_centralized(experiments[2])

Loaded from NPZ
Epoch 1: validation loss 0.010139092280890641, accuracy 0.6296818364881193
Epoch 2: validation loss 0.009425596344917473, accuracy 0.693717277486911
Epoch 3: validation loss 0.008470487400163243, accuracy 0.7470801449859041
Epoch 4: validation loss 0.008511355182023188, accuracy 0.7380185259766412
Epoch 5: validation loss 0.008332371537694037, accuracy 0.7505034232782923
Epoch 6: validation loss 0.008443343353713178, accuracy 0.7557390253725332
Epoch 7: validation loss 0.008134617387312245, accuracy 0.7484897301651229
Epoch 8: validation loss 0.00910601230504001, accuracy 0.7225130890052356
Epoch 9: validation loss 0.009103907040420483, accuracy 0.738824003221909
Epoch 10: validation loss 0.007724152119515934, accuracy 0.7720499395892066
Epoch 11: validation loss 0.008195803493585407, accuracy 0.765807490938381
Epoch 12: validation loss 0.007608979322283296, accuracy 0.7750704792589609
Epoch 13: validation loss 0.007976552428759446, accuracy 0.7762786951268627
Epoch 14:

In [7]:
# Centralized baseline for the last entry of experiments ("FedFaces").
run_centralized(experiments[-1])

Minority class count: 9266
Epoch 1: validation loss 0.01593546709660141, accuracy 0.4782215796132297
Epoch 2: validation loss 0.016530669125471284, accuracy 0.40683173685161755
Epoch 3: validation loss 0.01581727283116859, accuracy 0.48093258630037955
Epoch 4: validation loss 0.015543200257413538, accuracy 0.4590637990240376
Epoch 5: validation loss 0.015726354773687064, accuracy 0.48436652810410263
Epoch 6: validation loss 0.015897256565766203, accuracy 0.4742454364720766
Epoch 7: validation loss 0.015633680625266072, accuracy 0.4832821254292427
Epoch 8: validation loss 0.015335834868751855, accuracy 0.5107536598590277
Epoch 9: validation loss 0.014758833758593597, accuracy 0.5338875835893728
Epoch 10: validation loss 0.01551174245991544, accuracy 0.5156334718958974
Epoch 11: validation loss 0.01624029322168656, accuracy 0.4431592264594253
Epoch 12: validation loss 0.014197020284655124, accuracy 0.5553949033074281
Epoch 13: validation loss 0.014759313191941821, accuracy 0.515994939454

# Setting up a FLWR environment

In [8]:
import flwr as fl
import flwr.server.strategy as strategy

from flwr.common import Metrics


# Configure Flower's logger with a log file name that carries the start-up timestamp.
today = datetime.datetime.today()
fl.common.logger.configure(
    identifier="FL Paper Experiment",
    filename=f"log_FLWR_{today.timestamp()}.txt",
)

# Use the first CUDA device when PyTorch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")

print(f"Training on {DEVICE} using PyTorch {torch.__version__} and Flower {fl.__version__}")

# Simulation-wide constants.
NUM_CLIENTS, TRAINING_ROUNDS = 20, 50

# Per-client resources for the simulation; raise "num_gpus" if clients need a GPU
# (defaults to 1 CPU and 0 GPU).
client_resources = dict(num_cpus=1, num_gpus=0)

Training on cpu using PyTorch 2.1.0a0+cxx11.abi and Flower 1.7.0


Now, we'll set up the Client configurations

In [9]:
from typing import List, Tuple
from logging import DEBUG, INFO
from flwr.common.logger import log

class FlowerClient(fl.client.NumPyClient):
    """Flower NumPy client that trains and evaluates one model on its own loaders."""

    def __init__(self, net, trainloader, valloader, cid):
        self.net = net  # model that is trained / evaluated locally
        self.trainloader = trainloader  # loader handed to train()
        self.valloader = valloader  # loader handed to test()
        self.cid = cid  # client id, used in the debug log line of fit()
        self.round = 0  # number of fit() calls completed by this instance

    @staticmethod
    def _num_examples(loader):
        """Return the number of samples behind ``loader``.

        For a torch ``DataLoader``, ``len(loader)`` is the number of batches,
        while Flower weights clients by the ``num_examples`` they report. The
        size of ``loader.dataset`` is therefore preferred; ``len(loader)`` is
        kept as a fallback for loaders without a sized ``dataset``.
        """
        dataset = getattr(loader, "dataset", None)
        if dataset is not None:
            try:
                return len(dataset)
            except TypeError:
                # Dataset without a length (e.g. iterable-style): fall through.
                pass
        return len(loader)

    def get_parameters(self, config):
        """Return the current local model parameters."""
        return get_parameters(self.net)

    def fit(self, parameters, config):
        """Load ``parameters`` into the local model and train it for one epoch.

        Returns:
            The updated parameters, the number of training examples, and an
            empty metrics dict.
        """
        set_parameters(self.net, parameters)
        train(self.net, self.trainloader, epochs = 1)
        self.round+=1
        log(DEBUG, f"Client {self.cid} in round {self.round}")
        return get_parameters(self.net), self._num_examples(self.trainloader), {}

    def evaluate(self, parameters, config):
        """Load ``parameters`` into the local model and evaluate it on the validation loader.

        Returns:
            The loss, the number of validation examples, and a dict holding
            the accuracy as a float.
        """
        set_parameters(self.net, parameters)
        loss, accuracy = test(self.net, self.valloader)
        return loss, self._num_examples(self.valloader), {'accuracy': float(accuracy)}


def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics:
    """Aggregate per-client accuracies into one example-weighted average.

    Args:
        metrics: ``(num_examples, client_metrics)`` pairs; every
            ``client_metrics`` dict must contain an ``"accuracy"`` entry.

    Returns:
        ``{"accuracy": value}`` where ``value`` is the average accuracy
        weighted by ``num_examples``. When there is nothing to average (no
        entries, or a total of zero examples) the accuracy is reported as 0.0
        instead of raising ``ZeroDivisionError``.
    """
    total_examples = sum(num_examples for num_examples, _ in metrics)
    if total_examples == 0:
        return {"accuracy": 0.0}

    # Weight each client's accuracy by the number of examples it evaluated on.
    weighted_sum = sum(num_examples * m["accuracy"] for num_examples, m in metrics)
    return {"accuracy": weighted_sum / total_examples}

Next, we set up the strategies.

#### FedAvg on CIFAR ####

In [10]:
# FedAvg strategy used by the CIFAR experiments.
fedAvg = fl.server.strategy.FedAvg(
    # Training: fraction of clients used for fit and the minimum number required.
    fraction_fit=1,
    min_fit_clients=1,
    # Evaluation: fraction of clients used for evaluate and the minimum number required.
    fraction_evaluate=0.5,
    min_evaluate_clients=1,
    # Minimum number of clients that must be available.
    min_available_clients=1,
    # Client accuracies are combined with the weighted_average function.
    evaluate_metrics_aggregation_fn=weighted_average,
)


# A couple of client_fns for using with Flower, one for each dataset experiment
def _client_from_store(cid: str, store_key: str) -> fl.client.Client:
    """Build the Flower client for client ``cid`` from ``DATA_STORE[store_key]``.

    Args:
        cid: Client id; ``int(cid)`` selects the client's train/validation loader.
        store_key: Key of the ``DATA_STORE`` slot holding the partitioned loaders.

    Raises:
        RuntimeError: If the requested ``DATA_STORE`` slot has not been populated.
    """
    partitions = DATA_STORE[store_key]
    if partitions is None:
        # Without this check the tuple unpacking below fails with an opaque
        # "cannot unpack non-iterable NoneType" error.
        raise RuntimeError(
            f"DATA_STORE[{store_key!r}] is empty; partition the dataset before creating clients"
        )
    trainloaders, valloaders, _ = partitions

    # Each client gets its own trainloader/valloader, so every client trains
    # and evaluates on its own data.
    index = int(cid)
    net = Net().to(DEVICE)
    return FlowerClient(net, trainloaders[index], valloaders[index], cid).to_client()


def client_fn_CIFAR10_IID(cid: str) -> fl.client.Client:
    """Create a Flower client representing a single organization (CIFAR-10, IID split)."""
    return _client_from_store(cid, "CIFAR10_IID")


def client_fn_CIFAR10_nonIID(cid: str) -> fl.client.Client:
    """Create a Flower client representing a single organization (CIFAR-10, non-IID split)."""
    return _client_from_store(cid, "CIFAR10_NonIID")

A quick run to check that Flower is working correctly.

In [11]:
# Optional smoke test: set `run` to True to launch a short two-round simulation.
run = False
if run:
    fl.simulation.start_simulation(
        client_fn=client_fn_CIFAR10_IID,
        strategy=fedAvg,
        num_clients=NUM_CLIENTS,
        client_resources=client_resources,
        config=fl.server.ServerConfig(num_rounds=2),
    )

### FedExperiment Class ###
Now I'll encapsulate this code in a class so it can be reused with different strategies and datasets.

In [12]:
class FedExperiment():
    """A named pairing of a client factory and a strategy that can be simulated."""

    def __init__(self, client_fn,strategy, name="New experiment"):
        self.name = name  # label used in the start/end log banners
        self.strategy = strategy  # strategy passed to the simulation
        self.client_fn = client_fn  # client factory passed to the simulation

    def simulate_FL(self, rounds=1):
        """Run the simulation for ``rounds`` rounds and return what it returns.

        The simulation uses the notebook-level ``NUM_CLIENTS`` and
        ``client_resources``; a banner is logged at INFO level before and
        after the run.
        """
        rule = 10 * "========"
        log(INFO, "\n" + rule + "\n" + self.name + " has started\n" + rule)
        server_config = fl.server.ServerConfig(num_rounds=rounds)
        history = fl.simulation.start_simulation(
            client_fn=self.client_fn,
            num_clients=NUM_CLIENTS,
            config=server_config,
            strategy=self.strategy,
            client_resources=client_resources,
        )
        log(INFO, "\n" + rule + "\n" + self.name + " has ended\n" + rule)
        return history

In [13]:
# Partition CIFAR-10 (IID) for all simulated clients, run five rounds, and
# always release the partitions afterwards, even if the simulation fails.
DATA_STORE["CIFAR10_IID"] = partition_scripts.partition_CIFAR_IID(NUM_CLIENTS, "CIFAR10")
try:
    exp_CIFAR10_IID = FedExperiment(client_fn=client_fn_CIFAR10_IID, strategy=fedAvg, name="CIFAR 10 - IID Distribution")
    metrics = exp_CIFAR10_IID.simulate_FL(rounds=5)
    print(metrics)
finally:
    DATA_STORE["CIFAR10_IID"] = None

ValueError: Sum of input lengths does not equal the length of the input dataset!

In [None]:
# Partition CIFAR-10 (non-IID) for all simulated clients, run five rounds, and
# always release the partitions afterwards, even if the simulation fails.
DATA_STORE["CIFAR10_NonIID"] = partition_scripts.partition_CIFAR_nonIID(NUM_CLIENTS)
try:
    exp_CIFAR10_nonIID = FedExperiment(client_fn=client_fn_CIFAR10_nonIID, strategy=fedAvg, name="CIFAR 10 - nonIID Distribution")
    metrics = exp_CIFAR10_nonIID.simulate_FL(rounds=5)
    print(metrics)
finally:
    # Free the slot, as the IID experiment cell does, instead of
    # re-partitioning the dataset after the run.
    DATA_STORE["CIFAR10_NonIID"] = None

Shape CIFAR nonIID: (32, 32, 3)


INFO flwr 2024-02-23 14:24:39,699 | 527930960.py:9 | 
CIFAR 10 - nonIID Distribution has started
INFO flwr 2024-02-23 14:24:39,699 | app.py:178 | Starting Flower simulation, config: ServerConfig(num_rounds=5, round_timeout=None)
2024-02-23 14:24:45,001	INFO worker.py:1621 -- Started a local Ray instance.
INFO flwr 2024-02-23 14:24:46,803 | app.py:213 | Flower VCE: Ray initialized with resources: {'memory': 5832088782.0, 'object_store_memory': 2916044390.0, 'node:127.0.0.1': 1.0, 'CPU': 12.0, 'node:__internal_head__': 1.0}
INFO flwr 2024-02-23 14:24:46,818 | app.py:219 | Optimize your simulation with Flower VCE: https://flower.dev/docs/framework/how-to-run-simulations.html
INFO flwr 2024-02-23 14:24:46,819 | app.py:242 | Flower VCE: Resources for each Virtual Client: {'num_cpus': 1, 'num_gpus': 0}
INFO flwr 2024-02-23 14:24:46,819 | app.py:288 | Flower VCE: Creating VirtualClientEngineActorPool with 12 actors
INFO flwr 2024-02-23 14:24:46,819 | server.py:89 | Initializing global paramet