In [1]:
!pip install -q flwr torch torchvision tensorboard
!pip install -U "flwr[simulation]"
!pip install timm

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pydrive2 1.21.3 requires cryptography<44, but you have cryptography 44.0.2 which is incompatible.
pyopenssl 24.2.1 requires cryptography<44,>=41.0.5, but you have cryptography 44.0.2 which is incompatible.[0m[31m
Collecting ray==2.31.0 (from flwr[simulation])
  Downloading ray-2.31.0-cp311-cp311-manylinux2014_x86_64.whl.metadata (13 kB)
Downloading ray-2.31.0-cp311-cp311-manylinux2014_x86_64.whl (66.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.7/66.7 MB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ray
Successfully installed ray-2.31.0


In [2]:
# Upload the data_utils, clients, data_preprocessing and strategies .py modules to the runtime before running the import cell below.

In [3]:
from datetime import datetime
from functools import partial
from flwr.server import ServerConfig
import random
import numpy as np
import torch.nn as nn
from torchvision import transforms
import timm
import torch
import os
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import random_split, DataLoader
from torch.utils.tensorboard import SummaryWriter
import flwr as fl
from flwr.simulation import run_simulation
from flwr.common import Context
from flwr.server import ServerApp, ServerAppComponents, ServerConfig
from flwr.client import ClientApp
from flwr.common import parameters_to_ndarrays
from collections import OrderedDict
import data_utils
import clients
import strategies
import data_preprocessing

In [4]:
def seed_everything(seed=42):
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: Integer seed applied to every random number generator below.
    """
    # Environment values must be strings, hence the conversion.
    os.environ["PYTHONHASHSEED"] = str(seed)

    # Apply the same seed to each library's generator (CPU and all CUDA devices).
    for apply_seed in (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        apply_seed(seed)

    # Disable cuDNN autotuning and request deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


seed_everything(seed=42)

In [5]:
# Experiment-wide configuration constants.
DATA_DIR = './data'  # NOTE(review): not referenced in the cells visible here — confirm whether it is still needed
NUM_CLIENTS = 5  # number of federated clients; also used for min_fit_clients and num_supernodes
NC = 20  # third argument of data_utils.non_iid_split — presumably classes per client; TODO confirm
VAL_SPLIT = 0.1  # passed as val_split to CIFAR100Pipeline — presumably the held-out fraction; TODO confirm
BATCH_SIZE = 64  # batch size of the train/val/test DataLoaders

In [6]:
# Build the train/validation/test datasets with the project's CIFAR100Pipeline
# (augmentation switched on via use_augment=True).
pipeline = data_preprocessing.CIFAR100Pipeline(val_split=VAL_SPLIT, use_augment=True)
trainset, valset, testset = pipeline.run_pipeline()

# Loaders over the full (unsharded) splits; only the training loader shuffles.
trainloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
valloader = DataLoader(valset, batch_size=BATCH_SIZE)
testloader = DataLoader(testset, batch_size=BATCH_SIZE)

# Shard the training set across the clients, once IID and once non-IID.
# NOTE(review): the literal 100 is presumably the total number of classes —
# confirm against data_utils.non_iid_split.
client_datasets_iid = data_utils.iid_split(trainset, NUM_CLIENTS)
client_datasets_noniid = data_utils.non_iid_split(trainset, NUM_CLIENTS, NC, 100)

# Paths for logs and checkpoints (absolute, Colab-style). This cell only defines
# the strings; it does not create the directories.
LOG_DIR = "/content/fed_logs"
CKPT_PATH = "/content/fed_checkpoints/model.pt"

print("Setup complete. IID and Non-IID splits created.")

100%|██████████| 169M/169M [00:13<00:00, 12.7MB/s]


Setup complete. IID and Non-IID splits created.


In [7]:
# Create model
def create_dino_vit_s16_for_cifar100(freezing=True):
    """Build a DINO-pretrained ViT-S/16 with a fresh 100-class linear head.

    Args:
        freezing: When True (default), ``requires_grad`` is turned off for every
            parameter except those of the new head, so only the head is
            trainable. When False, all parameters remain trainable.

    Returns:
        The timm model whose ``head`` is ``nn.Linear(model.num_features, 100)``.
    """
    # "vit_small_patch16_224.dino" is timm's current name for the checkpoint
    # formerly registered as "vit_small_patch16_224_dino"; using it avoids the
    # deprecated-name warning. num_classes=0 builds the backbone without a
    # classifier so the head can be attached explicitly below.
    model = timm.create_model("vit_small_patch16_224.dino", pretrained=True, num_classes=0)

    # Replace the head with CIFAR-100 classification head
    model.head = nn.Linear(model.num_features, 100)

    if freezing:
        # Freeze everything, then re-enable gradients for the head only.
        model.requires_grad_(False)
        model.head.requires_grad_(True)

    return model

In [13]:
def _weighted_val_accuracy(metrics):
    """Average the clients' "val_accuracy" weighted by their example counts.

    Args:
        metrics: Sequence of ``(num_examples, metrics_dict)`` pairs.

    Returns:
        A dict with the weighted mean stored under "accuracy".
    """
    weighted_sum = sum(num * m["val_accuracy"] for num, m in metrics)
    total_examples = sum(num for num, _ in metrics)
    return {"accuracy": weighted_sum / total_examples}


# Each run logs into its own timestamped sub-directory of LOG_DIR.
run_name = f"run_iid_data_{datetime.now().strftime('%Y%m%d-%H%M%S')}"
run_log_dir = f"{LOG_DIR}/{run_name}"
writer = SummaryWriter(log_dir=run_log_dir)
print(f"Logging to {LOG_DIR}")

# Every client must be available and takes part in each fit round.
strategy = strategies.FedAvg(
    writer=writer,
    fraction_fit=1.0,
    min_fit_clients=NUM_CLIENTS,
    min_available_clients=NUM_CLIENTS,
    evaluate_metrics_aggregation_fn=_weighted_val_accuracy,
)

Logging to /content/fed_logs


In [15]:
# Settings forwarded to the SGD optimizer selected below.
optimizer_config = dict(
    lr=0.01,              # learning rate (required)
    momentum=0.9,         # momentum term
    dampening=0.0,        # dampening for momentum (usually 0)
    weight_decay=0.0005,  # L2 regularization
    nesterov=False,       # whether to use Nesterov accelerated gradients
)

# Settings forwarded to the cosine scheduler selected below.
scheduler_config = dict(
    T_max=100,      # required: max number of iterations (total batches or epochs)
    eta_min=0.0,    # minimum learning rate
    last_epoch=-1,  # -1 starts the schedule from scratch
)

# Set the batch size in the client class.
fl_client_fn = clients.build_client_fn(
    use_iid=True,
    iid_partitions=client_datasets_iid,
    non_iid_partitions=client_datasets_noniid,
    valset=valset,
    model_fn=create_dino_vit_s16_for_cifar100,
    optimizer_type=clients.OptimizerType.SGD,
    optimizer_config=optimizer_config,
    scheduler_type=clients.SchedulerType.COSINE,
    scheduler_config=scheduler_config,
    local_epochs=2,
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
)
client_app = ClientApp(client_fn=fl_client_fn)

In [16]:
def server_fn(context: Context) -> ServerAppComponents:
    """Return the components that drive the ServerApp.

    The run is fixed at 5 rounds and uses the module-level ``strategy``.
    ``context`` is accepted as part of the callback signature but is not read
    here.
    """
    return ServerAppComponents(
        strategy=strategy,
        config=ServerConfig(num_rounds=5),
    )


server_app = ServerApp(server_fn=server_fn)

In [10]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Every simulated client gets one CPU; on CUDA it is also assigned a full GPU.
gpus_per_client = 1.0 if device.type == "cuda" else 0.0
backend_config = {"client_resources": {"num_cpus": 1, "num_gpus": gpus_per_client}}

In [17]:
# Launch the federated simulation with one simulated node per client, using the
# ServerApp and ClientApp built above and the per-client resources in backend_config.
run_simulation(
    server_app=server_app,
    client_app=client_app,
    num_supernodes=NUM_CLIENTS,
    backend_config=backend_config
)

[92mINFO [0m:      Starting Flower ServerApp, config: num_rounds=5, no round_timeout
[92mINFO [0m:      
[92mINFO [0m:      [INIT]
[92mINFO [0m:      Requesting initial parameters from one random client
[36m(pid=4314)[0m 2025-04-17 09:11:39.417589: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(pid=4314)[0m E0000 00:00:1744881099.439874    4314 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(pid=4314)[0m E0000 00:00:1744881099.446734    4314 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[36m(ClientAppActor pid=4314)[0m   model = create_fn(
[36m(ClientAppActor pid=4314)[0m   self.scaler = GradScaler()
[92mINFO [0m:      Received initial paramete

In [18]:
# Evaluate the parameters held by the strategy on the held-out test set.
final_parameters = strategy.latest_parameters
if final_parameters is None:
    # Fail with a clear message instead of an opaque error inside the conversion below.
    raise RuntimeError(
        "strategy.latest_parameters is None; run the simulation before evaluating."
    )

# Convert Flower parameters to list of numpy arrays
ndarrays = parameters_to_ndarrays(final_parameters)

# Load them into a PyTorch model; arrays are paired with state_dict keys by position.
iid_model = create_dino_vit_s16_for_cifar100()
iid_model.to(device)
state_dict = OrderedDict(
    (key, torch.tensor(val)) for key, val in zip(iid_model.state_dict().keys(), ndarrays)
)
iid_model.load_state_dict(state_dict)

# Switch to inference mode so train-only behaviour (e.g. dropout) is disabled
# while scoring; the model is otherwise left in training mode after creation.
iid_model.eval()

correct, total, loss_total = 0, 0, 0.0
criterion = torch.nn.CrossEntropyLoss()

with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = iid_model(images)
        loss = criterion(outputs, labels)

        # The criterion returns a batch mean; re-weight by batch size so the
        # final average is per example.
        loss_total += loss.item() * labels.size(0)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_loss = loss_total / total
test_accuracy = correct / total

print(f"✅ Test Accuracy: {test_accuracy:.4f} | Test Loss: {test_loss:.4f}")

  model = create_fn(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


✅ Test Accuracy: 0.7564 | Test Loss: 1.1120
