In [1]:
from collections import OrderedDict
from typing import Dict, List, Optional, Tuple

import matplotlib.pyplot as plt

import numpy as np
import torch
from tqdm import tqdm
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torchvision.transforms.functional as TF
import albumentations as A
from torch.cuda.amp.grad_scaler import GradScaler
from torch.cuda.amp.autocast_mode import autocast
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader, random_split
import os
from PIL import Image
import flwr as fl
import torch.optim as optim
from flwr.common import Metrics
import wandb
from typing import List, Union
from flwr.common import Parameters, Scalar
from flwr.server.client_proxy import ClientProxy, FitRes
import copy

from torch.nn.parallel import DistributedDataParallel as DDP
from torchmetrics import JaccardIndex


DEVICE = torch.device("cuda")  # Try "cuda" to train on GPU
print(
    f"Training on {DEVICE} using PyTorch {torch.__version__} and Flower {fl.__version__}"
)

  from .autonotebook import tqdm as notebook_tqdm


Training on cuda using PyTorch 2.0.1+cu117 and Flower 1.4.0


#### Set up initial parameters

In [2]:
NUM_CLIENTS = 15
BATCH_SIZE = 8
IMAGE_HEIGHT = 240
IMAGE_WIDTH = 240

#### Data Transform Functions

In [3]:
# Define the transform
transform = A.Compose(
        [
            A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
            A.Rotate(limit=35, p=1.0),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.1),
            A.Normalize(
                mean=[0.0, 0.0, 0.0],
                std=[1.0, 1.0, 1.0],
                max_pixel_value=255.0,
            ),
            ToTensorV2(),
        ],
    )

# used to transform validation set
val_transforms = A.Compose(
    [
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ],
)

In [4]:
def adjusted_random_split(dataset, lengths):
    try:
        subsets = random_split(dataset, lengths)
    except ValueError as e:
        if str(e) == "Sum of input lengths does not equal the length of the input dataset!":
            print("Length mismatch detected. Adjusting lengths to match dataset.")
            lengths[-1] += len(dataset) - sum(lengths)  # adjust last split length to match dataset length
            subsets = random_split(dataset, lengths)  # retry the split
        else:
            raise  # re-raise the exception if it's not what we're expecting
    return subsets

#### Load Data

In [5]:
class WoundDataset(Dataset):
    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.images = os.listdir(image_dir)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        img_path = os.path.join(self.image_dir, self.images[index])
        mask_path = os.path.join(self.mask_dir, self.images[index])
        image = np.array(Image.open(img_path).convert("RGB"))
        mask = np.array(Image.open(mask_path).convert("L"), dtype=np.float32)
        mask[mask == 255.0] = 1.0

        if self.transform is not None:
            augmentations = self.transform(image=image, mask=mask)
            image = augmentations["image"]
            mask = augmentations["mask"]
        return image, mask #, self.images[index]

# Load images and masks 
trainset = WoundDataset("../../wound_data/data/woundData/train_images", "../../wound_data/data/woundData/train_masks", transform=transform)
testset = WoundDataset("../../wound_data/data/woundData/val_images", "../../wound_data/data/woundData/val_masks", transform=val_transforms)


# Split training set into 10 partitions to simulate the individual dataset
partition_size = len(trainset) // NUM_CLIENTS
lengths = [partition_size] * NUM_CLIENTS
# print(f"len(trainset)  {len(trainset) }")
# print(f"partition_size: {partition_size}")
# print(f"lengths: {lengths}")

#datasets = random_split(trainset, lengths)
datasets = adjusted_random_split(trainset, lengths)

# Split each partition into train/val and create DataLoader
trainloaders = []
valloaders = []
testloader = DataLoader(testset, batch_size=BATCH_SIZE)

print(f'batch size = {BATCH_SIZE}')
for ds in datasets:
    len_val = len(ds) // 20  # 10 % validation set
    print(f'len val = {len_val}')
    print(f'len(ds) = {len(ds)}')
    len_train = len(ds) - len_val
    print(f'len_train = {len_train}')
    lengths = [len_train, len_val]
    print(f'lengths = {lengths}')
    ds_train, ds_val = random_split(ds, lengths)
    
    trainloaders.append(DataLoader(ds_train, batch_size=BATCH_SIZE))
    valloaders.append(DataLoader(ds_val, batch_size=BATCH_SIZE))

    
testloader = DataLoader(testset, batch_size=BATCH_SIZE)



Length mismatch detected. Adjusting lengths to match dataset.
batch size = 8
len val = 2
len(ds) = 40
len_train = 38
lengths = [38, 2]
len val = 2
len(ds) = 40
len_train = 38
lengths = [38, 2]
len val = 2
len(ds) = 40
len_train = 38
lengths = [38, 2]
len val = 2
len(ds) = 40
len_train = 38
lengths = [38, 2]
len val = 2
len(ds) = 40
len_train = 38
lengths = [38, 2]
len val = 2
len(ds) = 40
len_train = 38
lengths = [38, 2]
len val = 2
len(ds) = 40
len_train = 38
lengths = [38, 2]
len val = 2
len(ds) = 40
len_train = 38
lengths = [38, 2]
len val = 2
len(ds) = 40
len_train = 38
lengths = [38, 2]
len val = 2
len(ds) = 40
len_train = 38
lengths = [38, 2]
len val = 2
len(ds) = 40
len_train = 38
lengths = [38, 2]
len val = 2
len(ds) = 40
len_train = 38
lengths = [38, 2]
len val = 2
len(ds) = 40
len_train = 38
lengths = [38, 2]
len val = 2
len(ds) = 40
len_train = 38
lengths = [38, 2]
len val = 2
len(ds) = 50
len_train = 48
lengths = [48, 2]


In [6]:
# images, labels, filenames  = next(iter(testloader))

# # Reshape and convert images to a NumPy array
# # matplotlib requires images with the shape (height, width, 3)
# images = images.permute(0, 2, 3, 1).numpy()
# # Denormalize
# images = images / 2 + 0.5

# # Create a figure and a grid of subplots
# fig, axs = plt.subplots(4, 4, figsize=(20, 10))

# # Loop over the images and plot them
# for i, ax in enumerate(axs.flat):
#     ax.imshow(images[i])
#     ax.set_title(filenames[i])
#     ax.axis("off")

# # Show the plot
# fig.tight_layout()
# plt.show()

In [7]:
# images, labels, filenames = next(iter(testloader))

# # Reshape and convert images to a NumPy array
# # matplotlib requires images with the shape (height, width, 3)
# labels = labels.numpy()
# # Denormalize
# labels = labels / 2 + 0.5

# # Create a figure and a grid of subplots
# fig, axs = plt.subplots(4, 4, figsize=(24, 10))

# # Loop over the images and plot them
# for i, ax in enumerate(axs.flat):
#     ax.imshow(labels[i])
#     ax.set_title(filenames[i])
#     ax.axis("off")

# # Show the plot
# fig.tight_layout()
# plt.show()

In [8]:
# images, labels, filenames = next(iter(trainloaders[0]))

# # Reshape and convert images to a NumPy array
# # matplotlib requires images with the shape (height, width, 3)
# images = images.permute(0, 2, 3, 1).numpy()
# # Denormalize
# images = images / 2 + 0.5

# # Create a figure and a grid of subplots
# fig, axs = plt.subplots(4, 4, figsize=(24, 10))

# # Loop over the images and plot them
# for i, ax in enumerate(axs.flat):
#     ax.imshow(images[i])
#     ax.set_title(filenames[i])
#     ax.axis("off")

# # Show the plot
# fig.tight_layout()
# plt.show()

In [9]:
# images, labels, filenames = next(iter(trainloaders[0]))

# # Reshape and convert images to a NumPy array
# # matplotlib requires images with the shape (height, width, 3)
# labels = labels.numpy()
# # Denormalize
# labels = labels / 2 + 0.5

# # Create a figure and a grid of subplots
# fig, axs = plt.subplots(4, 4, figsize=(24, 10))

# # Loop over the images and plot them
# for i, ax in enumerate(axs.flat):
#     ax.imshow(labels[i])
#     ax.set_title(filenames[i])
#     ax.axis("off")

# # Show the plot
# fig.tight_layout()
# plt.show()

In [10]:
# images, labels, filenames = next(iter(valloaders[0]))

# # Reshape and convert images to a NumPy array
# # matplotlib requires images with the shape (height, width, 3)
# labels = labels.numpy()
# # Denormalize
# labels = labels / 2 + 0.5

# # Create a figure and a grid of subplots
# fig, axs = plt.subplots(4, 4, figsize=(24, 10))

# # Loop over the images and plot them
# for i, ax in enumerate(axs.flat):
#     ax.imshow(labels[i])
#     ax.set_title(filenames[i])
#     ax.axis("off")

# # Show the plot
# fig.tight_layout()
# plt.show()

In [11]:
# images, labels, filenames = next(iter(valloaders[0]))

# # Reshape and convert images to a NumPy array
# # matplotlib requires images with the shape (height, width, 3)
# images = images.permute(0, 2, 3, 1).numpy()
# # Denormalize
# images = images / 2 + 0.5

# # Create a figure and a grid of subplots
# fig, axs = plt.subplots(4, 4, figsize=(24, 10))

# # Loop over the images and plot them
# for i, ax in enumerate(axs.flat):
#     ax.imshow(images[i])
#     ax.set_title(filenames[i])
#     ax.axis("off")

# # Show the plot
# fig.tight_layout()
# plt.show()

In [12]:
# how to add dropout in UNET architecture
class DoubleConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(DoubleConv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, 1, 1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, 1, 1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.conv(x)

class UNET(nn.Module):
    def __init__(
            self, in_channels=3, out_channels=1, features=[64, 128, 256, 512],
    ):
        super(UNET, self).__init__()
        self.ups = nn.ModuleList()
        self.downs = nn.ModuleList()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Down part of UNET
        for feature in features:
            self.downs.append(DoubleConv(in_channels, feature))
            in_channels = feature

        # Up part of UNET
        for feature in reversed(features):
            self.ups.append(
                nn.ConvTranspose2d(
                    feature*2, feature, kernel_size=2, stride=2,
                )
            )
            self.ups.append(DoubleConv(feature*2, feature))

        self.bottleneck = DoubleConv(features[-1], features[-1]*2)
        self.final_conv = nn.Conv2d(features[0], out_channels, kernel_size=1)

    def forward(self, x):
        skip_connections = []

        for down in self.downs:
            x = down(x)
            skip_connections.append(x)
            x = self.pool(x)

        x = self.bottleneck(x)
        skip_connections = skip_connections[::-1]

        for idx in range(0, len(self.ups), 2):
            x = self.ups[idx](x)
            skip_connection = skip_connections[idx//2]

            if x.shape != skip_connection.shape:
                x = TF.resize(x, size=skip_connection.shape[2:])

            concat_skip = torch.cat((skip_connection, x), dim=1)
            x = self.ups[idx+1](concat_skip)

        return self.final_conv(x)

In [13]:
def confusion_matrix(preds, y):
    """
    Compute confusion matrix for binary segmentation.

    Args:
        preds: Predictions from the model. Assumes binary values after thresholding.
        y: Ground truth labels.

    Returns:
        tn: True negatives
        fp: False positives
        fn: False negatives
        tp: True positives
    """
    tp = (y * preds).sum().to(torch.float32)
    tn = ((1 - y) * (1 - preds)).sum().to(torch.float32)
    fp = ((1 - y) * preds).sum().to(torch.float32)
    fn = (y * (1 - preds)).sum().to(torch.float32)

    return tn, fp, fn, tp

def check_accuracy(loader, model, device="cuda"):
    """Test the network on the training set."""
    print("~~~~ In test ~~~~")
    criterion = torch.nn.BCEWithLogitsLoss()
    loss = 0    
    num_correct = 0
    num_pixels = 0
    dice_score = 0
    iou_score  = 0
    result = []
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device).unsqueeze(1)
            preds = torch.sigmoid(model(x))
            loss += criterion(preds, y).item()
            tn, fp, fn, tp = confusion_matrix(preds, y)
            preds = (preds > 0.5).float()
            num_correct += (preds == y).sum()
            num_pixels += torch.numel(preds)
            dice_score += (2 * (preds * y).sum()) / (
                (preds.sum() + y.sum()) + 1e-8
            )
            # Calculate IoU score
            intersection = (preds * y).sum()
            union = (preds + y).sum() - intersection
            iou_score += (intersection + 1e-8) / (union + 1e-8)

    num_batches = len(loader)
    loss /= num_batches
    acc = num_correct/num_pixels*100
    diceS = dice_score/num_batches
    iouS = iou_score/num_batches
    correct_pixel = num_correct
    total_pixel = num_pixels
    print(f"True Negatives: {tn}")
    print(f"False Positives: {fp}")
    print(f"False Negatives: {fn}")
    print(f"True Positives: {tp}")
    print(f"IoU Score = {iouS}")
    print(f"Dice Score = {diceS}")
    print("~~~~~ Out of test ~~~~~")

    model.train()
    
    result = [acc.item(), diceS.item(), iouS.item(), loss, correct_pixel.item(), total_pixel]
    
    return result
  

# def train(net, trainloader, epochs: int, lr, device="cuda"):
#     """Train the network on the training set."""
#     print("~~~~ In train ~~~~")
#     criterion = torch.nn.BCEWithLogitsLoss() #loss_fn
#     optimizer = torch.optim.Adam(net.parameters(), lr)
#     scaler = GradScaler() #torch.cuda.amp.
#     net.train()
#     for epoch in range(epochs):
#         print(f'epoch => {epoch}')
#         print(len(trainloader))
#         correct, total, epoch_loss = 0, 0, 0.0
#         for images, labels in trainloader:
#             # examine image integrity here
#             images = images.to(DEVICE)
#             labels = labels.float().unsqueeze(1).to(device=DEVICE)
#             # forward
#             with torch.cuda.amp.autocast():
#                 outputs = net(images)
#                 loss = criterion(outputs, labels)
#         # backward
#         optimizer.zero_grad()
#         scaler.scale(loss).backward()
#         scaler.step(optimizer)
#         scaler.update()
#         print("end of epoch")
#     print("~~~~ Out of train ~~~~")

def train(net, trainloader, epochs: int, lr, device="cuda"):
    """Train the network on the training set."""
    print("~~~~ In train ~~~~")


    criterion = nn.CrossEntropyLoss()#torch.nn.BCEWithLogitsLoss() #loss_fn
    optimizer = torch.optim.Adam(net.parameters(), lr)
    scaler = GradScaler() #torch.cuda.amp.

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        net = nn.DataParallel(net)
    net.to(DEVICE)     
    net.train()
    
    for epoch in range(epochs):
        print(f'epoch => {epoch}')
        print(f'length of training data {len(trainloader)}')
        for images, labels in trainloader:
            # examine image integrity here
            images = images.to(DEVICE)
            labels = labels.float().unsqueeze(1).to(device=DEVICE)
            # forward
            with torch.cuda.amp.autocast():
                outputs = net(images)
                loss = criterion(outputs, labels)
            # backward 
            optimizer.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            print("end of epoch")
    print("~~~~ Out of train ~~~~")


#### Utility Functions

In [14]:
def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics:
    # Multiply accuracy of each client by number of examples used
    print("_____metrics_______")
    print(metrics)
    print(">>>>>>>>>>>")

    accuracies = [num_examples * m["accuracy"] for num_examples, m in metrics] # Original
    dice = [ num_examples *m["dice_score"] for num_examples, m in metrics]
    iouS = [num_examples * m["iouS"] for num_examples, m in metrics]
    loss = [num_examples * m["loss"] for num_examples, m in metrics]

    #accuracies = [m["accuracy"] for num_examples, m in metrics]
    examples = [num_examples for num_examples, _ in metrics]
    print("!!!accuracies ", str(accuracies))
    print("!!!dice ", str(dice))
    print("!!!examples ", str(examples))
    print("!!!iouS ", str(iouS))
    # Aggregate and return custom metric (weighted average)
    acc = sum(accuracies)/sum(examples)
    dice_score = sum(dice)/sum(examples)
    iou_score = sum(iouS)/sum(examples)
    loss = sum(loss)/sum(examples)

    #wandb.log({"acc": acc, "dice_score": dice_score, "iou_score": iou_score, "loss": loss})

    return {"accuracies": acc, "dice_score": dice_score, "iouS": iou_score, "loss": loss}

In [15]:
class SaveModelStrategy(fl.server.strategy.FedAvg):
    def aggregate_fit(
        self,
        server_round: int,
        results: List[Tuple[fl.server.client_proxy.ClientProxy, fl.common.FitRes]],
        failures: List[Union[Tuple[ClientProxy, FitRes], BaseException]],
    ) -> Tuple[Optional[Parameters], Dict[str, Scalar]]:
        """Aggregate model weights using weighted average and store checkpoint"""

        # Call aggregate_fit from base class (FedAvg) to aggregate parameters and metrics
        aggregated_parameters, aggregated_metrics = super().aggregate_fit(server_round, results, failures)

        if aggregated_parameters is not None:
            print(f"Saving round {server_round} aggregated_parameters...")

            # Convert `Parameters` to `List[np.ndarray]`
            aggregated_ndarrays: List[np.ndarray] = fl.common.parameters_to_ndarrays(aggregated_parameters)

            # Convert `List[np.ndarray]` to PyTorch`state_dict`
            params_dict = zip(net.state_dict().keys(), aggregated_ndarrays)
            state_dict = OrderedDict({k: torch.tensor(v) for k, v in params_dict})
            net.load_state_dict(state_dict, strict=True)

            # Save the model
            torch.save(net.state_dict(), f"models_simple_crossentLoss_15cl_0000001lr_test/model_round_{server_round}.pth")

        return aggregated_parameters, aggregated_metrics

In [16]:
def save_predictions_as_imgs(
    loader, model, client_id, folder="prediction_images/", device="cuda"
):
    model.eval()
    for idx, (x, y) in enumerate(loader):
        x = x.to(device=device)
        with torch.no_grad():
            preds = torch.sigmoid(model(x))
            preds = (preds > 0.5).float()
        torchvision.utils.save_image(
            preds, f"{folder}/pred_{idx}_{client_id}.png"
        )
        torchvision.utils.save_image(y.unsqueeze(1), f"{folder}{idx}_{client_id}.png")

    model.train()

In [17]:
#get local params
def get_parameters(net) -> List[np.ndarray]:
    return [val.cpu().numpy() for _, val in net.state_dict().items()]


#update local model with global params
def set_parameters(device, net, parameters: List[np.ndarray]):
    print("in outside SP !!!!!!!!")
    params_dict = zip(net.state_dict().keys(), parameters)
    state_dict = OrderedDict({k: torch.Tensor(v) for k, v in params_dict})
    net.load_state_dict(state_dict, strict=True)


#### Flower Client defintions

In [18]:
class FlowerClient(fl.client.NumPyClient):
    def __init__(self, cid, net, trainloader, valloader, device, learning_rate, epochs):
        self.cid = cid
        self.net = net
        self.trainloader = trainloader
        self.valloader = valloader
        self.device = device
        self.epochs = epochs
        self.lr = learning_rate

    def set_parameters(self, parameters):
        print(f"[Client {self.cid}] set_parameters")
        # set_parameters(self.device, self.net, parameters)
        params_dict = zip(self.net.state_dict().keys(), parameters)
        state_dict = OrderedDict({k: torch.tensor(v) for k, v in params_dict})
        net.load_state_dict(state_dict, strict=True)

    def get_parameters(self, config):
        print(f"[Client {self.cid}] get_parameters")
        return [val.cpu().numpy() for _, val in self.net.state_dict().items()]
        #return get_parameters(self.net)

    def fit(self, parameters, config):
        print(f"[Client {self.cid}] fit, config: {config}")
        self.set_parameters(parameters)
        train(self.net, self.trainloader, self.epochs, self.lr, self.device)
        return self.get_parameters(self.net), len(self.trainloader), {}

    def evaluate(self, parameters, config):
        print(f"[Client {self.cid}] evaluate, config: {config}")
        self.set_parameters(parameters)
        #loss, accuracy = test(self.net, self.valloader)
        result = check_accuracy(self.valloader, self.net, self.device)
        # print some examples to a folderthu
        #save_predictions_as_imgs(self.valloader, self.net, self.cid, folder="prediction_images/",device=self.device)
        #[acc.item(), diceS.item(), iouS.item(), loss, correct_pixel.item(), total_pixel]
        print('~~~ loss = ', str(result[3]))
        return float(result[3]), len(self.valloader), {"accuracy": result[0], "dice_score": result[1], "iouS": result[2], "loss": result[3]}

def client_fn(cid) -> FlowerClient:
    net = UNET().to(DEVICE)
    trainloader = trainloaders[int(cid)]
    valloader = valloaders[int(cid)]
    lr = 0.00001
    epochs = 50
    print("~~~~client created~~~~")
    return FlowerClient(cid, net, trainloader, valloader, DEVICE, lr, epochs)

In [None]:
# Create an instance of the model and get the parameters

net = UNET().to(DEVICE)
params = get_parameters(net)

strategy = SaveModelStrategy(
    fraction_fit=1.0,
    fraction_evaluate=1.0,
    min_fit_clients=15,
    min_evaluate_clients=1,
    min_available_clients=NUM_CLIENTS,
    initial_parameters=fl.common.ndarrays_to_parameters(params),
    evaluate_metrics_aggregation_fn=weighted_average,
)

# Specify client resources if you need GPU (defaults to 1 CPU and 0 GPU)
client_resources = None
if DEVICE.type == "cuda":
    client_resources = {"num_gpus": 8}

# Start simulation
fl.simulation.start_simulation(
    client_fn=client_fn,
    num_clients=NUM_CLIENTS,
    config=fl.server.ServerConfig(num_rounds=100),  # Just three rounds
    strategy=strategy,
    client_resources=client_resources,
)

INFO flwr 2023-10-22 18:49:00,231 | app.py:146 | Starting Flower simulation, config: ServerConfig(num_rounds=100, round_timeout=None)
2023-10-22 18:49:06,889	INFO worker.py:1636 -- Started a local Ray instance.
INFO flwr 2023-10-22 18:49:08,126 | app.py:180 | Flower VCE: Ray initialized with resources: {'accelerator_type:V100': 1.0, 'GPU': 8.0, 'node:172.16.128.55': 1.0, 'memory': 1396947295232.0, 'object_store_memory': 200000000000.0, 'CPU': 36.0}
INFO flwr 2023-10-22 18:49:08,127 | server.py:86 | Initializing global parameters
INFO flwr 2023-10-22 18:49:08,127 | server.py:269 | Using initial parameters provided by strategy
INFO flwr 2023-10-22 18:49:08,128 | server.py:88 | Evaluating initial parameters
INFO flwr 2023-10-22 18:49:08,128 | server.py:101 | FL starting
DEBUG flwr 2023-10-22 18:49:08,129 | server.py:218 | fit_round 1: strategy sampled 15 clients (out of 15)


[2m[36m(launch_and_fit pid=3490307)[0m ~~~~client created~~~~
[2m[36m(launch_and_fit pid=3490307)[0m [Client 2] fit, config: {}
[2m[36m(launch_and_fit pid=3490307)[0m [Client 2] set_parameters
[2m[36m(launch_and_fit pid=3490307)[0m ~~~~ In train ~~~~
[2m[36m(launch_and_fit pid=3490307)[0m Let's use 8 GPUs!
[2m[36m(launch_and_fit pid=3490307)[0m epoch => 0
[2m[36m(launch_and_fit pid=3490307)[0m length of training data 5
[2m[36m(launch_and_fit pid=3490307)[0m end of epoch
[2m[36m(launch_and_fit pid=3490307)[0m end of epoch
[2m[36m(launch_and_fit pid=3490307)[0m end of epoch
[2m[36m(launch_and_fit pid=3490307)[0m end of epoch
[2m[36m(launch_and_fit pid=3490307)[0m end of epoch
[2m[36m(launch_and_fit pid=3490307)[0m epoch => 1
[2m[36m(launch_and_fit pid=3490307)[0m length of training data 5
[2m[36m(launch_and_fit pid=3490307)[0m end of epoch
[2m[36m(launch_and_fit pid=3490307)[0m end of epoch
[2m[36m(launch_and_fit pid=3490307)[0m end of e

[2m[36m(launch_and_fit pid=3495138)[0m *** SIGSEGV received at time=1698018707 on cpu 18 ***
[2m[36m(launch_and_fit pid=3495138)[0m PC: @     0x151fa7ae7667  (unknown)  (unknown)
[2m[36m(launch_and_fit pid=3495138)[0m     @     0x15555498ece0  (unknown)  (unknown)
[2m[36m(launch_and_fit pid=3495138)[0m [2023-10-22 18:51:47,147 E 3495138 3495273] logging.cc:361: *** SIGSEGV received at time=1698018707 on cpu 18 ***
[2m[36m(launch_and_fit pid=3495138)[0m [2023-10-22 18:51:47,148 E 3495138 3495273] logging.cc:361: PC: @     0x151fa7ae7667  (unknown)  (unknown)
[2m[36m(launch_and_fit pid=3495138)[0m [2023-10-22 18:51:47,148 E 3495138 3495273] logging.cc:361:     @     0x15555498ece0  (unknown)  (unknown)
[2m[36m(launch_and_fit pid=3495138)[0m Fatal Python error: Segmentation fault
[2m[36m(launch_and_fit pid=3495138)[0m 
[2m[36m(launch_and_fit pid=3495138)[0m Stack (most recent call first):
[2m[36m(launch_and_fit pid=3495138)[0m <no Python frame>


[2m[36m(launch_and_fit pid=3495285)[0m ~~~~client created~~~~
[2m[36m(launch_and_fit pid=3495285)[0m [Client 9] fit, config: {}
[2m[36m(launch_and_fit pid=3495285)[0m [Client 9] set_parameters
[2m[36m(launch_and_fit pid=3495285)[0m ~~~~ In train ~~~~
[2m[36m(launch_and_fit pid=3495285)[0m Let's use 8 GPUs!
[2m[36m(launch_and_fit pid=3495285)[0m epoch => 0
[2m[36m(launch_and_fit pid=3495285)[0m length of training data 5
[2m[36m(launch_and_fit pid=3495285)[0m end of epoch
[2m[36m(launch_and_fit pid=3495285)[0m end of epoch
[2m[36m(launch_and_fit pid=3495285)[0m end of epoch
[2m[36m(launch_and_fit pid=3495285)[0m end of epoch
[2m[36m(launch_and_fit pid=3495285)[0m end of epoch
[2m[36m(launch_and_fit pid=3495285)[0m epoch => 1
[2m[36m(launch_and_fit pid=3495285)[0m length of training data 5
[2m[36m(launch_and_fit pid=3495285)[0m end of epoch
[2m[36m(launch_and_fit pid=3495285)[0m end of epoch
[2m[36m(launch_and_fit pid=3495285)[0m end of e

[2m[36m(launch_and_fit pid=3513257)[0m <no Python frame>
[2m[36m(launch_and_fit pid=3513257)[0m <no Python frame>
[2m[36m(launch_and_fit pid=3513257)[0m <no Python frame>
[2m[36m(launch_and_fit pid=3513257)[0m <no Python frame>
[2m[36m(launch_and_fit pid=3513257)[0m <no Python frame>
[2m[36m(launch_and_fit pid=3513257)[0m <no Python frame>
[2m[36m(launch_and_fit pid=3513257)[0m <no Python frame>
[2m[36m(launch_and_fit pid=3513257)[0m <no Python frame>
[2m[36m(launch_and_fit pid=3513257)[0m <no Python frame>
[2m[36m(launch_and_fit pid=3513257)[0m <no Python frame>


[2m[36m(launch_and_fit pid=3513407)[0m ~~~~client created~~~~
[2m[36m(launch_and_fit pid=3513407)[0m [Client 10] fit, config: {}
[2m[36m(launch_and_fit pid=3513407)[0m [Client 10] set_parameters
[2m[36m(launch_and_fit pid=3513407)[0m ~~~~ In train ~~~~
[2m[36m(launch_and_fit pid=3513407)[0m Let's use 8 GPUs!
[2m[36m(launch_and_fit pid=3513407)[0m epoch => 0
[2m[36m(launch_and_fit pid=3513407)[0m length of training data 5
[2m[36m(launch_and_fit pid=3513407)[0m end of epoch
[2m[36m(launch_and_fit pid=3513407)[0m end of epoch
[2m[36m(launch_and_fit pid=3513407)[0m end of epoch
[2m[36m(launch_and_fit pid=3513407)[0m end of epoch
[2m[36m(launch_and_fit pid=3513407)[0m end of epoch
[2m[36m(launch_and_fit pid=3513407)[0m epoch => 1
[2m[36m(launch_and_fit pid=3513407)[0m length of training data 5
[2m[36m(launch_and_fit pid=3513407)[0m end of epoch
[2m[36m(launch_and_fit pid=3513407)[0m end of epoch
[2m[36m(launch_and_fit pid=3513407)[0m end of

ERROR flwr 2023-10-22 19:06:11,041 | ray_client_proxy.py:87 | [36mray::launch_and_fit()[39m (pid=3523035, ip=172.16.128.55)
  File "/home/mahdi.saeedi/.conda/envs/flwr_38/lib/python3.8/site-packages/flwr/simulation/ray_transport/ray_client_proxy.py", line 148, in launch_and_fit
    return maybe_call_fit(
  File "/home/mahdi.saeedi/.conda/envs/flwr_38/lib/python3.8/site-packages/flwr/client/client.py", line 184, in maybe_call_fit
    return client.fit(fit_ins)
  File "/home/mahdi.saeedi/.conda/envs/flwr_38/lib/python3.8/site-packages/flwr/client/app.py", line 297, in _fit
    results = self.numpy_client.fit(parameters, ins.config)  # type: ignore
  File "/tmp/ipykernel_3487255/760806206.py", line 26, in fit
  File "/tmp/ipykernel_3487255/2333540152.py", line 130, in train
  File "/home/mahdi.saeedi/.conda/envs/flwr_38/lib/python3.8/site-packages/torch/cuda/amp/grad_scaler.py", line 374, in step
    retval = self._maybe_opt_step(optimizer, optimizer_state, *args, **kwargs)
  File "/hom

Saving round 1 aggregated_parameters...


DEBUG flwr 2023-10-22 19:06:17,329 | server.py:168 | evaluate_round 1: strategy sampled 15 clients (out of 15)
[2m[36m(launch_and_evaluate pid=3523379)[0m 
[2m[36m(launch_and_evaluate pid=3523379)[0m CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.[32m [repeated 2x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)[0m
[2m[36m(launch_and_evaluate pid=3523379)[0m For debugging consider passing CUDA_LAUNCH_BLOCKING=1.[32m [repeated 2x across cluster][0m
[2m[36m(launch_and_evaluate pid=3523379)[0m Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.[32m [repeated 2x across cluster][0m
[2m[36m(launch_and_evaluate pid=3523379)[0m 
[2m[36m(launch_and_evaluate pid=3523379)[0m 
[2m[36m(launch_and_evaluate pid=3523379)[0m 
[2m

[2m[36m(launch_and_fit pid=3525036)[0m ~~~~client created~~~~
[2m[36m(launch_and_fit pid=3525036)[0m [Client 12] fit, config: {}
[2m[36m(launch_and_fit pid=3525036)[0m [Client 12] set_parameters
[2m[36m(launch_and_fit pid=3525036)[0m ~~~~ In train ~~~~
[2m[36m(launch_and_fit pid=3525036)[0m Let's use 8 GPUs!
[2m[36m(launch_and_fit pid=3525036)[0m epoch => 0
[2m[36m(launch_and_fit pid=3525036)[0m length of training data 5
[2m[36m(launch_and_fit pid=3525036)[0m end of epoch
[2m[36m(launch_and_fit pid=3525036)[0m end of epoch
[2m[36m(launch_and_fit pid=3525036)[0m end of epoch
[2m[36m(launch_and_fit pid=3525036)[0m end of epoch
[2m[36m(launch_and_fit pid=3525036)[0m end of epoch
[2m[36m(launch_and_fit pid=3525036)[0m epoch => 1
[2m[36m(launch_and_fit pid=3525036)[0m length of training data 5
[2m[36m(launch_and_fit pid=3525036)[0m end of epoch
[2m[36m(launch_and_fit pid=3525036)[0m end of epoch
[2m[36m(launch_and_fit pid=3525036)[0m end of

DEBUG flwr 2023-10-22 19:17:45,150 | server.py:232 | fit_round 2 received 8 results and 7 failures


Saving round 2 aggregated_parameters...


DEBUG flwr 2023-10-22 19:17:47,330 | server.py:168 | evaluate_round 2: strategy sampled 15 clients (out of 15)


[2m[36m(launch_and_evaluate pid=3545162)[0m ~~~~client created~~~~
[2m[36m(launch_and_evaluate pid=3545162)[0m [Client 6] evaluate, config: {}
[2m[36m(launch_and_evaluate pid=3545162)[0m [Client 6] set_parameters
[2m[36m(launch_and_evaluate pid=3545162)[0m ~~~~ In test ~~~~
[2m[36m(launch_and_evaluate pid=3545162)[0m True Negatives: 57607.984375
[2m[36m(launch_and_evaluate pid=3545162)[0m False Positives: 57435.015625
[2m[36m(launch_and_evaluate pid=3545162)[0m False Negatives: 78.64445495605469
[2m[36m(launch_and_evaluate pid=3545162)[0m True Positives: 78.35554504394531
[2m[36m(launch_and_evaluate pid=3545162)[0m IoU Score = 5.41711814294743e-12
[2m[36m(launch_and_evaluate pid=3545162)[0m Dice Score = 0.0
[2m[36m(launch_and_evaluate pid=3545162)[0m ~~~~~ Out of test ~~~~~
[2m[36m(launch_and_evaluate pid=3545162)[0m ~~~ loss =  0.9729287624359131
[2m[36m(launch_and_evaluate pid=3545253)[0m [Client 3] set_parameters
[2m[36m(launch_and_evaluate p

[2m[36m(launch_and_evaluate pid=3546213)[0m 2023-10-22 19:19:13,174	ERROR serialization.py:387 -- CUDA out of memory. Tried to allocate 36.00 MiB (GPU 0; 31.74 GiB total capacity; 83.44 MiB already allocated; 31.38 MiB free; 100.00 MiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
[2m[36m(launch_and_evaluate pid=3546213)[0m torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 36.00 MiB (GPU 0; 31.74 GiB total capacity; 83.44 MiB already allocated; 31.38 MiB free; 100.00 MiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
[2m[36m(launch_and_evaluate pid=3546213)[0m torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 36.00 MiB (GPU 0; 31.74 GiB total capacity;

_____metrics_______
[(1, {'accuracy': 0.4322916865348816, 'dice_score': 0.005962388589978218, 'iouS': 0.0029901082161813974, 'loss': 0.9752824902534485}), (1, {'accuracy': 98.39756774902344, 'dice_score': 0.0, 'iouS': 5.41711814294743e-12, 'loss': 0.9729287624359131}), (1, {'accuracy': 1.4019097089767456, 'dice_score': 0.027650559321045876, 'iouS': 0.014019097201526165, 'loss': 0.9787344336509705}), (1, {'accuracy': 0.913194477558136, 'dice_score': 0.018098613247275352, 'iouS': 0.009131944738328457, 'loss': 0.9720973372459412}), (1, {'accuracy': 1.2317708730697632, 'dice_score': 0.024335656315088272, 'iouS': 0.012317708693444729, 'loss': 0.9853651523590088}), (1, {'accuracy': 1.0086805820465088, 'dice_score': 0.01997215673327446, 'iouS': 0.010086805559694767, 'loss': 0.9715882539749146}), (1, {'accuracy': 99.59027862548828, 'dice_score': 0.0, 'iouS': 2.1186439755349618e-11, 'loss': 0.9525266289710999}), (1, {'accuracy': 2.4427084922790527, 'dice_score': 0.04768925905227661, 'iouS': 0.0

ERROR flwr 2023-10-22 19:19:27,000 | ray_client_proxy.py:87 | [36mray::launch_and_fit()[39m (pid=3546453, ip=172.16.128.55)
  At least one of the input arguments for this task could not be computed:
ray.exceptions.RaySystemError: System error: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

traceback: Traceback (most recent call last):
  File "/home/mahdi.saeedi/.conda/envs/flwr_38/lib/python3.8/site-packages/torch/storage.py", line 241, in _load_from_bytes
    return torch.load(io.BytesIO(b))
  File "/home/mahdi.saeedi/.conda/envs/flwr_38/lib/python3.8/site-packages/torch/serialization.py", line 815, in load
    return _legacy_load(opened_file, map_location, pickle_module, **pickle_load_args)
  File "/home/mahdi.saeedi/.conda/envs/flwr_38/lib/python3.8/site-package

[2m[36m(launch_and_fit pid=3547595)[0m ~~~~client created~~~~
[2m[36m(launch_and_fit pid=3547595)[0m [Client 13] fit, config: {}
[2m[36m(launch_and_fit pid=3547595)[0m [Client 13] set_parameters
[2m[36m(launch_and_fit pid=3547595)[0m ~~~~ In train ~~~~
[2m[36m(launch_and_fit pid=3547595)[0m Let's use 8 GPUs!
[2m[36m(launch_and_fit pid=3547595)[0m epoch => 0
[2m[36m(launch_and_fit pid=3547595)[0m length of training data 5
[2m[36m(launch_and_fit pid=3547595)[0m end of epoch
[2m[36m(launch_and_fit pid=3547595)[0m end of epoch
[2m[36m(launch_and_fit pid=3547595)[0m end of epoch
[2m[36m(launch_and_fit pid=3547595)[0m end of epoch
[2m[36m(launch_and_fit pid=3547595)[0m end of epoch
[2m[36m(launch_and_fit pid=3547595)[0m epoch => 1
[2m[36m(launch_and_fit pid=3547595)[0m length of training data 5
[2m[36m(launch_and_fit pid=3547595)[0m end of epoch
[2m[36m(launch_and_fit pid=3547595)[0m end of epoch
[2m[36m(launch_and_fit pid=3547595)[0m end of

DEBUG flwr 2023-10-22 19:21:31,653 | server.py:232 | fit_round 3 received 1 results and 14 failures


Saving round 3 aggregated_parameters...


DEBUG flwr 2023-10-22 19:21:32,616 | server.py:168 | evaluate_round 3: strategy sampled 15 clients (out of 15)


[2m[36m(launch_and_evaluate pid=3549976)[0m ~~~~client created~~~~
[2m[36m(launch_and_evaluate pid=3549976)[0m [Client 1] evaluate, config: {}
[2m[36m(launch_and_evaluate pid=3549976)[0m [Client 1] set_parameters
[2m[36m(launch_and_evaluate pid=3549976)[0m ~~~~ In test ~~~~
[2m[36m(launch_and_evaluate pid=3549976)[0m True Negatives: 58401.4921875
[2m[36m(launch_and_evaluate pid=3549976)[0m False Positives: 56329.5078125
[2m[36m(launch_and_evaluate pid=3549976)[0m False Negatives: 238.8252410888672
[2m[36m(launch_and_evaluate pid=3549976)[0m True Positives: 230.17477416992188
[2m[36m(launch_and_evaluate pid=3549976)[0m IoU Score = 2.1321961557463354e-11
[2m[36m(launch_and_evaluate pid=3549976)[0m Dice Score = 0.0
[2m[36m(launch_and_evaluate pid=3549976)[0m ~~~~~ Out of test ~~~~~
[2m[36m(launch_and_evaluate pid=3549976)[0m ~~~ loss =  0.9664674401283264
[2m[36m(launch_and_evaluate pid=3550058)[0m ~~~~client created~~~~
[2m[36m(launch_and_evaluate

DEBUG flwr 2023-10-22 19:23:01,196 | server.py:182 | evaluate_round 3 received 15 results and 0 failures
DEBUG flwr 2023-10-22 19:23:01,200 | server.py:218 | fit_round 4: strategy sampled 15 clients (out of 15)


_____metrics_______
[(1, {'accuracy': 1.4053819179534912, 'dice_score': 0.027718093246221542, 'iouS': 0.014053819701075554, 'loss': 0.9702839255332947}), (1, {'accuracy': 58.72135925292969, 'dice_score': 0.07095828652381897, 'iouS': 0.03678421676158905, 'loss': 0.9657198190689087}), (1, {'accuracy': 2.440972328186035, 'dice_score': 0.04765617102384567, 'iouS': 0.024409722536802292, 'loss': 0.9642267823219299}), (1, {'accuracy': 0.5095486044883728, 'dice_score': 0.01013930793851614, 'iouS': 0.0050954860635101795, 'loss': 0.9802927374839783}), (1, {'accuracy': 0.1336805671453476, 'dice_score': 0.002670041751116514, 'iouS': 0.001336805522441864, 'loss': 0.9836171269416809}), (1, {'accuracy': 1.0078126192092896, 'dice_score': 0.019955139607191086, 'iouS': 0.010078124701976776, 'loss': 0.974363386631012}), (1, {'accuracy': 98.76302337646484, 'dice_score': 0.0, 'iouS': 7.017543664222803e-12, 'loss': 0.9600923657417297}), (1, {'accuracy': 99.7013931274414, 'dice_score': 0.0, 'iouS': 2.9069767