<a href="https://colab.research.google.com/github/vs-152/FL-Contributions-Incentives-Project/blob/main/ISO_CIFAR10_OR_FINAL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import numpy as np
!pip install pulp
import pulp
import copy
import time
from sklearn.model_selection import StratifiedShuffleSplit
import torchvision
from torchvision.datasets import CIFAR10
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torch.utils.data.sampler import SubsetRandomSampler
from itertools import chain, combinations
from tqdm import tqdm
from scipy.special import comb
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")




In [2]:
def print_solution(model):
    """Prints solution of the model nicely!"""

    print(f"status: {model.status}, {pulp.LpStatus[model.status]}")
    print(f"objective: {model.objective.value()}")
    for var in model.variables():
        print(f"{var.name}: {round(var.value(),3)}")

def noisify_MNIST(noise_rate, noise_type, x, y, perm=[], **kwargs):
    '''Returns a symmetrically noisy dataset
    or a an asymmetrically noisy dataset with permutation matrix perm.
    '''
    if (noise_rate == 0.):
        return y, []
    if 'seed' in kwargs:
        _, noise_idx = next(
            iter(StratifiedShuffleSplit(
                n_splits=1,
                test_size=noise_rate,
                random_state=kwargs['seed']).split(x, y)))
    else:
        _, noise_idx = next(iter(StratifiedShuffleSplit(
            n_splits=1, test_size=noise_rate).split(x, y)))
    y_noisy = y.copy()
    if (noise_type == 'symmetric'):
        for i in noise_idx:
            t1 = np.arange(10)
            t2 = np.delete(t1, y[i])
            y_noisy[i] = np.random.choice(t2, 1)
    elif (noise_type == 'asymmetric'):
        pure_noise = perm[y]
        for i in noise_idx:
            if (perm[y[i]] == y[i]):
                noise_idx = np.delete(noise_idx, np.where(noise_idx == i))
            else:
                y_noisy[i] = pure_noise[i]

    return y_noisy, noise_idx

def mnist_iid(dataset, num_users):
    """
    Sample I.I.D. client data from MNIST dataset
    :param dataset:
    :param num_users:
    :return: dict of image index
    """
    num_items = int(len(dataset)/num_users)
    dict_users, all_idxs = {}, [i for i in range(len(dataset))]
    for i in range(num_users):
        dict_users[i] = set(np.random.choice(all_idxs, num_items,
                                             replace=False))
        all_idxs = list(set(all_idxs) - dict_users[i])

    return dict_users

def average_weights(w, fraction):  # this can also be used to average gradients
    """
    :param w: list of weights generated from the users
    :param fraction: list of fraction of data from the users
    :Returns the weighted average of the weights.
    """
    w_avg = copy.deepcopy(w[0]) #copy the weights from the first user in the list 
    for key in w_avg.keys():
        w_avg[key] *= torch.tensor(fraction[0]/sum(fraction), dtype=w_avg[key].dtype)
        for i in range(1, len(w)):
            w_avg[key] += w[i][key] * torch.tensor(fraction[0]/sum(fraction), dtype=w_avg[key].dtype)

    return w_avg

def calculate_gradients(new_weights, old_weights):
    """
    :param new_weights: list of weights generated from the users
    :param old_weights: old weights of a model, probably before training
    :Returns the list of gradients.
    """
    gradients = []
    for i in range(len(new_weights)):
        gradients.append(copy.deepcopy(new_weights[i]))
        for key in gradients[i].keys():
            gradients[i][key] -= old_weights[key]

    return gradients

def update_weights_from_gradients(gradients, old_weights):
    """
    :param gradients: gradients
    :param old_weights: old weights of a model, probably before training
    :Returns the updated weights calculated by: old_weights+gradients.
    """
    updated_weights = copy.deepcopy(old_weights)
    for key in updated_weights.keys():
        updated_weights[key] = old_weights[key] + gradients[key]

    return updated_weights
    


def powersettool(iterable):
    "powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))

def least_core(char_function_dict, N):
    """Solves the least core LP problem.

    Args:
        N: number of participants.
        char_function_dict: dictionary with participants as keys and 
        corresponding characteristic function value as values
    """
    model = pulp.LpProblem('least_core', pulp.LpMinimize)
    x = {i: pulp.LpVariable(name=f'x({i})', lowBound=0) for i in range(1, N+1)}
    e = pulp.LpVariable(name='e')
    model += e # decision variable
    grand_coalition = tuple(i for i in range(1, N+1))
    model += pulp.lpSum(x) == char_function_dict[grand_coalition]
    for key, value in char_function_dict.items():
        model += pulp.lpSum(x[idx] for idx in key) + e >= value
    model.solve()
    print_solution(model)

    return model

def shapley(utility, N):

    shapley_dict = {}
    for i in range(1, N+1):
        shapley_dict[i] = 0
    for key in utility:
        if key != ():
            for contributor in key:
                # print('contributor:', contributor, key) # print check
                marginal_contribution = utility[key] - utility[tuple(i for i in key if i!=contributor)]
                # print('marginal:', marginal_contribution) # print check
                shapley_dict[contributor] += marginal_contribution /((comb(N-1,len(key)-1))*N)

    return shapley_dict

In [None]:
transform_train = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomCrop(32, padding=4),
    #transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    #transforms.RandomErasing(scale=(0.1, 0.3), ratio=(0.5, 2), value=0)
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = CIFAR10(
    root='./data', train=True, download=True)

test_dataset = CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)

x_train = trainset.data
y_train = np.array(trainset.targets)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
class ResNet9(nn.Module):
    def __init__(self):
        super(ResNet9, self).__init__()
        self.prep = self.convbnrelu(channels=3, filters=64)
        self.layer1 = self.convbnrelu(64, 128)
        self.layer_pool = nn.MaxPool2d(2, 2, 0, 1, ceil_mode=False)
        self.layer1r1 = self.convbnrelu(128, 128)
        self.layer1r2 = self.convbnrelu(128, 128)
        self.layer2 = self.convbnrelu(128, 256)
        self.layer3 = self.convbnrelu(256, 512)
        self.layer3r1 = self.convbnrelu(512, 512)
        self.layer3r2 = self.convbnrelu(512, 512)
        self.out_pool = nn.MaxPool2d(kernel_size=4, stride=4, ceil_mode=False)
        self.flatten = nn.Flatten()
        self.linear = nn.Linear(in_features=512, out_features=10, bias=False)

    def convbnrelu(self, channels, filters):
        layers = []
        layers.append(nn.Conv2d(channels, filters, (3, 3),
                                (1, 1), (1, 1), bias=False))
        layers.append(nn.BatchNorm2d(filters, track_running_stats=False))
        layers.append(nn.ReLU(inplace=True))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.prep(x)
        x = self.layer_pool(self.layer1(x))
        r1 = self.layer1r2(self.layer1r1(x)) 
        x = x + r1
        x = self.layer_pool(self.layer2(x))
        x = self.layer_pool(self.layer3(x))
        r3 = self.layer3r2(self.layer3r1(x))
        x = x + r3
        out = self.out_pool(x)
        out = self.flatten(out)
        out = self.linear(out)
        out = out * 0.125

        return out

class CustomTensorDataset(Dataset):
    """TensorDataset with support of transforms.
    """
    def __init__(self, tensors, transform=None):
        self.tensors = tensors
        self.transform = transform

    def __getitem__(self, index):
        x = self.tensors[0][index]

        if self.transform:
            x = self.transform(x)

        y = self.tensors[1][index]

        return x, y

    def __len__(self):
        return self.tensors[0].shape[0]

In [None]:
class LocalUpdate(object):

    def __init__(self, lr, local_ep, trainloader):
        self.lr = lr
        self.local_ep = local_ep
        self.trainloader = trainloader

    def update_weights(self, model):

        model.train()
        epoch_loss = []
        optimizer = torch.optim.Adam(model.parameters())
        criterion = nn.CrossEntropyLoss().to(device)
        for iter in range(self.local_ep):
            batch_loss = []
            for batch_idx, (images, labels) in enumerate(self.trainloader):
                images, labels = images.to(device), labels.to(device)
                model.zero_grad()   
                log_probs = model(images)
                loss = criterion(log_probs, labels)
                loss.backward()
                optimizer.step()
                batch_loss.append(loss.item())
            epoch_loss.append(sum(batch_loss)/len(batch_loss))

        return model.state_dict(), sum(epoch_loss) / len(epoch_loss)

def test_inference(model, test_dataset):
    """
    Computes mean Dice across the three BraTS channels
    (whole tumour, tumour-core, enhancing-core).

    Returns
    -------
    float   ─ mean Dice in the range [0, 1]
    """
    model.eval()
    loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

    dice_sum = torch.zeros(3, device=device)   # accumulate per-class Dice
    n_batches = 0

    with torch.no_grad():
        for batch in loader:
            # ---- image tensor ------------------------------------------------
            img = torch.cat([batch[k] for k in ("flair", "t1", "t1ce", "t2")],
                            dim=1).to(device)                     # (B,4,D,H,W)

            # ---- ground-truth mask → one-hot (B,3,D,H,W) ---------------------
            raw    = batch["seg"].squeeze(1).cpu().numpy()       # (B,D,H,W)
            target = torch.tensor(
                        preprocess_mask_labels(raw),
                        dtype=torch.float32, device=device
                     )

            # ---- network prediction → one-hot (B,3,D,H,W) --------------------
            logits  = model(img)                                 # (B,3,D,H,W)
            pred_ch = torch.argmax(logits, dim=1)                # (B,D,H,W)
            pred    = torch.nn.functional.one_hot(
                          pred_ch, num_classes=3
                      ).permute(0,4,1,2,3).float()

            # ---- per-class Dice ---------------------------------------------
            intersect = 2 * (pred * target).sum(dim=(2,3,4))     # (B,3)
            denom     = (pred + target).sum(dim=(2,3,4)) + 1e-6
            dice_sum += (intersect / denom).squeeze(0)           # add (3,)
            n_batches += 1

    return (dice_sum / n_batches).mean().item()                  # scalar Dice


In [7]:
N = 10 #srch
local_bs = 512
lr = 0.01
local_ep = 5
EPOCHS = 5

noise_rates = np.linspace(0, 1, N, endpoint=False)
split_dset = mnist_iid(trainset, N)
user_groups = {i: 0 for i in range(1, N+1)}
noise_idx = {i: 0 for i in range(1, N+1)}
train_datasets = {i: 0 for i in range(1, N+1)}
for n in range(N):
    user_groups[n+1] = np.array(list(split_dset[n]), dtype=np.int32)
    user_train_x, user_train_y = x_train[user_groups[n+1]], y_train[user_groups[n+1]]
    user_noisy_y, noise_idx[n+1] = noisify_MNIST(noise_rates[n], 'symmetric', user_train_x, user_train_y)
    
    train_datasets[n+1] = CustomTensorDataset((user_train_x, user_noisy_y), transform_train)

def fixfuckingbn(subset_weights, global_model_state_dict):
    for pair_1, pair_2 in zip(subset_weights.items(), global_model_state_dict.items()):
        if ('running' in pair_1[0]) or ('batches' in pair_1[0]):
            subset_weights[pair_1[0]] = global_model_state_dict[pair_1[0]]
    
    return subset_weights

global_model = ResNet9().to(device)
global_model.to(device)
global_model.train()

global_weights = global_model.state_dict()
powerset = list(powersettool(range(1, N+1)))
submodel_dict = {}  
submodel_dict[()] = copy.deepcopy(global_model)
accuracy_dict = {}
shapley_dict = {}

In [8]:
user_groups

{1: array([    6, 32782, 32784, ..., 32753, 32754, 32759], dtype=int32),
 2: array([32779, 32780,    13, ..., 32748, 32761, 32767], dtype=int32),
 3: array([    1,    18, 32788, ..., 32742, 32747, 32763], dtype=int32),
 4: array([32770, 32777,    34, ..., 32733, 32745, 32756], dtype=int32),
 5: array([    0, 32769, 32778, ..., 32751, 32760, 32766], dtype=int32),
 6: array([   12, 32787,    27, ..., 32722, 32729, 32739], dtype=int32),
 7: array([32771,     7,    11, ..., 32752, 32757, 32764], dtype=int32),
 8: array([    2,     5, 32775, ..., 32732, 32736, 32741], dtype=int32),
 9: array([    4,    21,    24, ..., 32735, 32758, 32765], dtype=int32),
 10: array([32768,     3, 32772, ..., 32749, 32755, 32762], dtype=int32)}

In [None]:
start_time = time.time()

for subset in range(1, N+1):
    submodel_dict[(subset,)] = copy.deepcopy(global_model)
    submodel_dict[(subset,)].to(device)
    submodel_dict[(subset,)].train() 
 
train_loss, train_accuracy = [], []
val_acc_list, net_list = [], []
print_every = 1

idxs_users = np.arange(1, N+1)
total_data = sum(len(user_groups[i]) for i in range(1, N+1))
fraction = [len(user_groups[i])/total_data for i in range(1, N+1)]

for epoch in tqdm(range(EPOCHS)):
    local_weights, local_losses = [], []
    print(f'\n | Global Training Round : {epoch+1} |\n')
    global_model.train()
    for idx in idxs_users:
        trainloader = DataLoader(train_datasets[idx], batch_size=local_bs, shuffle=True)
        local_model = LocalUpdate(lr, local_ep, trainloader)
        w, loss = local_model.update_weights(model=copy.deepcopy(global_model))
        local_weights.append(copy.deepcopy(w))
        local_losses.append(copy.deepcopy(loss))
    global_weights = average_weights(local_weights, fraction) 
    loss_avg = sum(local_losses) / len(local_losses)
    train_loss.append(loss_avg)

    gradients = calculate_gradients(local_weights, global_model.state_dict()) 
    for i in range(1, N+1):
        subset_weights = update_weights_from_gradients(gradients[i-1], submodel_dict[(i,)].state_dict()) 
        subset_weights = fixfuckingbn(subset_weights, global_model.state_dict())
        submodel_dict[(i,)].load_state_dict(subset_weights)

    global_model.load_state_dict(global_weights)
    global_model.eval()

    if (epoch+1) % print_every == 0:
        print(f' \nAvg Training Stats after {epoch+1} global rounds:')
        print(f'Training Loss : {np.mean(np.array(train_loss))}')
        # print('Train Accuracy: {:.2f}% \n'.format(100*train_accuracy[-1]))

test_acc, test_loss = test_inference(global_model, test_dataset)
print(f' \n Results after {EPOCHS} global rounds of training:')
print("|---- Test Accuracy: {:.2f}%".format(100*test_acc))

accuracy_dict[powerset[-1]] = test_acc

# ADJUSTED-OR APPROX
for subset in powerset[:-1]: 
    if len(subset) > 1:
        # calculate the average of the subset of weights from list of all the weights
        subset_weights = average_weights([submodel_dict[(i,)].state_dict() for i in subset], [fraction[i-1] for i in subset]) 
        submodel = copy.deepcopy(submodel_dict[()])
        submodel.load_state_dict(subset_weights)
        
        test_acc, test_loss = test_inference(submodel,test_dataset)
        print(f' \n Results after {EPOCHS} global rounds of training (for OR): ')
        print("|---- Test Accuracy for {}: {:.2f}%".format(subset, 100*test_acc))
        accuracy_dict[subset] = test_acc
    else: 
        test_acc, test_loss = test_inference(submodel_dict[subset], test_dataset)
        accuracy_dict[subset] = test_acc

trainTime = time.time() - start_time
start_time = time.time()
shapley_dict = shapley(accuracy_dict, N)
shapTime = time.time() - start_time
start_time = time.time()
lc_dict = least_core(accuracy_dict, N)
LCTime = time.time() - start_time
totalShapTime = trainTime + shapTime
totalLCTime = trainTime + LCTime
print(f'\n ACCURACY: {accuracy_dict[powerset[-1]]}')
print('\n Total Time Shapley: {0:0.4f}'.format(totalShapTime))
print('\n Total Time LC: {0:0.4f}'.format(totalLCTime))

  0%|          | 0/5 [00:00<?, ?it/s]


 | Global Training Round : 1 |



 20%|██        | 1/5 [02:06<08:26, 126.61s/it]

 
Avg Training Stats after 1 global rounds:
Training Loss : 2.0541287753582003

 | Global Training Round : 2 |



 40%|████      | 2/5 [04:13<06:19, 126.62s/it]

 
Avg Training Stats after 2 global rounds:
Training Loss : 2.016412021398544

 | Global Training Round : 3 |



 60%|██████    | 3/5 [06:20<04:13, 126.80s/it]

 
Avg Training Stats after 3 global rounds:
Training Loss : 1.9729186907609302

 | Global Training Round : 4 |



 80%|████████  | 4/5 [08:27<02:06, 126.86s/it]

 
Avg Training Stats after 4 global rounds:
Training Loss : 1.9333354570567605

 | Global Training Round : 5 |



100%|██████████| 5/5 [10:34<00:00, 126.95s/it]

 
Avg Training Stats after 5 global rounds:
Training Loss : 1.89786670691967





 
 Results after 5 global rounds of training:
|---- Test Accuracy: 62.70%
 
 Results after 5 global rounds of training (for OR): 
|---- Test Accuracy for (1, 2): 44.01%
 
 Results after 5 global rounds of training (for OR): 
|---- Test Accuracy for (1, 3): 45.56%
 
 Results after 5 global rounds of training (for OR): 
|---- Test Accuracy for (1, 4): 46.28%
 
 Results after 5 global rounds of training (for OR): 
|---- Test Accuracy for (1, 5): 44.70%
 
 Results after 5 global rounds of training (for OR): 
|---- Test Accuracy for (1, 6): 44.29%
 
 Results after 5 global rounds of training (for OR): 
|---- Test Accuracy for (1, 7): 39.96%
 
 Results after 5 global rounds of training (for OR): 
|---- Test Accuracy for (1, 8): 39.09%
 
 Results after 5 global rounds of training (for OR): 
|---- Test Accuracy for (1, 9): 19.60%
 
 Results after 5 global rounds of training (for OR): 
|---- Test Accuracy for (1, 10): 8.34%
 
 Results after 5 global rounds of training (for OR): 
|---- Test Accu

In [None]:
def stats(vector):
    n = len(vector)
    egal = np.array([1/n for i in range(n)])
    normalised = np.array(vector / vector.sum())
    msg = f'Original vector: {vector}\n'
    msg += f'Normalised vector: {normalised}\n'
    msg += f'Max Dif: {normalised.max()-normalised.min()}\n'
    msg += f'Distance: {np.linalg.norm(normalised-egal)}\n'

    msg += f'Budget: {vector.sum()}\n'
    print(msg)

In [None]:
stats(np.array(list(shapley_dict.values())))

Original vector: [ 0.12156075  0.10917627  0.10981044  0.10419635  0.07705849  0.07902048
  0.04776313  0.03351619 -0.03449246 -0.12100964]
Normalised vector: [ 0.23084078  0.20732296  0.20852722  0.19786622  0.14633212  0.15005787
  0.09070098  0.06364639 -0.0655003  -0.22979423]
Max Dif: 0.4606350110622801
Distance: 0.4384159898612196
Budget: 0.5266



In [None]:
stats(np.array([i.value() for i in lc_dict.variables()])[1:])

Original vector: [0.100125 0.       0.091025 0.107525 0.101125 0.0763   0.0948   0.0561
 0.       0.      ]
Normalised vector: [0.159689   0.         0.14517544 0.17149123 0.16128389 0.12169059
 0.15119617 0.08947368 0.         0.        ]
Max Dif: 0.17149122807017544
Distance: 0.21834065249685256
Budget: 0.627

