In [None]:
!nvidia-smi

# import

In [2]:
import copy
import pstats
import numpy as np
import random

import time

import torchvision
from torchvision import transforms
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils, datasets
import torch.nn.functional as F
from torch.cuda.amp import autocast as autocast

# Experimental Settings

In [None]:
# Record when the experiment begins: `start` is the raw epoch timestamp and
# `local_start_time` is the human-readable local time shown in the banner.
start = time.time()
start_struct = time.localtime(time.time())
local_start_time = time.asctime(start_struct)
start_banner = f"==========================================================START experiment time: {local_start_time}=========================================================="
print(start_banner)

# Pick the compute device (first CUDA GPU when one is visible, otherwise the CPU)
# and report the library versions used for this run.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print('torch version: ', torch.__version__)
print('torchvision version: ', torchvision.__version__)

# Seed every random number generator the notebook draws from so that a
# "Restart & Run All" reproduces the same experiment.
seed = 1
# Python's own RNG must be seeded too: noisify_label() uses random.sample and
# initialization() uses random.uniform, so leaving it unseeded makes the label
# noise and the non-IID client profiles differ from run to run.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
# Ask cuDNN for deterministic kernels.
torch.backends.cudnn.deterministic = True

# dataset
# Supported values: mnist, fashionmnist, cifar. For the Clothing1M dataset, request the
# official (not open source) data via https://github.com/Cysu/noisy_label
datasetType = 'fashionmnist'

# TODO:
# Settings
# --- data partitioning ---
Is_IID = True                                       # True: IID split, False: Non-IID split
classes_per_client = 5                              # Non-IID only: number of classes held by each client
K = 100                                             # total number of clients
Clients_all_idxs = list(range(K))                   # indexes of all clients

# --- label noise ---
adding_noise = True                                 # whether to corrupt training labels
add_noise_type = "symmetric"                        # noise type: "symmetric" or "pairflip"
noise_rate = 0.5                                    # fraction of samples per class that get a noisy label

# --- federated optimisation ---
total_rounds = 100                                  # number of global rounds
C = 0.05                                            # fraction of clients selected in each global round
local_epochs = 5                                    # local epochs per selected client
lr = 0.15                                           # relatively large learning rate to limit overfitting (see JOINT OPTIMIZATION, CVPR 2018)
momentum = 0.9                                      # SGD momentum
batchSize = 60                                      # local batch size
weight_decay = 0.0001                               # l2 regularization

feature_dim = 128                                   # intermediate feature dimension

# --- run-time bookkeeping ---
test_accuracies, TotalClients = [], []              # per-round test accuracies / all client objects

# Server-side caches used for model aggregation; one slot per client index, initialised to 0.
cachedWeights = dict.fromkeys(Clients_all_idxs, 0)
cachedDatasizes = dict.fromkeys(Clients_all_idxs, 0)


#TODO: Hyperparameters
gamma = 0.4
lambda_e = 0.6
T_d_reverse = 3                                     # equals to 1/T_d
global_t_w = 0.2 * total_rounds


# Dataset loading

In [14]:
from torchvision.transforms.transforms import Resize
"""# Dataset loading"""

# Build the train/test datasets for the configured `datasetType`. Every branch
# defines `dataset_train`, `dataset_test` and `input_channel`; an unsupported
# name aborts the cell.
if datasetType in ('fashionmnist', 'mnist'):
    # Both grayscale datasets share one ToTensor + Normalize pipeline.
    # NOTE(review): (0.1307, 0.3081) is applied to FashionMNIST as well - confirm this is intended.
    transforms_mnist = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    if datasetType == 'fashionmnist':
        _dataset_cls, _data_root = datasets.FashionMNIST, './data/fashionmnist_data/'
    else:
        _dataset_cls, _data_root = datasets.MNIST, './data/mnist/'
    dataset_train = _dataset_cls(
        _data_root, train=True, download=True, transform=transforms_mnist)
    dataset_test = _dataset_cls(
        _data_root, train=False, download=True, transform=transforms_mnist)
    input_channel = 1                           # input color channel num

elif datasetType == 'cifar':
    _cifar_mean = [0.485, 0.456, 0.406]
    _cifar_std = [0.229, 0.224, 0.225]
    transforms_cifar = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=_cifar_mean, std=_cifar_std),
    ])
    dataset_train = datasets.CIFAR10(
        './data/cifar/', train=True, download=True, transform=transforms_cifar)
    dataset_test = datasets.CIFAR10(
        './data/cifar/', train=False, download=True, transform=transforms_cifar)
    input_channel = 3                           # input color channel num

else:
    raise NotImplementedError('Error: unrecognized dataset')


Num_classes = 10                                            # number of classes in the dataset
labels_array = list(range(Num_classes))                     # every class id, in order
labels = np.array(dataset_train.targets)                    # labels of the training set as an array
true_Labels = labels.copy()                                 # independent copy of those labels


# noise_or_not = []

_first_image, _ = dataset_train[0]
img_size = _first_image.shape                               # shape of one (transformed) image
classes = np.array([class_idx for class_idx in dataset_train.class_to_idx.values()])


# number of training samples divided by the number of clients (a float)
local_datasize = len(labels)/K


# Augmentation applied to each training batch inside Client.train.
if datasetType in ('fashionmnist', 'mnist'):
    # Grayscale datasets: a random rotation of up to 30 degrees.
    _rotation = transforms.RandomRotation(30)
    tt_transform = transforms.Compose([_rotation])
elif datasetType == 'cifar':
    # Colour dataset: random flip, colour jitter (applied with p=0.8) and random grayscale.
    s = 1
    ssize = 28
    color_jitter = torchvision.transforms.ColorJitter(0.8 * s, 0.8 * s, 0.8 * s, 0.2 * s)
    _cifar_augmentations = [
        transforms.RandomHorizontalFlip(),
        transforms.RandomApply([color_jitter], p=0.8),
        transforms.RandomGrayscale(p=0.2),
    ]
    tt_transform = transforms.Compose(_cifar_augmentations)


# retrieve the data according to distributed data indexes
class CustomDataset(Dataset):
    """View of ``dataset`` restricted to the positions listed in ``idxs``.

    Item ``k`` of this dataset is the ``(image, label)`` pair stored at
    position ``idxs[k]`` of the wrapped dataset.
    """

    def __init__(self, dataset, idxs):
        # Materialise the indexes so they can be addressed by position.
        self.idxs = [position for position in idxs]
        self.dataset = dataset

    def __len__(self):
        subset_size = len(self.idxs)
        return subset_size

    def __getitem__(self, item):
        source_position = self.idxs[item]
        image, label = self.dataset[source_position]
        return (image, label)



"""add noise"""
def noisify_label(true_label, num_classes=10, noise_type="symmetric"):
    """Return a corrupted replacement for ``true_label``.

    :param true_label: the clean class id (anything ``int()`` accepts, e.g. a
        Python int or a 0-dim tensor)
    :param num_classes: number of classes; valid labels are ``0 .. num_classes-1``
    :param noise_type: ``"symmetric"`` draws one of the other classes with
        Python's ``random`` module; ``"pairflip"`` maps class ``c`` to
        ``(c - 1) % num_classes``
    :return: the noisy label as a Python int
    :raises ValueError: if the label is out of range or ``noise_type`` is unknown
    """
    # Normalise to a plain int so the result type does not depend on the input type.
    label = int(true_label)
    if not 0 <= label < num_classes:
        raise ValueError(
            f"true_label {label} is outside the valid range [0, {num_classes})")

    if noise_type == "symmetric":
        label_lst = [candidate for candidate in range(num_classes) if candidate != label]
        return random.sample(label_lst, k=1)[0]

    if noise_type == "pairflip":
        return (label - 1) % num_classes

    # Previously an unknown type fell through and returned None, which would
    # then be written into the training targets.
    raise ValueError(
        f"unknown noise_type {noise_type!r}; expected 'symmetric' or 'pairflip'")


# NonIID
# One empty bucket per class id (numpy integers converted to plain ints);
# classesIdxsGenerator later fills each bucket with sample indexes.
classes_mapper = {int(class_id): [] for class_id in classes}

# TODO: Data Partitioning


def customizedIdxsGenerator(Bias_array, local_datasize):
    """
    Draw the sample indexes that make up one client's local dataset.

    @Params:
    Bias_array represents the label distribution of the client (one fraction per class)
    local_datasize represents the local dataset size

    @Returns:
    a shuffled Python list of int sample indexes, drawn per class without
    replacement from the module-level ``classes_mapper``
    """
    # Turn the per-class fractions into per-class sample counts (truncated to int).
    per_class_counts = copy.deepcopy(Bias_array)
    for position, fraction in enumerate(Bias_array):
        per_class_counts[position] = int(local_datasize * fraction)

    # Start from an empty array and collect one chunk of indexes per class.
    collected_chunks = [np.array([])]
    for class_id, candidate_idxs in classes_mapper.items():
        how_many = int(per_class_counts[class_id])
        collected_chunks.append(
            np.random.choice(candidate_idxs, how_many, replace=False))
    local_dataitem_idxs = np.concatenate(collected_chunks, axis=0)

    # Mix the classes together, then convert back to integer indexes.
    np.random.shuffle(local_dataitem_idxs)
    return local_dataitem_idxs.astype(int).tolist()


def classesIdxsGenerator(classes, dataset):
    """
    Fill the module-level ``classes_mapper`` with the sample indexes of each class.

    Every position of ``dataset`` is appended to the bucket of its target, then
    each bucket is converted to a numpy array and shuffled after re-seeding
    numpy with the module-level ``seed``. ``classes`` is accepted but not used.
    Returns None.
    """
    for sample_idx in range(len(dataset)):
        # tensor to int
        bucket_key = int(dataset.targets[sample_idx])
        classes_mapper[bucket_key].append(sample_idx)

    for class_id in classes_mapper.keys():
        bucket = np.array(classes_mapper[class_id])
        classes_mapper[class_id] = bucket
        np.random.seed(seed)
        np.random.shuffle(bucket)          # in place: shuffles the stored array
    return


# Group the training sample indexes by class before any label is altered.
classesIdxsGenerator(classes=classes, dataset=dataset_train)

# Adding noise
if adding_noise == True:
    len_of_Dataset = len(dataset_train)

    for class_id, class_sample_idxs in classes_mapper.items():
        # Corrupt the same fraction (`noise_rate`) of every class.
        noisy_count = int(len(class_sample_idxs) * noise_rate)
        # numpy is re-seeded before each class is sampled.
        np.random.seed(seed)
        chosen_for_noise = np.random.choice(
            class_sample_idxs, noisy_count, replace=False)

        for changed_item_index in chosen_for_noise:
            clean_label = dataset_train.targets[changed_item_index]
            noisy_label = noisify_label(
                clean_label, num_classes=Num_classes, noise_type=add_noise_type)
            dataset_train.targets[changed_item_index] = noisy_label


Files already downloaded and verified
Files already downloaded and verified


# Base Model

In [15]:
def call_bn(bn, x):
    """Apply the callable ``bn`` to ``x`` and return its output."""
    normalised = bn(x)
    return normalised


class CNN(nn.Module):
    """Nine-layer convolutional classifier returning both logits and features.

    Three stages of three 3x3 convolutions, each followed by batch norm and a
    leaky ReLU. The first two stages end with 2x2 max pooling and channel
    dropout; the last stage uses unpadded convolutions and is averaged over
    its remaining spatial extent into a 128-dimensional feature vector, which
    a linear layer maps to ``n_outputs`` logits.

    :param input_channels: number of channels of the input images
    :param n_outputs: number of classes (size of the logit vector)
    :param dropout_rate: drop probability of the two channel-dropout layers
    :param top_bn: when True, a batch-norm layer is applied to the logits
    """

    def __init__(self, input_channels=3, n_outputs=10, dropout_rate=0.25, top_bn=False):
        super(CNN, self).__init__()
        self.dropout_rate = dropout_rate
        self.top_bn = top_bn
        # Layers are created in the original order so that a seeded
        # initialisation produces the same parameters as before.
        self.c1 = nn.Conv2d(input_channels, 128,
                            kernel_size=3, stride=1, padding=1)
        self.c2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.c3 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.c4 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.c5 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.c6 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.c7 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=0)
        self.c8 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=0)
        self.c9 = nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=0)
        self.l_c1 = nn.Linear(128, n_outputs)
        self.bn1 = nn.BatchNorm2d(128)
        self.bn2 = nn.BatchNorm2d(128)
        self.bn3 = nn.BatchNorm2d(128)
        self.bn4 = nn.BatchNorm2d(256)
        self.bn5 = nn.BatchNorm2d(256)
        self.bn6 = nn.BatchNorm2d(256)
        self.bn7 = nn.BatchNorm2d(512)
        self.bn8 = nn.BatchNorm2d(256)
        self.bn9 = nn.BatchNorm2d(128)
        if self.top_bn:
            # forward() uses this layer when top_bn is set; it was never
            # created before, so top_bn=True failed with AttributeError.
            # Only created on demand so the default state_dict is unchanged.
            self.bn_c1 = nn.BatchNorm1d(n_outputs)

    def forward(self, x):
        """Return ``(logit, feature)`` for a batch of images ``x``."""
        h = x
        h = self.c1(h)
        h = F.leaky_relu(self.bn1(h), negative_slope=0.01)
        h = self.c2(h)
        h = F.leaky_relu(self.bn2(h), negative_slope=0.01)
        h = self.c3(h)
        h = F.leaky_relu(self.bn3(h), negative_slope=0.01)
        h = F.max_pool2d(h, kernel_size=2, stride=2)
        # The functional dropout defaults to training=True; pass the module's
        # mode so that model.eval() really switches dropout off.
        h = F.dropout2d(h, p=self.dropout_rate, training=self.training)

        h = self.c4(h)
        h = F.leaky_relu(self.bn4(h), negative_slope=0.01)
        h = self.c5(h)
        h = F.leaky_relu(self.bn5(h), negative_slope=0.01)
        h = self.c6(h)
        h = F.leaky_relu(self.bn6(h), negative_slope=0.01)
        h = F.max_pool2d(h, kernel_size=2, stride=2)
        h = F.dropout2d(h, p=self.dropout_rate, training=self.training)

        h = self.c7(h)
        h = F.leaky_relu(self.bn7(h), negative_slope=0.01)
        h = self.c8(h)
        h = F.leaky_relu(self.bn8(h), negative_slope=0.01)
        h = self.c9(h)
        h = F.leaky_relu(self.bn9(h), negative_slope=0.01)
        # Average over whatever spatial extent is left (kernel = current height).
        h = F.avg_pool2d(h, kernel_size=h.shape[2])

        feature = h.view(h.size(0), h.size(1))

        logit = self.l_c1(feature)
        if self.top_bn:
            logit = call_bn(self.bn_c1, logit)
        return logit, feature

# Client

In [16]:
"""# Client"""


class Client(object):
    """One federated-learning participant holding a slice of the training set.

    The client wraps its sample indexes in a shuffling ``DataLoader`` and
    trains a model it is handed with SGD. ``momentum`` and ``global_t_w`` are
    read from the notebook-level settings at construction time.
    """

    def __init__(self, client_index, dataset, batchsize, learning_rate, local_epochs, data_idxs, bias_array):
        """
        :param client_index: identifier of the client
        :param dataset: the shared training dataset
        :param batchsize: local mini-batch size
        :param learning_rate: SGD learning rate
        :param local_epochs: number of passes over the local data per round
        :param data_idxs: indexes of the samples of ``dataset`` owned by this client
        :param bias_array: per-class fractions describing the client's label distribution
        """
        self.client_index = client_index
        self.bias_array = torch.Tensor(bias_array)
        self.data_idxs = data_idxs
        self.batchsize = batchsize
        self.train_loader = DataLoader(CustomDataset(
            dataset, data_idxs), batch_size=batchsize, shuffle=True)
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.epochs = local_epochs
        self.local_datasize = len(data_idxs)
        self.t_w = global_t_w
        # printProfile() reports this attribute; give it a default so the
        # method works even when the caller has not assigned it yet.
        self.wrong_labels_idxs = {}

    def printProfile(self):
        """Print the client's configuration and the size of its data."""
        print(
            f"ClientNumber: {self.client_index},learningRate: {self.learning_rate},local_epochs {self.epochs},and its bias is {self.bias_array}")
        print(
            f"Type of dataidxs is {type(self.data_idxs)} and its length is {len(self.data_idxs)}, wrong labels length is {len(self.wrong_labels_idxs)}")
        return

    def similarloss(self, predict, target):
        """
        Cosine-based distance ``2 - 2*cos(predict, target)`` along the last dimension.

        :param predict: model prediction for original data
        :param target: model prediction for mildly augmented data
        :return: one loss value per row
        """
        return 2-2*torch.cosine_similarity(predict, target, dim=-1)

    def L1loss(self,predict,target):
        """
        Mean absolute error between the two predictions.

        :param predict: model prediction for original data
        :param target: model prediction for mildly augmented data
        :return: loss
        """
        loss_f = nn.L1Loss(reduction='mean')
        l1_loss = loss_f(predict,target)
        return l1_loss

    def L2loss(self,predict,target):
        """
        Mean squared error between the two predictions.

        :param predict: model prediction for original data
        :param target: model prediction for mildly augmented data
        :return: loss
        """
        loss_f_mse = nn.MSELoss(reduction='mean')
        loss_mse = loss_f_mse(predict,target)
        return loss_mse

    def js(self, p_output, q_output):
        """
        Jensen-Shannon style divergence: the average of the two KL terms taken
        against the mean of both distributions.

        :param p_output: probabilities predicted for original data
        :param q_output: probabilities predicted for mildly augmented data
        :return: loss
        """
        # NOTE(review): reduction='mean' averages over every element, not per
        # sample ('batchmean'); kept as is because it sets the loss scale.
        KLDivLoss = nn.KLDivLoss(reduction='mean')
        log_mean_output = ((p_output + q_output )/2).log()
        return (KLDivLoss(log_mean_output, p_output) + KLDivLoss(log_mean_output, q_output))/2

    def train(self, model, current_round):
        """
        Train ``model`` on the client's data for ``self.epochs`` local epochs.

        Each batch is passed through the model twice (original and augmented
        with ``tt_transform``). The loss is the sum of
          * cross entropy on the sharpened mixture of both predictions,
          * the JS consistency term weighted by ``betaa`` (ramped up linearly
            to ``gamma`` while ``current_round < self.t_w``),
          * the entropy regulariser weighted by ``lambda_e``.

        :param model: network returning ``(logits, features)``; trained in place
        :param current_round: index of the current global round
        :return: ``(model.state_dict(), mean of the per-epoch average losses)``
        """
        criterion = nn.CrossEntropyLoss()

        sm = torch.nn.Softmax(dim=1)
        lsm = torch.nn.LogSoftmax(dim=1)

        optimizer = torch.optim.SGD(model.parameters(
        ), lr=self.learning_rate, momentum=self.momentum, weight_decay=weight_decay)

        # Keep the batches on the same device as the model. The augmented
        # batch used to be moved with an unconditional .cuda(), which crashed
        # on machines without a GPU.
        model_device = next(model.parameters()).device

        # Weight of the consistency term; it does not change within a round.
        betaa = gamma
        if(current_round<self.t_w):
            betaa = gamma * current_round/self.t_w

        e_loss = []
        for _ in range(1, self.epochs+1):  # _ means each local epoch
            train_loss = 0.0
            model.train()
            for data, labels in self.train_loader:
                data, labels = data.to(model_device), labels.to(model_device)

                # augment on the model's device to accelerate training
                data_aug = tt_transform(data)
                data_aug = data_aug.to(model_device)

                optimizer.zero_grad()  # clear the gradients

                # NOTE(review): autocast is used without a GradScaler - confirm
                # that half-precision gradients do not underflow.
                with autocast():
                    output1, _f1 = model(data)  # forward pass on the original batch
                    output2, _f2 = model(data_aug) # forward pass on the augmented batch

                    mix_1 = np.random.beta(1,1) # mixing predict1 and predict2
                    mix_2 = 1-mix_1

                    # temperature-scaled probabilities used for self distillation
                    logits1,logits2=torch.softmax(output1*3, dim=1),torch.softmax(output2*3, dim=1)

                    # clamp for training stability (avoids log(0)), as in Symmetric CE, ICCV 2019
                    logits1,logits2 = torch.clamp(logits1, min=1e-6, max=1.0), torch.clamp(logits2, min=1e-6, max=1.0)

                    # self entropy regularization averaged over BOTH views; the
                    # second term used logits1 again, so the augmented view
                    # never contributed.
                    entropy_1 = torch.mean(torch.sum(sm(logits1) * lsm(logits1), dim=1))
                    entropy_2 = torch.mean(torch.sum(sm(logits2) * lsm(logits2), dim=1))
                    L_e = - (entropy_1 + entropy_2) * 0.5

                    # to mix up the two predictions
                    p = torch.softmax(output1, dim=1)*mix_1 + torch.softmax(output2, dim=1)*mix_2

                    # to get sharpened prediction p_s
                    pt = p**(2)
                    # normalize the prediction
                    pred_mix = pt / pt.sum(dim=1, keepdim=True)

                    # NOTE(review): CrossEntropyLoss receives probabilities here,
                    # not raw logits - confirm this is intended.
                    loss = criterion(pred_mix, labels)  # to compute cross entropy loss

                    loss +=  self.js(logits1,logits2) * betaa

                    loss += L_e * lambda_e

                loss.backward()  # do a backwards pass
                optimizer.step()  # perform a single optimization step
                train_loss += loss.item()*data.size(0)  # update training loss

            # average losses
            train_loss = train_loss/len(self.train_loader.dataset)
            e_loss.append(train_loss)

        total_loss = sum(e_loss)/len(e_loss)
        return model.state_dict(), total_loss

# Testing

In [None]:
def TestGlobalModel(model, dataset, criterion, test_batchsize, num_classes, classes):
    """
    Evaluate ``model`` on ``dataset`` and record its accuracy.

    Prints the average loss, the accuracy of every class and the overall
    accuracy, then appends the overall accuracy (in percent) to the
    module-level ``test_accuracies`` list.

    :param model: network returning ``(logits, features)``
    :param dataset: dataset yielding ``(data, label)`` pairs
    :param criterion: loss applied to ``(logits, labels)``
    :param test_batchsize: evaluation batch size
    :param num_classes: number of classes to report
    :param classes: display name of each class, indexed by class id
    :raises ValueError: if ``dataset`` is empty
    """
    test_loss = 0.0
    correct_class = list(0. for i in range(num_classes))
    total_class = list(0. for i in range(num_classes))

    test_loader = DataLoader(dataset, batch_size=test_batchsize)
    l = len(test_loader.dataset)
    if l == 0:
        raise ValueError("cannot evaluate on an empty dataset")

    # Evaluate on the device the model lives on; fall back to the previous
    # "CUDA if available" rule for a model without parameters.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        eval_device = first_param.device
    else:
        eval_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.eval()
    correcttt = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for data, labels in test_loader:
            data, labels = data.to(eval_device), labels.to(eval_device)

            output, _ = model(data)
            loss = criterion(output, labels)
            test_loss += loss.item()*data.size(0)

            _, pred = torch.max(output, 1)
            hits = pred.eq(labels.view_as(pred))
            correcttt += hits.float().sum().item()

            # Per-class statistics over EVERY sample of the batch. The loop
            # used to run over range(num_classes), i.e. only the first
            # num_classes samples, and failed on shorter batches.
            for label, hit in zip(labels.view(-1).tolist(), hits.view(-1).tolist()):
                correct_class[label] += float(hit)
                total_class[label] += 1

    # avg test loss
    test_loss = test_loss/l
    print("Test Loss: {:.6f}\n".format(test_loss))

    # print test accuracy
    for i in range(num_classes):
        if total_class[i] > 0:
            print('Test Accuracy of %5s: %2d%% (%2d/%2d)' %
                  (classes[i], 100 * correct_class[i] / total_class[i],
                   np.sum(correct_class[i]), np.sum(total_class[i])))
        else:
            print('Test Accuracy of %5s: N/A (no training examples)' %
                  (classes[i]))

    print('\nTEST ACC {:.3f} Final Test  Accuracy: {:.3f} ({}/{})'.format(100.0*correcttt/l,
                                                                          100. *
                                                                          np.sum(
                                                                              correct_class) / np.sum(total_class),
                                                                          np.sum(correct_class), np.sum(total_class)))

    test_accuracies.append(100.0*correcttt/l)


# Algorithm

In [17]:
def averaging(w_group, selected_client_idxs):
    """
    Unweighted average of the state dicts of the selected clients.

    :param w_group: mapping from client index to that client's state dict
    :param selected_client_idxs: indexes of the clients to average
    :return: a new state dict; the entries of ``w_group`` are not modified
    :raises ValueError: if no client is selected
    """
    num_selected = len(selected_client_idxs)
    if num_selected == 0:
        raise ValueError("selected_client_idxs must contain at least one client")

    # Deep copy so the in-place accumulation below leaves the first client's weights untouched.
    w_avg = copy.deepcopy(w_group[selected_client_idxs[0]])
    for k in w_avg.keys():
        for client_id in selected_client_idxs[1:]:
            w_avg[k] += w_group[client_id][k]
        # Divide by the number of clients that were summed. Dividing by
        # len(w_group) under-scaled the result whenever w_group held more
        # entries than were selected.
        w_avg[k] = torch.div(w_avg[k], num_selected)

    return w_avg


"""## Overall FedAvg"""

def FedAvg(model, rounds):
    """
    Run ``rounds`` global rounds of federated averaging on ``model``.

    In every round a fraction ``C`` of the clients (at least one) is sampled
    without replacement, each trains a deep copy of the current global model,
    and the returned weights are combined with weights proportional to the
    clients' local data sizes. The aggregate is loaded into ``model``, which is
    then evaluated with ``TestGlobalModel``; the round duration is printed.
    """
    global_weights = model.state_dict()
    last_round_timing = time.time()
    print(f"Start experiment time: {last_round_timing}")
    for round_idx in range(rounds):
        print(f"\nGLOBAL ROUND {round_idx}, Start Training")
        selected_client_num = max(int(C*len(Clients_all_idxs)), 1)
        selected_client_idxs = np.random.choice(
            Clients_all_idxs, selected_client_num, replace=False)

        # --- local training on every selected client ---
        local_Loss = []
        tempTotalDatasize = 0
        for client_id in selected_client_idxs:
            participant = TotalClients[client_id]
            weights, client_loss = participant.train(
                model=copy.deepcopy(model), current_round=round_idx)
            cachedWeights[client_id] = copy.deepcopy(weights)
            cachedDatasizes[client_id] = participant.local_datasize
            tempTotalDatasize += participant.local_datasize
            local_Loss.append(copy.deepcopy(client_loss))
        loss_avg = sum(local_Loss) / len(local_Loss)

        # --- aggregation weighted by local data size ---
        first_id = selected_client_idxs[0]
        remaining_ids = selected_client_idxs[1:]
        weights_avg = copy.deepcopy(cachedWeights[first_id])
        for key in weights_avg.keys():
            first_share = cachedDatasizes[first_id]/tempTotalDatasize
            weights_avg[key] = torch.mul(weights_avg[key], first_share)
            for client_id in remaining_ids:
                share = cachedDatasizes[client_id]/tempTotalDatasize
                weights_avg[key] += cachedWeights[client_id][key]*share

        global_weights = weights_avg
        print('\n')
        model.load_state_dict(weights_avg)

        # --- evaluation of the new global model ---
        Criterion = nn.CrossEntropyLoss()
        TestGlobalModel(model=model, dataset=dataset_test, criterion=Criterion,
                        test_batchsize=128, num_classes=Num_classes, classes=classes)

        current_round_timing = time.time()
        elapsed = current_round_timing-last_round_timing
        print(
            f"Duration for this round is : {elapsed}")
        last_round_timing = current_round_timing



# Initialization

In [18]:
"""# Initialization"""


def initialization():
    """
    Create the ``K`` clients and store them in the module-level ``TotalClients``.

    Each client gets a label distribution: uniform over all classes when
    ``Is_IID`` is set, otherwise random shares over ``classes_per_client``
    randomly chosen classes. Its sample indexes are drawn with
    ``customizedIdxsGenerator``. Every 11th client prints its profile.

    The list is emptied first, so calling this function again (for example
    when the cell is re-run) rebuilds the clients instead of appending a
    second set.
    """
    print(f"Generating {K} clients and sampling random generation profile...")
    TotalClients.clear()
    num_labels = len(labels_array)
    for i in range(K):
        if Is_IID == False:
            # Non-IID: random positive shares on a random subset of classes,
            # normalised to sum to one.
            selected_labels = np.random.choice(
                labels_array, classes_per_client, replace=False)
            client_distribution = np.zeros(num_labels)
            for each in selected_labels:
                client_distribution[each] = random.uniform(0, 1)
            client_distribution /= client_distribution.sum()
        else:
            # IID: equal share per class (0.1 for the ten classes used here).
            client_distribution = np.array([1.0 / num_labels for _ in labels_array])
        biasArrayTemp = client_distribution

        # generate all indexes based on the label distribution
        clientDatasetIdxs = customizedIdxsGenerator(
            Bias_array=biasArrayTemp, local_datasize=local_datasize)

        clientTemp = Client(client_index=i, dataset=dataset_train, batchsize=batchSize, learning_rate=lr,
                            local_epochs=local_epochs, data_idxs=clientDatasetIdxs, bias_array=biasArrayTemp)
        # Must be set before printProfile(), which reports its length.
        clientTemp.wrong_labels_idxs = {}

        if(i % 11 == 0):
            clientTemp.printProfile()
            if(adding_noise == True):
                print(
                    f"【Noise Added, its type is {add_noise_type} and noise ration is {noise_rate}】")
        TotalClients.append(clientTemp)

    # noise_or_not = np.transpose(
    #     dataset_train.targets) == np.transpose(true_Labels)
    print(f"Done, {K} clients are generated for initialization...")


# Main

In [None]:
def main():
    """
    Run the whole experiment and store the per-round test accuracies.

    Builds the CNN (moved to the GPU when one is available), creates the
    clients, runs ``total_rounds`` rounds of FedAvg, prints the accuracies
    and appends them as one line to a text file whose name encodes the
    dataset, the noise type, the noise rate and the IID setting.
    """
    # load model
    cnn_net = CNN(input_channels=input_channel)
    if torch.cuda.is_available():
        cnn_net.cuda()
    initialization()
    FedAvg(model=cnn_net, rounds=total_rounds)

    print("==========================Final Results================================")
    print(test_accuracies)
    # Average over the rounds that actually exist: dividing by a fixed 10
    # under-reported the result for runs shorter than ten rounds.
    last_rounds = test_accuracies[-10:]
    last_rounds_average = sum(last_rounds)/len(last_rounds) if last_rounds else float('nan')
    print(f"Last 10 rounds average test accuracy: {last_rounds_average} %")
    # The '_IID_' field now reports Is_IID; it used to print adding_noise.
    txtfile = './'+'FedAvgLSR+'+'_dataset-'+datasetType+'_NoiseType_'+add_noise_type+'_NoiseRate_'+str(noise_rate)+'_IID_'+str(Is_IID)+'.txt'
    with open(txtfile,'a') as myfile:
        text = str(test_accuracies)
        # The file is opened in append mode: end each run with a newline so
        # results of successive runs do not run into each other.
        myfile.write(text[1:-1] + '\n')
    print("=======================================================================")

# Entry point: executing this cell runs the whole experiment via main().
main()