In [1]:
%matplotlib inline
# python libraties
import os, cv2,itertools
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
from glob import glob
from PIL import Image
import pickle

# pytorch libraries
import torch
from torch import optim,nn
from torch.autograd import Variable
from torch.utils.data import DataLoader,Dataset
from torchvision import models,transforms

# sklearn libraries
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# to make the results are reproducible
np.random.seed(10)
torch.manual_seed(10)
torch.cuda.manual_seed(10)

print(os.listdir("/u/student/2020/cs20btech11046/resnet/input"))

In [2]:
# set random seeds (this is used to ensure that you get the same randomness everytime no matter how many times you run the code)
np.random.seed(10)
torch.manual_seed(10)
torch.cuda.manual_seed(10)

# set device (if gpu/cuda is available perform the neural network operations using that else use a cpu)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("| using device:", device)

| using device: cuda


In [3]:
# hyperparameters
bsz = 10  #batch sizebb
no_clients = 10 #no.of clients
epsilon = 1e-10 #used in scaffold_experiment function (not sure what formula is used)
lr = 0.01

# data loading

In [13]:
data_dir = '/u/student/2020/cs20btech11046/resnet/input/skin-cancer-mnist-ham10000'
all_image_path = glob(os.path.join(data_dir, '*', '*.jpg'))
imageid_path_dict = {os.path.splitext(os.path.basename(x))[0]: x for x in all_image_path}
lesion_type_dict = {
    'nv': 'Melanocytic nevi',
    'mel': 'dermatofibroma',
    'bkl': 'Benign keratosis-like lesions ',
    'bcc': 'Basal cell carcinoma',
    'akiec': 'Actinic keratoses',
    'vasc': 'Vascular lesions',
    'df': 'Dermatofibroma'
}

In [14]:
def compute_img_mean_std(image_paths):
    """
        computing the mean and std of three channel on the whole dataset,
        first we should normalize the image from 0-255 to 0-1
    """

    img_h, img_w = 224, 224
    imgs = []
    means, stdevs = [], []

    for i in tqdm(range(len(image_paths))):
        img = cv2.imread(image_paths[i])
        img = cv2.resize(img, (img_h, img_w))
        imgs.append(img)

    imgs = np.stack(imgs, axis=3)
    print(imgs.shape)

    imgs = imgs.astype(np.float32) / 255.

    for i in range(3):
        pixels = imgs[:, :, i, :].ravel()  # resize to one row
        means.append(np.mean(pixels))
        stdevs.append(np.std(pixels))

    means.reverse()  # BGR --> RGB
    stdevs.reverse()

    print("normMean = {}".format(means))
    print("normStd = {}".format(stdevs))
    return means,stdevs

In [15]:
norm_mean,norm_std = compute_img_mean_std(all_image_path)

100%|████████████████████████████████████| 20030/20030 [03:01<00:00, 110.14it/s]


(224, 224, 3, 20030)
normMean = [0.7630444, 0.5456509, 0.57003975]
normStd = [0.14092743, 0.15261324, 0.16997053]


In [4]:
#a class NonIIDMyDataset is created to access and transform data
class NonIIDMyDataset(Dataset):
    #the __init__ function in Python is like the C++ constructor in an object-oriented approach
    def __init__(self, data, targets, transform=None):
        self.data = data  #data is X
        self.targets = torch.LongTensor(targets)  #tragets are y. Convert y to a tensor to be able to used torch function on them
        self.transform = transform  #this is the transformation to be applied on to X. By default the value is None.
                                    #In the 2nd cell below you can see the exact transform used in the code

    #this function is used to apply a transformation (if any) to X and return the pair (X, y) based on the index passed
    def __getitem__(self, index):
        x = self.data[index]
        y = self.targets[index]

        if self.transform:
            # x = Image.fromarray(self.data[index].astype(np.uint8).transpose(1,2,0))
            x = self.transform(x)

        return x, y

    #this function is used to get the length/no.of features of X
    def __len__(self):
        return len(self.data)

#The functions with __ at the front and back of every function in Python are called Magic Methods/Dunder Methods.
#Magic methods are not meant to be invoked directly by you, but the invocation happens internally from the class on a certain action.
#Not sure of the meaning but may be this concept is understood better if you find where these methods are used int he code.

In [5]:
######################################################################
from torch.optim import Optimizer
from torch.optim.lr_scheduler import StepLR

class AlgoOptimizer(Optimizer):
    def __init__(self, params, lr, weight_decay):
        defaults = dict(lr=lr, weight_decay=weight_decay)
        super(AlgoOptimizer, self).__init__(params, defaults)

    def step(self, main_controls, client_controls, closure=None):

        loss = None
        if closure is not None:
            loss = closure

        for group in self.param_groups:
            for p, c, ci in zip(group['params'], main_controls.values(), client_controls.values()):
                if p.grad is None:
                    continue
                dp = p.grad.data + c.data - ci.data
                p.data = p.data - dp.data * 0.01

        return loss
######################################################################

In [6]:
######################################################################
def save_object(obj, filename):
    with open(filename, "wb") as fp:
        pickle.dump(obj, fp, pickle.HIGHEST_PROTOCOL)

def read_object(filename):
    with open(filename, "rb") as fp:
        obj = pickle.load(fp)

    return obj
######################################################################

In [7]:
#these are the locations of train and test data for 20 clients used in the code
#there are many other folders as well in this dataset folder. May be different ones are used for different cases
train_location = '/u/student/2020/cs20btech11046/resnet/old/dataset/practical/10/train/'
test_location = '/u/student/2020/cs20btech11046/resnet/old/dataset/practical/10/test/'

In [8]:
#the transforms library imported above, is used to create a transformation of the data(X)
#transforms.Compose - to put more than one sequantial transforms into one
#transforms.ToTensor - to convert a list/np array to a tensor
#transform.Normalize - transforms.Normalize(mean, std, inplace=False) to normalize a tensor with give mean and std
#to normalize a data means changinf x to (x-mean)/std
#here mean is 0.137 and std is 0.3081. May be these values are obtained by calculating mean and std of X separately or they are random. Not sure

#how did these value we got
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

#this function converts y of data to a tensor using __init__, applies the above transformation to x using the __getitenm__ function in the NonIIDMyDataset
#and loads the data in batches (in order to train it with a neural network later) and stores the loaded data into client_loader list created above
def noniid_train_loader(bsz=10):
    client_loader_train, client_loader_val = [], []  #this list is used to store the train data loaded using 'DataLoader' module from torch in batches

    #for all the no_clients clients
    for i in range(no_clients):
        #go to the folder /content/drive/MyDrive/dataset/practical/<no_clients>/train/, read the file from client_num.npz (liek 1.npz, 2.npz ... 20.npz) and store the X and y values
        file_path = str(i)+'.npz'
        loc = train_location + file_path
        data = np.load(loc)
        X = list(data['features'])
        Y = list(data['labels'])

        #create an object called dataset which is an instance of the class NonIIDMyDataset
        dataset = NonIIDMyDataset(X, Y, transform=transform)

        dataset_train, dataset_val = torch.utils.data.random_split(dataset, [0.8, 0.2])
        #in batches of 10, load the whole dataset and store it in client_load
        client_load_train = torch.utils.data.DataLoader(dataset_train, batch_size=bsz, shuffle=True)
        client_load_val = torch.utils.data.DataLoader(dataset_val, batch_size=bsz, shuffle=True)

        #append every client's dataload into client_loader list
        client_loader_train.append(client_load_train)
        client_loader_val.append(client_load_val)

    print(client_loader_train, client_loader_val)  #you can see <no_clients> objects of torch dataloaders
    return client_loader_train, client_loader_val

In [9]:
[noniid_client_train_loader, noniid_client_val_loader] = noniid_train_loader(bsz = bsz) #call the above funtion to perform all the actions explained inside the func, noniid_train_loader

[<torch.utils.data.dataloader.DataLoader object at 0x7fa04f246310>, <torch.utils.data.dataloader.DataLoader object at 0x7fa04ef4a290>, <torch.utils.data.dataloader.DataLoader object at 0x7fa04ea5f850>, <torch.utils.data.dataloader.DataLoader object at 0x7fa04ea8af50>, <torch.utils.data.dataloader.DataLoader object at 0x7fa04ea8b250>, <torch.utils.data.dataloader.DataLoader object at 0x7fa04ea8b6d0>, <torch.utils.data.dataloader.DataLoader object at 0x7fa04ea8ba90>, <torch.utils.data.dataloader.DataLoader object at 0x7fa04ea8bf90>, <torch.utils.data.dataloader.DataLoader object at 0x7fa04ea5fc10>, <torch.utils.data.dataloader.DataLoader object at 0x7fa02ef4c690>] [<torch.utils.data.dataloader.DataLoader object at 0x7fa04ea7b410>, <torch.utils.data.dataloader.DataLoader object at 0x7fa04ee78850>, <torch.utils.data.dataloader.DataLoader object at 0x7fa04ea8ac50>, <torch.utils.data.dataloader.DataLoader object at 0x7fa04ea8b050>, <torch.utils.data.dataloader.DataLoader object at 0x7fa04ea8

In [10]:
#the exact same thing as in the func noniid_train_loader is done here. Expect that the data is extracted now read from the loacation /content/drive/MyDrive/dataset/practical/<no_clients>/test
test_loader = []
def noniid_test_loader(batch_size,shuffle):
    for i in range(no_clients):
        file_path = str(i)+'.npz'
        loc = test_location + file_path
        data = np.load(loc)
        X = list(data['features'])
        Y = list(data['labels'])

        dataset = NonIIDMyDataset(X, Y, transform=transform)
        client_load = torch.utils.data.DataLoader(dataset, batch_size=bsz, shuffle=True)

        test_loader.append(client_load)

    print(test_loader)
    return test_loader

In [11]:
test_loader = noniid_test_loader(batch_size = 1000, shuffle=False)  #test data is tranformed loaded in batches of 1000 and stored in test_loader

[<torch.utils.data.dataloader.DataLoader object at 0x7fa04ea78190>, <torch.utils.data.dataloader.DataLoader object at 0x7fa02ef4dad0>, <torch.utils.data.dataloader.DataLoader object at 0x7fa02ef4e410>, <torch.utils.data.dataloader.DataLoader object at 0x7fa02ef4e810>, <torch.utils.data.dataloader.DataLoader object at 0x7fa02ef4cbd0>, <torch.utils.data.dataloader.DataLoader object at 0x7fa04ef4bb10>, <torch.utils.data.dataloader.DataLoader object at 0x7fa02ef4ef10>, <torch.utils.data.dataloader.DataLoader object at 0x7fa02ef4f090>, <torch.utils.data.dataloader.DataLoader object at 0x7fa02ef4f250>, <torch.utils.data.dataloader.DataLoader object at 0x7fa02ef4f410>]


In [12]:
# non-iid
#this cell is totally just for observation
label_dist = torch.zeros(10)  #since we have 10 classes, create a torch array with 10 zeros
print(type(noniid_client_train_loader[0]))
print(noniid_client_train_loader[0])

#using a for-loop, count the no.of rows is dataset which has 10 classes respectively for client 1
for (x,y) in noniid_client_train_loader[0]:
    label_dist+= torch.sum(F.one_hot(y,num_classes=10), dim=0)  #one-hot encoding is explained int he next cell

print("non-iid: ", label_dist)
#I suppose there should be a line like label_dist = torch.zeros(10) here as well
for (x,y) in test_loader[0]:

    label_dist+= torch.sum(F.one_hot(y,num_classes=10), dim=0)

print("non-iid: ", label_dist)

<class 'torch.utils.data.dataloader.DataLoader'>
<torch.utils.data.dataloader.DataLoader object at 0x7fa04f246310>


NameError: name 'F' is not defined

'''
one hot encoding is a concept where we assign 1 for the class of that row and 0 for the rest
example say we have 5 classes in the dataset.
The classes of say 10 rows of data are 1 3 2 4 1 5 3 2 1 4. (i.e., 1st row of data belongs to class 1 ...)
After applying one hot encoding the classes of these 10 rows will be represented as
1th row : 1 0 0 0 0
2th row : 0 0 1 0 0
3th row : 0 1 0 0 0
4th row : 0 0 0 1 0
5th row : 1 0 0 0 0
6th row : 0 0 0 0 1
7th row : 0 0 1 0 0
8th row : 0 1 0 0 0
9th row : 1 0 0 0 0
10th row: 0 0 0 1 0
'''

In [None]:
#this function is only used to observe how many parameters are used in the neural network we create. It is only for observation. Not to effect the running of any code
#parameters in neural networks are like no.of weights or bias params included to the network. Check it out on the internet
def num_params(model):
    """ """
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [None]:
# define cnn
#A CNN (Convolutional Neural Network) is another kind of NN.
#In MLPs, there are layers like linear layers where a linear operation like y = w.T*x+b is applied (a linear operation) followed by activation
#Similarly, in CNN, as the name suggests, convolution is done on x (input) to get y (output) on some layers. Here kernels are used.
#I suggest you to look through some blogs and understand practically and mathematically

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 5)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.fc = nn.Linear(1024, 512)
        self.out = nn.Linear(512, 10)
        self.loss = 0
        self.len = 0
        self.control = {}
        self.delta_control = {}
        self.delta_y = {}

    def forward(self, x):
        x = F.max_pool2d(self.conv1(x), 2, 2) # [B x 32 x 12 x 12]
        x = F.max_pool2d(self.conv2(x), 2, 2) # [B x 64 x 4 x 4]
        x = x.flatten(1) # [B x 1024]
        x = F.relu(self.fc(x)) # [B x 512]
        x = self.out(x) # [B x 10]
        return x

print(CNN())
print(num_params(CNN()))

'''
The whole idea of neural network and data revolves around the below steps:
1. Create a basic neural network be it MLP, CNN, RNN
2. Transform & Normalize data to be able to train and validate data using the network
3. Change the weights etc., parameters of the neural network through back propogation or any other method
4. For the same choose a loss function and an optimizer.
5. Repeat until you reach some fixed no.of iterations or desired result

So basically train your network with initial weights and the data and predict ŷ.
Calculate loss/error using the loss func you choose. An example is (y-ŷ).
If the error is more, re-train the network with new weights. This is done through back propgation which is automatically done most of the times.
'''

In [None]:
criterion = nn.CrossEntropyLoss() #the loss function we chose is cross entropy. The mathematical formula is available on the internet

#the below function is used to validate (find the percentage of correctly predicted output)
def validate(model, client_loader):
    #change the model/network to evaluation mode and for the given client, predict ŷ = model(x). If ŷ=y, add 1 to correct
    model = model.to(device)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for (t, (x,y)) in enumerate(client_loader):
            x = x.to(device)
            x = x.permute(0, 2, 3, 1)
            #print("x",x.shape)
            y = y.to(device)
            out = model(x)
            _, predicted = torch.max(out, 1)
            total += y.size(0)
            correct += (predicted == y).sum().item()
            #correct += torch.sum(torch.argmax(out, dim=1) == y).item()
            #total += x.shape[0]
    return correct/total

'''
Basically what is happening above is that, create a state_dict called aggregated_state_dict and intitialize it with our present client's data
Now run a for loop through all the other client's model and add their params (state_dict) to the aggregated_state_dict
Then to normalize it, divide all the params of aggreated_state_dict by no.of clients (20 here)
Then create a model structure for aggregated_model and load all these params into this.

But ig we can just add the params of all clients directly instead of that if it!=client_no statement. Cz at the end I feel we are just adding the params of all the local models.
'''

######################################################################
def test(cnn):
        cnn.eval()
        _, _, Dte = nn_seq_wind(cnn.name, cnn.B)
        pred = []
        y = []
        for (seq, target) in tqdm(Dte):
            with torch.no_grad():
                seq = seq.to(device)
                y_pred = cnn(seq)
                pred.extend(list(chain.from_iterable(y_pred.data.tolist())))
                y.extend(list(chain.from_iterable(target.data.tolist())))

        pred = np.array(pred)
        y = np.array(y)
        print("mae: ", mean_absolute_error(y, pred), "rmse: ", np.sqrt(mean_squared_error(y, pred)))
    
    print("\n\n-------------------Testing the final model on all the clients-------------------\n\n")
    
    model = read_object("./main.pkl")
    model.eval()
    
    c = clients
    for client in c:
        model.name = client
        test(model)
######################################################################

In [None]:

######################################################################
def train(cnn, main, client_loader_train, client_loader_val):
    cnn.train()
    cnn.len = len(client_loader_train)
    
    print("-------------------------------Training the Data-------------------------------")
   
    cnn_copy = copy.deepcopy(cnn)
    optimizer = AlgoOptimizer(cnn.parameters(), lr=lr, weight_decay=1e-5)
    lr_step = StepLR(optimizer, step_size=20, gamma=0.0001)
    # training
    min_epochs = 10
    best_model = None
    min_val_loss = 5
    
    for epoch in tqdm(range(K)):
        train_loss = []
        for (i, (x,y)) in enumerate(client_loader_train):
            x = x.to(device)
            x = x.permute(0, 2, 3, 1)
            
            y = y.to(device)
            optimizer.zero_grad()
            out = cnn(x)
            loss = criterion(out, y)
            train_loss.append(loss.item())
            loss.backward()
            optimizer.step(main.control, cnn.control)
            
        # validation
#         cnn.eval()
#         val_loss = 0.0
#         with torch.no_grad():
#             for (i, (x,y)) in enumerate(client_loader_val):
#                 x = x.to(device)
#                 x = x.permute(0, 2, 3, 1)

#                 y = y.to(device)
#                 out = cnn(x)
#                 loss = criterion(out, y)

#                 val_loss += loss.item()

#         val_loss /= len(client_loader_val)
        
#         if epoch + 1 >= min_epochs and val_loss < min_val_loss:
#             min_val_loss = val_loss
#             best_model = copy.deepcopy(cnn)

        print('epoch {:01d} train_loss {:.8f} '.format(epoch, np.mean(train_loss)))
        #cnn.train()

    temp = {}
    for k, v in cnn.named_parameters():
        temp[k] = v.data.clone()

    for k, v in cnn_copy.named_parameters():
        local_steps = K * len(client_loader_train)
        cnn.control[k] = cnn.control[k] - main.control[k] + (v.data - temp[k]) / (local_steps * 0.01)
        cnn.delta_y[k] = temp[k] - v.data
        cnn.delta_control[k] = cnn.control[k] - cnn_copy.control[k]

    return cnn

In [None]:
def aggregation(N, main):
    delta_x = {}
    delta_c = {}
    
    for k, v in main.named_parameters():
        delta_x[k] = torch.zeros_like(v.data)
        delta_c[k] = torch.zeros_like(v.data)

    for i in range(N):
        client = read_object("./clients/client"+str(i)+".pkl")
        for k, v in client.named_parameters():
            delta_x[k] += client.delta_y[k] / N  # averaging
            delta_c[k] += client.delta_control[k] / N  # averaging

    Ng = 1
    for k, v in main.named_parameters():
        v.data += (Ng)*delta_x[k].data
        main.control[k].data += delta_c[k].data * (N / N)

    return main

def caller(client_no, round_no):
    cnn = CNN().to(device)

    for k, v in cnn.named_parameters():
        cnn.control[k] = torch.zeros_like(v.data)
        cnn.delta_control[k] = torch.zeros_like(v.data)
        cnn.delta_y[k] = torch.zeros_like(v.data)

    if round_no == 0:
        main = read_object("./main.pkl")
    else:
        main = read_object("./clients/client"+str(client_no)+"_main.pkl")
        main = aggregation(N, main)

    save_object(main, "./clients/client"+str(client_no)+"_main.pkl")

    cnn = train(cnn, main, noniid_client_train_loader[client_no], noniid_client_val_loader[client_no])

    save_object(cnn, "./clients/client"+str(client_no)+".pkl")
        
    if client_no == N-1 and round_no == R-1:
        main = read_object("./main.pkl")
        main = aggregation(N, main)
        save_object(main, "./main.pkl")

In [None]:
######################################################################
# N: No.of clients
# Cper: Percentage of clients to be chosen for every communication round
# K: No.of update steps in the clients
# B: Batch size
# R: No.of communication rounds
# input_dim: Dimension of the input
# lr: learning rate

N, Cper, K, B, R = 10, 0.5, 10, 50, 10

clients = []
for task in range(1, 2):
    for zone in range(1, 11):
        clients.append("Task" + str(task) + "_W_Zone" + str(zone))

cnn = CNN().to(device)

for k, v in cnn.named_parameters():
    cnn.control[k] = torch.zeros_like(v.data)
    cnn.delta_control[k] = torch.zeros_like(v.data)
    cnn.delta_y[k] = torch.zeros_like(v.data)

save_object(cnn, "./main.pkl")

round_acc = []

for r in range(R):
    print("-----------------------------------Round " + str(r+1) + "-----------------------------------")

    for i in range(N):
        print("-----------------------------------Client " + str(i+1) + "-----------------------------------")
        caller(i, r)
        
    acc_val = 0
    for client_no in range(N):
        model = read_object("./clients/client"+str(client_no)+"_main.pkl")
        val_acc = validate(model, noniid_client_val_loader[client_no])
        acc_val = acc_val + val_acc
        print("client {}, validation acc: {}".format(client_no+1, val_acc))
        round_acc.append(val_acc)
        
    acc_val = acc_val/no_clients
    
    save_object(round_acc, "./round_"+str(r+1)+"_acc.pkl")
    print("client accuracies after round ", r, " ", round_acc)
    print('round_acc {} '.format(acc_val))
    
    round_acc = []
            
    
######################################################################

In [None]:
test_loader

x = np.arange(0,15)
plt.figure(figsize=(8,6))

plt.title("Scaffold test accuracy after $t$ rounds on non-iid MNIST")

plt.xlabel("Communication rounds $t$")
plt.ylabel("Test accuracy")
plt.axis([0, 15, 0.3, 1])



plt.axhline(y=0.7, color='r', linestyle='dashed')
plt.axhline(y=0.9, color='b', linestyle='dashed')

plt.plot(x, acc_cnn_noniid_r10_ep10, label='2NN, $m=10$, $E=1$')
