In [3]:
import sys
sys.path.insert(1, '../src/')
import torch
import json
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import h5py
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
import helper
import utils
import models
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torch.nn.utils import parameters_to_vector, vector_to_parameters
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
import torchvision
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import math
import random
import shutil
import copy

In [4]:
class args:
    """Run configuration: plain class attributes read by the cells that follow."""

    # dataset/model selector and the device everything is moved to
    data = 'fedemnist'
    device = 'cuda:0'

    # mini-batch size and number of training rounds
    bs = 128
    epoch = 100

    # SGD optimizer settings
    lr = 0.01
    moment = 0.9
    nesterov = True
    wd = 0

In [5]:
class H5Dataset(Dataset):
    """In-memory dataset holding the examples of a single client.

    Args:
        dataset: mapping indexed by client id; ``dataset[client_id]`` must
            expose a ``'label'`` entry and a ``'pixels'`` entry.
        client_id: key of the client whose examples are loaded.

    Attributes:
        targets: ``LongTensor`` built from the client's ``'label'`` entry.
        inputs: float tensor built from the ``'pixels'`` entry, reshaped from
            ``(N, H, W)`` to ``(N, 1, H, W)`` (a singleton channel axis is
            inserted).
    """

    def __init__(self, dataset, client_id):
        self.targets = torch.LongTensor(dataset[client_id]['label'])
        self.inputs = torch.Tensor(dataset[client_id]['pixels'])
        # insert the channel dimension expected by conv layers: (N, H, W) -> (N, 1, H, W)
        shape = self.inputs.shape
        self.inputs = self.inputs.view(shape[0], 1, shape[1], shape[2])

    def classes(self):
        """Return the sorted unique label values present in this dataset."""
        return torch.unique(self.targets)

    def __add__(self, other):
        """Return a NEW dataset containing this dataset's samples followed by ``other``'s.

        Neither operand is modified, so ``a + b`` no longer grows ``a`` as a
        side effect. ``a += b`` still works because Python rebinds ``a`` to
        the returned object.
        """
        # bypass __init__: it needs the raw per-client mapping, which is not available here
        merged = type(self).__new__(type(self))
        merged.targets = torch.cat((self.targets, other.targets), 0)
        merged.inputs = torch.cat((self.inputs, other.inputs), 0)
        return merged

    def to(self, device):
        """Move both tensors to ``device`` in place and return ``self`` for chaining."""
        self.targets = self.targets.to(device)
        self.inputs = self.inputs.to(device)
        return self

    def __len__(self):
        """Number of samples held."""
        return self.targets.shape[0]

    def __getitem__(self, item):
        """Return the ``(input, target)`` pair stored at index ``item``."""
        inp, target = self.inputs[item], self.targets[item]
        return inp, target

In [None]:
# Locations of the raw HDF5 files (training split, held-out split).
train_dir, val_dir = (
    '../data/Fed_EMNIST/fed_emnist_digitsonly_train.h5',
    '../data/Fed_EMNIST/fed_emnist_digitsonly_test.h5',
)

In [None]:
# Open both HDF5 files read-only and keep their top-level 'examples' group.
# The file handles stay open for as long as these groups are referenced.
trainset, valset = (
    h5py.File(h5_path, 'r')['examples'] for h5_path in (train_dir, val_dir)
)

In [None]:
# Map every client id found in the training file to a consecutive integer index.
# The keys come from `trainset`: it is the group opened above, and no variable
# named `dataset` exists in this notebook.
users = [*trainset.keys()]
user_dict = {user: idx for idx, user in enumerate(users)}

In [None]:
# Materialise each client's train/validation split and store it as its own .pt file,
# named after the client's integer index from `user_dict`.
# NOTE(review): the two output directories must already exist; torch.save does not create them.
for key in tqdm(user_dict):
    user_trainset = H5Dataset(trainset, key)
    user_valset = H5Dataset(valset, key)
    torch.save(user_trainset, f'../data/Fed_EMNIST/user_trainsets/user_{user_dict[key]}_trainset.pt')
    torch.save(user_valset, f'../data/Fed_EMNIST/user_valsets/user_{user_dict[key]}_valset.pt')

In [None]:
# Paths of the combined (all-client) datasets; this rebinds train_dir / val_dir.
# NOTE(review): no cell visible in this notebook writes these two files — confirm
# they are produced elsewhere before running.
train_dir, val_dir = (
    '../data/Fed_EMNIST/fed_emnist_all_trainset.pt',
    '../data/Fed_EMNIST/fed_emnist_all_valset.pt',
)

# torch.load unpickles whole Python objects, so only load files you trust.
val_dataset, train_dataset = torch.load(val_dir), torch.load(train_dir)

In [None]:
# Mini-batch iterators: the training data is reshuffled on every pass,
# the validation data keeps its stored order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=args.bs,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=args.bs,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

In [None]:
# Model, loss and optimizer, all configured from `args`.
model = models.get_model(args.data).to(args.device)
criterion = nn.CrossEntropyLoss().to(args.device)
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=args.lr,
    momentum=args.moment,
    weight_decay=args.wd,
    nesterov=args.nesterov,
)
# The training loop calls `scheduler.step(val_loss)` after every round, so the
# scheduler has to exist. `verbose` is left out because recent PyTorch releases
# deprecate that argument.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=10)

In [None]:
# Flatten all model parameters into a single 1-D tensor.
params = parameters_to_vector(model.parameters())

In [None]:
# `detach_()` detaches `params` from the autograd graph IN PLACE (so `params` itself
# is changed as well); the deep copy is then an independent snapshot of the vector.
cpyed = copy.deepcopy(params.detach_())

In [None]:
# Display the snapshot taken above.
cpyed

In [None]:
# Write the values held in the flat `params` vector back into the model's parameters.
vector_to_parameters(params, model.parameters())

In [None]:
# Display the model's parameters as a flat vector again
# (presumably to compare against `cpyed` — scratch check).
parameters_to_vector(model.parameters())

In [None]:
# TensorBoard logger writing to the 'fed-emnist' directory.
writer = SummaryWriter('fed-emnist')

# A pair of CUDA events brackets the training run; the first is recorded now,
# the second is recorded once training has finished.
start_time = torch.cuda.Event(enable_timing=True)
end_time = torch.cuda.Event(enable_timing=True)
start_time.record()

In [None]:
def evaluate(model, criterion, data_loader, device):
    """Return ``(mean loss, accuracy)`` of ``model`` over every sample in ``data_loader``.

    The model is switched to eval mode and gradients are disabled while the
    loader is traversed. Both values are averaged per sample; an empty loader
    yields ``(0.0, 0.0)``.
    """
    model.eval()
    total_loss, n_correct, n_seen = 0.0, 0, 0
    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs = inputs.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)
            outputs = model(inputs)
            batch_size = labels.shape[0]
            # criterion returns the batch mean, so weight it by the batch size
            total_loss += criterion(outputs, labels).item() * batch_size
            n_correct += (outputs.argmax(dim=1) == labels).sum().item()
            n_seen += batch_size
    n_seen = max(n_seen, 1)  # avoid dividing by zero on an empty loader
    return total_loss / n_seen, n_correct / n_seen


# training loop
for rnd in tqdm(range(1, args.epoch+1)):
    model.train()  # evaluate() leaves the model in eval mode, so re-enable train mode each round
    train_loss, train_acc = 0.0, 0.0
    for inputs, labels in train_loader:
        # pass inputs to device, clear gradients
        inputs = inputs.to(args.device, non_blocking=True)
        labels = labels.to(args.device, non_blocking=True)
        optimizer.zero_grad()

        # forward-backward pass and update
        outputs = model(inputs)
        minibatch_loss = criterion(outputs, labels)
        minibatch_loss.backward()
        optimizer.step()

        with torch.no_grad():
            # keep track of round loss/accuracy (sums; normalised after the round)
            train_loss += minibatch_loss.item()*outputs.shape[0]
            pred_labels = outputs.argmax(dim=1)
            train_acc += torch.sum(torch.eq(pred_labels.view(-1), labels)).item()

    # inference after round
    train_loss, train_acc = train_loss/len(train_dataset), train_acc/len(train_dataset)
    # Validation uses the local helper: this notebook never imports a module named `infer`.
    val_loss, val_acc = evaluate(model, criterion, val_loader, args.device)
    scheduler.step(val_loss)

    # log/print data
    writer.add_scalar('Validation/Loss', val_loss, rnd)
    writer.add_scalar('Validation/Accuracy', val_acc, rnd)
    writer.add_scalar('Training/Loss', train_loss, rnd)
    writer.add_scalar('Training/Accuracy', train_acc, rnd)
    writer.flush()  # make the round's scalars visible in TensorBoard right away
    print(f'|Train/Valid Loss: {train_loss:.3f} / {val_loss:.3f}|', end='--')
    print(f'|Train/Valid Acc: {train_acc:.3f} / {val_acc:.3f}|', end='\r')

In [None]:
# Close the timing bracket and wait for all queued GPU work,
# so the end event has actually been reached before it is read.
end_time.record()
torch.cuda.synchronize()

# elapsed_time() reports milliseconds; convert to seconds, then minutes.
time_elapsed_secs = start_time.elapsed_time(end_time) / 1000
time_elapsed_mins = time_elapsed_secs / 60
print(f'Training took {time_elapsed_secs:.2f} seconds / {time_elapsed_mins:.2f} minutes')