In [1]:
import argparse
import torch
import numpy as np
import os
import datetime
import torch.nn as nn
import torchvision
from torch.autograd import Variable
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
import time

from collections import OrderedDict
from typing import List
import flwr as fl
import matplotlib.pyplot as plt

print("flwr", fl.__version__)
print("numpy", np.__version__)
print("torch", torch.__version__)
print("torchvision", torchvision.__version__)

DEVICE = torch.device("cpu")
# DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Training on {DEVICE}")

flwr 0.18.0
numpy 1.21.5
torch 1.11.0+cu102
torchvision 0.12.0+cu102
Training on cpu


## Data Loading and Preprocessing

In [2]:
import pandas as pd
df = pd.read_csv('./jd_computer_final1.csv')

In [3]:
len(df)

1429297

In [4]:
NUM_CLIENTS = 299

In [5]:
session = []
session = df['Session_ID'].unique()

In [6]:
session

array([     0,      1,      6, ..., 999995, 999997, 999999])

In [7]:
partition_size = 999999 // NUM_CLIENTS

In [8]:
len(session)

488656

In [9]:
partition_size

3344

In [10]:
df2 = [0 for i in range(NUM_CLIENTS + 1)]

In [11]:
for i in range(NUM_CLIENTS + 1):
    if i == 0:
        df2[i] = df[df['Session_ID'].between((i * partition_size) - 1, ((i+1) * partition_size), inclusive='right')]
    else:
        df2[i] = df[df['Session_ID'].between((i * partition_size), ((i+1) * partition_size), inclusive='right')]
    

In [12]:
len(df2)

300

In [13]:
len_val = partition_size // 10 

In [14]:
len_val

334

In [15]:
trainclient = [0 for i in range(NUM_CLIENTS + 1)]
valclient = [0 for i in range(NUM_CLIENTS + 1)]
for i in range(NUM_CLIENTS + 1):
    trainclient[i] = df2[i][df2[i]['Session_ID'] < (((i+1) * partition_size) - len_val)]
    valclient[i] = df2[i][df2[i]['Session_ID'] >= (((i+1) * partition_size) - len_val)]

In [16]:
class Dataset(object):
    def __init__(self, path, sep=',', session_key='Session_ID', item_key='category', time_key='Seq', n_sample=-1, itemmap=None, itemstamp=None, time_sort=True):
        # Read csv
        #self.df = pd.read_csv(path, sep=sep, dtype={session_key: int, item_key: int, time_key: float})
        self.df = path
        self.session_key = session_key
        self.item_key = item_key
        self.time_key = time_key
        self.time_sort = time_sort
        if n_sample > 0:
            self.df = self.df[:n_sample]

        # Add colummn item index to data
        self.add_item_indices(itemmap=itemmap)
        """
        Sort the df by time, and then by session ID. That is, df is sorted by session ID and
        clicks within a session are next to each other, where the clicks within a session are time-ordered.
        """
        self.df.sort_values([session_key, time_key], inplace=True)
        self.click_offsets = self.get_click_offset()
        self.session_idx_arr = self.order_session_idx()

    def add_item_indices(self, itemmap=None):
        """
        Add item index column named "item_idx" to the df
        Args:
            itemmap (pd.DataFrame): mapping between the item Ids and indices
        """
        if itemmap is None:
            item_ids = self.df[self.item_key].unique()  # type is numpy.ndarray
            item2idx = pd.Series(data=np.arange(len(item_ids)),
                                 index=item_ids)
            # Build itemmap is a DataFrame that have 2 columns (self.item_key, 'item_idx)
            itemmap = pd.DataFrame({self.item_key: item_ids,
                                   'item_idx': item2idx[item_ids].values})
        self.itemmap = itemmap
        self.df = pd.merge(self.df, self.itemmap, on=self.item_key, how='inner')

    def get_click_offset(self):
        """
        self.df[self.session_key] return a set of session_key
        self.df[self.session_key].nunique() return the size of session_key set (int)
        self.df.groupby(self.session_key).size() return the size of each session_id
        self.df.groupby(self.session_key).size().cumsum() retunn cumulative sum
        """
        offsets = np.zeros(self.df[self.session_key].nunique() + 1, dtype=np.int32)
        offsets[1:] = self.df.groupby(self.session_key).size().cumsum()
        return offsets

    def order_session_idx(self):
        if self.time_sort:
            sessions_start_time = self.df.groupby(self.session_key)[self.time_key].min().values
            session_idx_arr = np.argsort(sessions_start_time)
        else:
            session_idx_arr = np.arange(self.df[self.session_key].nunique())
        return session_idx_arr

    @property
    def items(self):
        return self.itemmap[self.item_key].unique()

In [17]:
class DataLoader():
    def __init__(self, dataset, batch_size=50):
        """
        A class for creating session-parallel mini-batches.

        Args:
             dataset (SessionDataset): the session dataset to generate the batches from
             batch_size (int): size of the batch
        """
        self.dataset = dataset
        self.batch_size = batch_size

    def __iter__(self):
        """ Returns the iterator for producing session-parallel training mini-batches.

        Yields:
            input (B,): torch.FloatTensor. Item indices that will be encoded as one-hot vectors later.
            target (B,): a Variable that stores the target item indices
            masks: Numpy array indicating the positions of the sessions to be terminated
        """
        # initializations
        df = self.dataset.df
        click_offsets = self.dataset.click_offsets
        session_idx_arr = self.dataset.session_idx_arr

        iters = np.arange(self.batch_size)
        maxiter = iters.max()
        start = click_offsets[session_idx_arr[iters]]
        end = click_offsets[session_idx_arr[iters] + 1]
        mask = []  # indicator for the sessions to be terminated
        finished = False

        while not finished:
            minlen = (end - start).min()
            # Item indices(for embedding) for clicks where the first sessions start
            idx_target = df.item_idx.values[start]

            for i in range(minlen - 1):
                # Build inputs & targets
                idx_input = idx_target
                idx_target = df.item_idx.values[start + i + 1]
                input = torch.LongTensor(idx_input)
                target = torch.LongTensor(idx_target)
                yield input, target, mask

            # click indices where a particular session meets second-to-last element
            start = start + (minlen - 1)
            # see if how many sessions should terminate
            mask = np.arange(len(iters))[(end - start) <= 1]
            for idx in mask:
                maxiter += 1
                if maxiter >= len(click_offsets) - 1:
                    finished = True
                    break
                # update the next starting/ending point
                iters[idx] = maxiter
                start[idx] = click_offsets[session_idx_arr[maxiter]]
                end[idx] = click_offsets[session_idx_arr[maxiter] + 1]

In [18]:
train_data = [0 for i in range(NUM_CLIENTS + 1)]
valid_data = [0 for i in range(NUM_CLIENTS + 1)]
for i in range(NUM_CLIENTS + 1):
    train_data[i] = Dataset(trainclient[i])
    valid_data[i] = Dataset(valclient[i])

## Single Setting

In [19]:
class TOP1_max(nn.Module):
    def __init__(self):
        super(TOP1_max, self).__init__()

    def forward(self, logit):
        logit_softmax = F.softmax(logit, dim=1)
        diff = -(logit.diag().view(-1, 1).expand_as(logit) - logit)
        loss = torch.mean(logit_softmax * (torch.sigmoid(diff) + torch.sigmoid(logit ** 2)))
        return loss
    
class TOP1Loss(nn.Module):
    def __init__(self):
        super(TOP1Loss, self).__init__()
    def forward(self, logit):
        """
        Args:
            logit (BxB): Variable that stores the logits for the items in the mini-batch
                         The first dimension corresponds to the batches, and the second
                         dimension corresponds to sampled number of items to evaluate
        """
        diff = -(logit.diag().view(-1, 1).expand_as(logit) - logit)
        loss = torch.sigmoid(diff).mean() + torch.sigmoid(logit ** 2).mean()
        return loss    
    
class LossFunction(nn.Module):
    def __init__(self, loss_type='TOP1', use_cuda=False):
        """ An abstract loss function that can supports custom loss functions compatible with PyTorch."""
        super(LossFunction, self).__init__()
        self.loss_type = loss_type
        self.use_cuda = use_cuda
        if loss_type == 'TOP1-max':
            self._loss_fn = TOP1_max()
        elif loss_type == 'TOP1':
            self._loss_fn = TOP1Loss()
        else:
            raise NotImplementedError

    def forward(self, logit):
        return self._loss_fn(logit) 

In [20]:
loss_function = LossFunction('TOP1-max')

In [21]:
class GRU4REC(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, final_act='tanh',
                 dropout_hidden=.5, dropout_input=0, batch_size=50, embedding_dim=-1, use_cuda=False):
        super(GRU4REC, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.dropout_hidden = dropout_hidden
        self.dropout_input = dropout_input
        self.embedding_dim = embedding_dim
        self.batch_size = batch_size
        self.use_cuda = use_cuda
        self.device = torch.device('cuda' if use_cuda else 'cpu')
        self.onehot_buffer = self.init_emb()
        self.h2o = nn.Linear(hidden_size, output_size)
        self.create_final_activation(final_act)
        if self.embedding_dim != -1:
            self.look_up = nn.Embedding(input_size, self.embedding_dim)
            self.gru = nn.GRU(self.embedding_dim, self.hidden_size, self.num_layers, dropout=self.dropout_hidden)
        else:
            self.gru = nn.GRU(self.input_size, self.hidden_size, self.num_layers, dropout=self.dropout_hidden)
        self = self.to(self.device)

    def create_final_activation(self, final_act):
        if final_act == 'tanh':
            self.final_activation = nn.Tanh()
        elif final_act == 'relu':
            self.final_activation = nn.ReLU()
        elif final_act == 'softmax':
            self.final_activation = nn.Softmax()
        elif final_act == 'softmax_logit':
            self.final_activation = nn.LogSoftmax()
        elif final_act.startswith('elu-'):
            self.final_activation = nn.ELU(alpha=float(final_act.split('-')[1]))
        elif final_act.startswith('leaky-'):
            self.final_activation = nn.LeakyReLU(negative_slope=float(final_act.split('-')[1]))

    def forward(self, input, hidden):
        '''
        Args:
            input (B,): a batch of item indices from a session-parallel mini-batch.
            target (B,): torch.LongTensor of next item indices from a session-parallel mini-batch.

        Returns:
            logit (B,C): Variable that stores the logits for the next items in the session-parallel mini-batch
            hidden: GRU hidden state
        '''

        if self.embedding_dim == -1:
            embedded = self.onehot_encode(input)
            if self.training and self.dropout_input > 0: embedded = self.embedding_dropout(embedded)
            embedded = embedded.unsqueeze(0)
        else:
            embedded = input.unsqueeze(0)
            embedded = self.look_up(embedded)

        output, hidden = self.gru(embedded, hidden) #(num_layer, B, H)
        output = output.view(-1, output.size(-1))  #(B,H)
        logit = self.final_activation(self.h2o(output))

        return logit, hidden

    def init_emb(self):
        '''
        Initialize the one_hot embedding buffer, which will be used for producing the one-hot embeddings efficiently
        '''
        onehot_buffer = torch.FloatTensor(self.batch_size, self.output_size)
        onehot_buffer = onehot_buffer.to(self.device)
        return onehot_buffer

    def onehot_encode(self, input):
        """
        Returns a one-hot vector corresponding to the input
        Args:
            input (B,): torch.LongTensor of item indices
            buffer (B,output_size): buffer that stores the one-hot vector
        Returns:
            one_hot (B,C): torch.FloatTensor of one-hot vectors
        """
        self.onehot_buffer.zero_()
        index = input.view(-1, 1)
        one_hot = self.onehot_buffer.scatter_(1, index, 1)
        return one_hot

    def embedding_dropout(self, input):
        p_drop = torch.Tensor(input.size(0), 1).fill_(1 - self.dropout_input)
        mask = torch.bernoulli(p_drop).expand_as(input) / (1 - self.dropout_input)
        mask = mask.to(self.device)
        input = input * mask
        return input

    def init_hidden(self):
        '''
        Initialize the hidden state of the GRU
        '''
        try:
            h0 = torch.zeros(self.num_layers, self.batch_size, self.hidden_size).to(self.device)
        except:
            self.device = 'cpu'
            h0 = torch.zeros(self.num_layers, self.batch_size, self.hidden_size).to(self.device)
        return h0

In [128]:
input_size = len(train_data[105].items)
hidden_size = 100
num_layers = 1
output_size = input_size
batch_size = 64
dropout_input = 0
dropout_hidden = 0.5
embedding_dim = -1
final_act = 'tanh'
loss_type = 'TOP1-max'
optimizer_type = 'Adagrad'
lr = 0.05
weight_decay = 0
momentum = 0
eps = 1e-6
n_epochs = 5
time_sort = False
sigma = None

In [129]:
input_size

71

In [130]:
cuda = False

In [131]:
#use random seed defined
np.random.seed(22)
torch.manual_seed(22)

<torch._C.Generator at 0x7f687815e4b0>

In [132]:
seed = 0
if cuda:
    torch.cuda.manual_seed(seed)

In [133]:
loss_function = LossFunction(loss_type=loss_type, use_cuda=cuda)

In [134]:
model = GRU4REC(input_size, hidden_size, output_size, final_act=final_act,
                            num_layers=num_layers, use_cuda=cuda, batch_size=batch_size,
                            dropout_input=dropout_input, dropout_hidden=dropout_hidden, embedding_dim=embedding_dim)

In [135]:
def init_model(model):
    global sigma
    if sigma is not None:
        for p in model.parameters():
            if sigma != -1 and sigma != -2:
                sigma = sigma
                p.data.uniform_(-sigma, sigma)
            elif len(list(p.size())) > 1:
                sigma = np.sqrt(6.0 / (p.size(0) + p.size(1)))
                if sigma == -1:
                    p.data.uniform_(-sigma, sigma)
                else:
                    p.data.uniform_(0, sigma)

In [136]:
init_model(model)

In [137]:
class Optimizer:
    def __init__(self, params, optimizer_type='Adagrad', lr=.05,
                 momentum=0, weight_decay=0, eps=1e-6):
        '''
        An abstract optimizer class for handling various kinds of optimizers.
        You can specify the optimizer type and related parameters as you want.
        Usage is exactly the same as an instance of torch.optim

        Args:
            params: torch.nn.Parameter. The NN parameters to optimize
            optimizer_type: type of the optimizer to use
            lr: learning rate
            momentum: momentum, if needed
            weight_decay: weight decay, if needed. Equivalent to L2 regulariztion.
            eps: eps parameter, if needed.
        '''
        if optimizer_type == 'RMSProp':
            self.optimizer = optim.RMSprop(params, lr=lr, eps=eps, weight_decay=weight_decay, momentum=momentum)
        elif optimizer_type == 'Adagrad':
            self.optimizer = optim.Adagrad(params, lr=lr, weight_decay=weight_decay)
        elif optimizer_type == 'Adadelta':
            self.optimizer = optim.Adadelta(params, lr=lr, eps=eps, weight_decay=weight_decay)
        elif optimizer_type == 'Adam':
            self.optimizer = optim.Adam(params, lr=lr, eps=eps, weight_decay=weight_decay)
        elif optimizer_type == 'SparseAdam':
            self.optimizer = optim.SparseAdam(params, lr=lr, eps=eps)
        elif optimizer_type == 'SGD':
            self.optimizer = optim.SGD(params, lr=lr, momentum=momentum, weight_decay=weight_decay)
        else:
            raise NotImplementedError

    def zero_grad(self):
        self.optimizer.zero_grad()

    def step(self):
        self.optimizer.step()

In [138]:
optimizer = Optimizer(model.parameters(), optimizer_type=optimizer_type, lr=lr, weight_decay=weight_decay, momentum=momentum, eps=eps)

In [139]:
def get_recall(indices, targets): #recall --> wether next item in session is within top K=20 recommended items or not
    """
    Calculates the recall score for the given predictions and targets
    Args:
        indices (Bxk): torch.LongTensor. top-k indices predicted by the model.
        targets (B): torch.LongTensor. actual target indices.
    Returns:
        recall (float): the recall score
    """
    targets = targets.view(-1, 1).expand_as(indices)
    hits = (targets == indices).nonzero()
    if len(hits) == 0:
        return 0
    n_hits = (targets == indices).nonzero()[:, :-1].size(0)
    recall = float(n_hits) / targets.size(0)
    return recall

In [140]:
def get_mrr(indices, targets): #Mean Receiprocal Rank --> Average of rank of next item in the session.
    """
    Calculates the MRR score for the given predictions and targets
    Args:
        indices (Bxk): torch.LongTensor. top-k indices predicted by the model.
        targets (B): torch.LongTensor. actual target indices.
    Returns:
        mrr (float): the mrr score
    """
    tmp = targets.view(-1, 1)
    targets = tmp.expand_as(indices)
    hits = (targets == indices).nonzero()
    ranks = hits[:, -1] + 1
    ranks = ranks.float()
    rranks = torch.reciprocal(ranks)
    mrr = torch.sum(rranks).data / targets.size(0)
    return mrr

In [141]:
def evaluate(indices, targets, k=20):
    """
    Evaluates the model using Recall@K, MRR@K scores.

    Args:
        logits (B,C): torch.LongTensor. The predicted logit for the next items.
        targets (B): torch.LongTensor. actual target indices.

    Returns:
        recall (float): the recall score
        mrr (float): the mrr score
    """
    _, indices = torch.topk(indices, k, -1)
    recall = get_recall(indices, targets)
    mrr = get_mrr(indices, targets)
    return recall, mrr

In [142]:
class Evaluation(object):
    def __init__(self, model, loss_func, use_cuda, k=5):
        self.model = model
        self.loss_func = loss_func
        self.topk = k
        self.device = torch.device('cuda' if use_cuda else 'cpu')
        #self.device = torch.device('cpu')

    def eval(self, eval_data, batch_size):
        self.model.eval()
        losses = []
        recalls = []
        mrrs = []
        dataloader = DataLoader(eval_data, batch_size)
        with torch.no_grad():
            hidden = self.model.init_hidden()
            for ii, (input, target, mask) in enumerate(dataloader):
            #for input, target, mask in dataloader:
                input = input.to(self.device)
                target = target.to(self.device)
                logit, hidden = self.model(input, hidden)
                logit_sampled = logit[:, target.view(-1)]
                loss = self.loss_func(logit_sampled)
                recall, mrr = evaluate(logit, target, k=self.topk)

                # torch.Tensor.item() to get a Python number from a tensor containing a single value
                losses.append(loss.item())
                recalls.append(recall)
                mrrs.append(mrr.cpu())
        mean_losses = np.mean(losses)
        mean_recall = np.mean(recalls)
        mean_mrr = np.mean(mrrs)
        #mean_mrr = 0

        return mean_losses, mean_recall, mean_mrr

In [143]:
class Trainer(object):
    def __init__(self, model, train_data, eval_data, optim, use_cuda, loss_func, batch_size):
        self.model = model
        self.train_data = train_data
        self.eval_data = eval_data
        self.optim = optim
        self.loss_func = loss_func
        self.evaluation = Evaluation(self.model, self.loss_func, use_cuda, k = 5)
        self.device = torch.device('cuda' if use_cuda else 'cpu')
        #self.device = torch.device('cpu')
        self.batch_size = batch_size
        #self.args = args

    def train(self, start_epoch, end_epoch, start_time=None):
        if start_time is None:
            self.start_time = time.time()
        else:
            self.start_time = start_time

        for epoch in range(start_epoch, end_epoch + 1):
            st = time.time()
            print('Start Epoch #', epoch)
            train_loss = self.train_epoch(epoch)
            loss, recall, mrr = self.evaluation.eval(self.eval_data, self.batch_size)


            print("Epoch: {}, train loss: {:.4f}, loss: {:.4f}, recall: {:.4f}, mrr: {:.4f}, time: {}".format(epoch, train_loss, loss, recall, mrr, time.time() - st))
            checkpoint = {
                'model': self.model,
                'epoch': epoch,
                'optim': self.optim,
                'loss': loss,
                'recall': recall,
                'mrr': mrr
            }
            #model_name = os.path.join('checkpoint', "model_{0:05d}.pt".format(epoch))
            #torch.save(checkpoint, model_name)
            #print("Save model as %s" % model_name)


    def train_epoch(self, epoch):
        self.model.train()
        losses = []

        def reset_hidden(hidden, mask):
            """Helper function that resets hidden state when some sessions terminate"""
            if len(mask) != 0:
                hidden[:, mask, :] = 0
            return hidden

        hidden = self.model.init_hidden()
        dataloader = DataLoader(self.train_data, self.batch_size)
        #for ii,(data,label) in tqdm(enumerate(train_dataloader),total=len(train_data)):
        for ii, (input, target, mask) in enumerate(dataloader):
            input = input.to(self.device)
            target = target.to(self.device)
            self.optim.zero_grad()
            hidden = reset_hidden(hidden, mask).detach()
            logit, hidden = self.model(input, hidden)
            # output sampling
            logit_sampled = logit[:, target.view(-1)]
            loss = self.loss_func(logit_sampled)
            losses.append(loss.item())
            loss.backward()
            self.optim.step()

        mean_losses = np.mean(losses)
        return mean_losses

In [144]:
trainer = Trainer(model, train_data=train_data[105], eval_data=valid_data[105], optim=optimizer, use_cuda=cuda, loss_func=loss_function, batch_size=batch_size)

In [145]:
print('#### START TRAINING....')
trainer.train(0, n_epochs - 1)

#### START TRAINING....
Start Epoch # 0
Epoch: 0, train loss: 0.0144, loss: 0.0148, recall: 0.5547, mrr: 0.5068, time: 0.07386970520019531
Start Epoch # 1
Epoch: 1, train loss: 0.0140, loss: 0.0148, recall: 0.5547, mrr: 0.5013, time: 0.06340599060058594
Start Epoch # 2
Epoch: 2, train loss: 0.0139, loss: 0.0148, recall: 0.5547, mrr: 0.4928, time: 0.05922079086303711
Start Epoch # 3
Epoch: 3, train loss: 0.0139, loss: 0.0148, recall: 0.5469, mrr: 0.4918, time: 0.06041860580444336
Start Epoch # 4
Epoch: 4, train loss: 0.0139, loss: 0.0148, recall: 0.5469, mrr: 0.4910, time: 0.05916309356689453


## FL

In [40]:
#use random seed defined
np.random.seed(22)
torch.manual_seed(22)

<torch._C.Generator at 0x7f687815e4b0>

In [41]:
class Trainer(object):
    def __init__(self, model, train_data, eval_data, optim, use_cuda, loss_func, batch_size, clientID = 0):
        self.model = model
        self.train_data = train_data
        self.eval_data = eval_data
        self.optim = optim
        self.loss_func = loss_func
        self.evaluation = Evaluation(self.model, self.loss_func, use_cuda, k = 5)
        self.device = torch.device('cuda' if use_cuda else 'cpu')
        #self.device = torch.device('cpu')
        self.batch_size = batch_size
        self.clientID = clientID
        #self.args = args

    def train(self, start_epoch, end_epoch, start_time=None):
        if start_time is None:
            self.start_time = time.time()
        else:
            self.start_time = start_time

        for epoch in range(start_epoch, end_epoch + 1):
            st = time.time()
            print('Start Epoch #', self.clientID)
            train_loss = self.train_epoch(epoch)
            loss, recall, mrr = self.evaluation.eval(self.eval_data, self.batch_size)


            print("client: {}, train loss: {:.4f}, loss: {:.4f}, recall: {:.4f}, mrr: {:.4f}, time: {}".format(self.clientID, train_loss, loss, recall, mrr, time.time() - st))
            checkpoint = {
                'model': self.model,
                'epoch': epoch,
                'optim': self.optim,
                'loss': loss,
                'recall': recall,
                'mrr': mrr
            }
            #model_name = os.path.join('checkpoint', "model_{0:05d}.pt".format(epoch))
            #torch.save(checkpoint, model_name)
            #print("Save model as %s" % model_name)


    def train_epoch(self, epoch):
        self.model.train()
        losses = []

        def reset_hidden(hidden, mask):
            """Helper function that resets hidden state when some sessions terminate"""
            if len(mask) != 0:
                hidden[:, mask, :] = 0
            return hidden

        hidden = self.model.init_hidden()
        dataloader = DataLoader(self.train_data, self.batch_size)
        #for ii,(data,label) in tqdm(enumerate(train_dataloader),total=len(train_data)):
        for ii, (input, target, mask) in enumerate(dataloader):
            input = input.to(self.device)
            target = target.to(self.device)
            self.optim.zero_grad()
            hidden = reset_hidden(hidden, mask).detach()
            logit, hidden = self.model(input, hidden)
            # output sampling
            logit_sampled = logit[:, target.view(-1)]
            loss = self.loss_func(logit_sampled)
            losses.append(loss.item())
            loss.backward()
            self.optim.step()

        mean_losses = np.mean(losses)
        return mean_losses

In [42]:
def get_parameters(net) -> List[np.ndarray]:
    return [val.cpu().numpy() for _, val in net.state_dict().items()]

def set_parameters(net, parameters: List[np.ndarray]):
    params_dict = zip(net.state_dict().keys(), parameters)
    state_dict = OrderedDict({k: torch.Tensor(v) for k, v in params_dict})
    net.load_state_dict(state_dict, strict=True)

In [43]:
class FlowerClient(fl.client.NumPyClient):
    def __init__(self, net, trainloader, valloader, clientID = 0):
        self.net = net
        self.trainloader = trainloader
        self.valloader = valloader
        self.clientID = clientID

    def get_parameters(self):
        return get_parameters(self.net)

    def fit(self, parameters, config):
        set_parameters(self.net, parameters)
        trainer = Trainer(model, train_data=self.trainloader, eval_data=self.valloader, optim=optimizer, use_cuda=False, loss_func=loss_function, batch_size=batch_size, clientID=self.clientID)
        trainer.train(0, 0)
        #print(len(self.trainloader.df))
        return get_parameters(self.net), len(self.trainloader.df), {}

    def evaluate(self, parameters, config):
        set_parameters(self.net, parameters)
        evaluation = Evaluation(self.net, loss_function, use_cuda= False, k = 5)
        loss, recall, mrr = evaluation.eval(self.valloader, 64)
        #loss, accuracy = test(self.net, self.valloader)
        #print ("print: ", recall)
        return float(loss), len(self.valloader.df), {"accuracy": float(recall)}



In [44]:
def client_fn(cid: str) -> FlowerClient:
    """Create a Flower client representing a single organization."""

    # Load model
    model = GRU4REC(87, hidden_size, 87, final_act=final_act,
                            num_layers=num_layers, use_cuda=False, batch_size=batch_size,
                            dropout_input=dropout_input, dropout_hidden=dropout_hidden, embedding_dim=embedding_dim).to(DEVICE)
    
    init_model(model)


    # Load data (CIFAR-10)
    # Note: each client gets a different trainloader/valloader, so each client
    # will train and evaluate on their own unique data
    trainloader = train_data[int(cid)]
    valloader = valid_data[int(cid)]

    # Create a  single Flower client representing a single organization
    return FlowerClient(model, trainloader, valloader, int(cid))



In [45]:
# Create FedAvg strategy
strategy = fl.server.strategy.FedAvg(
        fraction_fit=1.0,  # Sample 100% of available clients for training
        fraction_eval=0.5,  # Sample 50% of available clients for evaluation
        min_fit_clients=1,  # Never sample less than 10 clients for training
        min_eval_clients=1,  # Never sample less than 5 clients for evaluation
        min_available_clients=1,  # Wait until all 10 clients are available
)

# Start simulation
fl.simulation.start_simulation(
    client_fn=client_fn,
    num_clients=NUM_CLIENTS+1,
    num_rounds=5,
    strategy=strategy,
)

INFO flower 2022-06-17 12:42:50,284 | app.py:147 | Ray initialized with resources: {'object_store_memory': 4760811110.0, 'memory': 9521622222.0, 'GPU': 1.0, 'node:220.67.127.72': 1.0, 'accelerator_type:G': 1.0, 'CPU': 16.0}
INFO flower 2022-06-17 12:42:50,285 | app.py:156 | Starting Flower simulation running: {'num_rounds': 5}
INFO flower 2022-06-17 12:42:50,286 | server.py:128 | Initializing global parameters
INFO flower 2022-06-17 12:42:50,286 | server.py:327 | Requesting initial parameters from one random client
INFO flower 2022-06-17 12:42:51,131 | server.py:330 | Received initial parameters from one random client
INFO flower 2022-06-17 12:42:51,132 | server.py:130 | Evaluating initial parameters
INFO flower 2022-06-17 12:42:51,132 | server.py:143 | FL starting
DEBUG flower 2022-06-17 12:42:51,133 | server.py:269 | fit_round: strategy sampled 300 clients (out of 300)
[2m[36m(launch_and_get_parameters pid=318485)[0m   "num_layers={}".format(dropout, num_layers))


[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 297
[2m[36m(launch_and_fit pid=318485)[0m client: 297, train loss: 0.0144, loss: 0.0144, recall: 0.6484, mrr: 0.6217, time: 4.1579508781433105
[2m[36m(launch_and_fit pid=318482)[0m Start Epoch # 104
[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 273
[2m[36m(launch_and_fit pid=318491)[0m Start Epoch # 113
[2m[36m(launch_and_fit pid=318486)[0m Start Epoch # 102
[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 272
[2m[36m(launch_and_fit pid=318496)[0m Start Epoch # 24
[2m[36m(launch_and_fit pid=318494)[0m Start Epoch # 285
[2m[36m(launch_and_fit pid=318489)[0m Start Epoch # 85
[2m[36m(launch_and_fit pid=318493)[0m Start Epoch # 230
[2m[36m(launch_and_fit pid=318495)[0m Start Epoch # 145
[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 97
[2m[36m(launch_and_fit pid=318497)[0m Start Epoch # 259
[2m[36m(launch_and_fit pid=318488)[0m Start Epoch # 41
[2m[36m(launch_and_fit pid=318487)

[2m[36m(launch_and_fit pid=318491)[0m   "num_layers={}".format(dropout, num_layers))
[2m[36m(launch_and_fit pid=318496)[0m   "num_layers={}".format(dropout, num_layers))
[2m[36m(launch_and_fit pid=318484)[0m   "num_layers={}".format(dropout, num_layers))
[2m[36m(launch_and_fit pid=318494)[0m   "num_layers={}".format(dropout, num_layers))
[2m[36m(launch_and_fit pid=318482)[0m   "num_layers={}".format(dropout, num_layers))
[2m[36m(launch_and_fit pid=318486)[0m   "num_layers={}".format(dropout, num_layers))
[2m[36m(launch_and_fit pid=318483)[0m   "num_layers={}".format(dropout, num_layers))
[2m[36m(launch_and_fit pid=318487)[0m   "num_layers={}".format(dropout, num_layers))
[2m[36m(launch_and_fit pid=318489)[0m   "num_layers={}".format(dropout, num_layers))
[2m[36m(launch_and_fit pid=318488)[0m   "num_layers={}".format(dropout, num_layers))
[2m[36m(launch_and_fit pid=318493)[0m   "num_layers={}".format(dropout, num_layers))
[2m[36m(launch_and_fit pid=318

[2m[36m(launch_and_fit pid=318484)[0m client: 273, train loss: 0.0143, loss: 0.0146, recall: 0.5938, mrr: 0.5653, time: 0.20922112464904785
[2m[36m(launch_and_fit pid=318486)[0m client: 102, train loss: 0.0144, loss: 0.0145, recall: 0.6615, mrr: 0.6044, time: 0.22640585899353027
[2m[36m(launch_and_fit pid=318483)[0m client: 272, train loss: 0.0143, loss: 0.0143, recall: 0.6667, mrr: 0.6201, time: 0.19410419464111328
[2m[36m(launch_and_fit pid=318496)[0m client: 24, train loss: 0.0142, loss: 0.0145, recall: 0.6953, mrr: 0.5940, time: 0.1509106159210205
[2m[36m(launch_and_fit pid=318494)[0m client: 285, train loss: 0.0144, loss: 0.0144, recall: 0.6406, mrr: 0.6250, time: 0.17308998107910156
[2m[36m(launch_and_fit pid=318489)[0m client: 85, train loss: 0.0143, loss: 0.0143, recall: 0.7188, mrr: 0.7047, time: 0.15874481201171875
[2m[36m(launch_and_fit pid=318493)[0m client: 230, train loss: 0.0143, loss: 0.0146, recall: 0.5938, mrr: 0.5440, time: 0.16537714004516602
[

[2m[36m(raylet)[0m Spilled 2096 MiB, 31 objects, write throughput 24 MiB/s. Set RAY_verbose_spill_logs=0 to disable this message.


[2m[36m(launch_and_fit pid=318482)[0m Start Epoch # 257
[2m[36m(launch_and_fit pid=318496)[0m Start Epoch # 38
[2m[36m(launch_and_fit pid=318495)[0m Start Epoch # 5
[2m[36m(launch_and_fit pid=318488)[0m Start Epoch # 50
[2m[36m(launch_and_fit pid=318492)[0m Start Epoch # 156
[2m[36m(launch_and_fit pid=318482)[0m client: 257, train loss: 0.0142, loss: 0.0144, recall: 0.7344, mrr: 0.5729, time: 0.08571434020996094
[2m[36m(launch_and_fit pid=318486)[0m Start Epoch # 143
[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 106
[2m[36m(launch_and_fit pid=318496)[0m client: 38, train loss: 0.0143, loss: 0.0146, recall: 0.6875, mrr: 0.6398, time: 0.07843470573425293
[2m[36m(launch_and_fit pid=318493)[0m Start Epoch # 240
[2m[36m(launch_and_fit pid=318495)[0m client: 5, train loss: 0.0144, loss: 0.0144, recall: 0.6797, mrr: 0.6184, time: 0.0866248607635498
[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 174
[2m[36m(launch_and_fit pid=318488)[0m client:

[2m[36m(launch_and_fit pid=318482)[0m client: 74, train loss: 0.0143, loss: 0.0148, recall: 0.5781, mrr: 0.5390, time: 0.22105836868286133
[2m[36m(launch_and_fit pid=318496)[0m Start Epoch # 84
[2m[36m(launch_and_fit pid=318494)[0m Start Epoch # 43
[2m[36m(launch_and_fit pid=318494)[0m client: 43, train loss: 0.0142, loss: 0.0145, recall: 0.6875, mrr: 0.6523, time: 0.059334516525268555
[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 3
[2m[36m(launch_and_fit pid=318496)[0m client: 84, train loss: 0.0144, loss: 0.0146, recall: 0.6406, mrr: 0.5408, time: 0.4382154941558838
[2m[36m(launch_and_fit pid=318484)[0m client: 3, train loss: 0.0143, loss: 0.0144, recall: 0.6562, mrr: 0.5861, time: 0.2512962818145752
[2m[36m(launch_and_fit pid=318486)[0m Start Epoch # 58
[2m[36m(launch_and_fit pid=318486)[0m client: 58, train loss: 0.0143, loss: 0.0145, recall: 0.6484, mrr: 0.6246, time: 0.21901535987854004


[2m[36m(raylet)[0m Spilled 4189 MiB, 50 objects, write throughput 36 MiB/s.
[2m[36m(raylet)[0m Spilled 8258 MiB, 85 objects, write throughput 33 MiB/s.


[2m[36m(launch_and_fit pid=318497)[0m Start Epoch # 21
[2m[36m(launch_and_fit pid=318497)[0m client: 21, train loss: 0.0143, loss: 0.0144, recall: 0.7109, mrr: 0.6754, time: 20.188416242599487
[2m[36m(launch_and_fit pid=318482)[0m Start Epoch # 92
[2m[36m(launch_and_fit pid=318491)[0m Start Epoch # 12
[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 195
[2m[36m(launch_and_fit pid=318493)[0m Start Epoch # 122
[2m[36m(launch_and_fit pid=318492)[0m Start Epoch # 151
[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 239
[2m[36m(launch_and_fit pid=318482)[0m client: 92, train loss: 0.0143, loss: 0.0146, recall: 0.6875, mrr: 0.6057, time: 0.0869591236114502
[2m[36m(launch_and_fit pid=318491)[0m client: 12, train loss: 0.0142, loss: 0.0145, recall: 0.6875, mrr: 0.6660, time: 0.06515026092529297
[2m[36m(launch_and_fit pid=318483)[0m client: 195, train loss: 0.0144, loss: 0.0145, recall: 0.7656, mrr: 0.7161, time: 0.061858177185058594
[2m[36m(launch_and_f

[2m[36m(launch_and_fit pid=318485)[0m client: 181, train loss: 0.0142, loss: 0.0146, recall: 0.6719, mrr: 0.6237, time: 0.7685682773590088
[2m[36m(launch_and_fit pid=318482)[0m client: 254, train loss: 0.0143, loss: 0.0146, recall: 0.6719, mrr: 0.5905, time: 0.829627513885498
[2m[36m(launch_and_fit pid=318484)[0m client: 153, train loss: 0.0143, loss: 0.0146, recall: 0.7188, mrr: 0.6615, time: 0.8506159782409668
[2m[36m(launch_and_fit pid=318491)[0m client: 150, train loss: 0.0144, loss: 0.0143, recall: 0.6979, mrr: 0.6303, time: 0.7810003757476807
[2m[36m(launch_and_fit pid=318486)[0m client: 76, train loss: 0.0144, loss: 0.0147, recall: 0.6146, mrr: 0.5184, time: 0.815169095993042
[2m[36m(launch_and_fit pid=318483)[0m client: 4, train loss: 0.0143, loss: 0.0143, recall: 0.6953, mrr: 0.6305, time: 0.839867115020752
[2m[36m(launch_and_fit pid=318496)[0m Start Epoch # 154
[2m[36m(launch_and_fit pid=318493)[0m client: 28, train loss: 0.0143, loss: 0.0143, recall: 

[2m[36m(raylet)[0m Spilled 16428 MiB, 282 objects, write throughput 43 MiB/s.
[2m[33m(raylet)[0m [2022-06-17 12:49:18,617 E 318403 318403] (raylet) worker_pool.cc:518: Some workers of the worker process(319584) have not registered within the timeout. The process is still alive, probably it's hanging during start.
[2m[33m(raylet)[0m [2022-06-17 12:49:18,620 E 318403 318403] (raylet) worker_pool.cc:518: Some workers of the worker process(319585) have not registered within the timeout. The process is still alive, probably it's hanging during start.


[2m[36m(launch_and_fit pid=318489)[0m Start Epoch # 184
[2m[36m(launch_and_fit pid=318495)[0m Start Epoch # 127
[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 179
[2m[36m(launch_and_fit pid=318489)[0m client: 184, train loss: 0.0143, loss: 0.0145, recall: 0.7266, mrr: 0.6857, time: 0.07916450500488281
[2m[36m(launch_and_fit pid=318495)[0m client: 127, train loss: 0.0143, loss: 0.0145, recall: 0.6484, mrr: 0.6253, time: 0.06346297264099121
[2m[36m(launch_and_fit pid=318490)[0m client: 179, train loss: 0.0143, loss: 0.0146, recall: 0.6719, mrr: 0.6276, time: 0.05352282524108887
[2m[36m(launch_and_fit pid=318487)[0m Start Epoch # 136
[2m[36m(launch_and_fit pid=318487)[0m client: 136, train loss: 0.0143, loss: 0.0146, recall: 0.5469, mrr: 0.5071, time: 0.06372761726379395
[2m[36m(launch_and_fit pid=318492)[0m Start Epoch # 213
[2m[36m(launch_and_fit pid=318492)[0m client: 213, train loss: 0.0144, loss: 0.0148, recall: 0.5781, mrr: 0.4879, time: 0.06241083

[2m[36m(launch_and_fit pid=318482)[0m Start Epoch # 72
[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 202
[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 16
[2m[36m(launch_and_fit pid=318489)[0m Start Epoch # 211
[2m[36m(launch_and_fit pid=318495)[0m Start Epoch # 46
[2m[36m(launch_and_fit pid=318497)[0m Start Epoch # 287
[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 291
[2m[36m(launch_and_fit pid=318482)[0m client: 72, train loss: 0.0143, loss: 0.0148, recall: 0.5938, mrr: 0.5529, time: 0.11527514457702637
[2m[36m(launch_and_fit pid=318484)[0m client: 202, train loss: 0.0143, loss: 0.0146, recall: 0.6953, mrr: 0.6698, time: 0.08748221397399902
[2m[36m(launch_and_fit pid=318483)[0m client: 16, train loss: 0.0144, loss: 0.0145, recall: 0.6354, mrr: 0.5807, time: 0.10434651374816895
[2m[36m(launch_and_fit pid=318489)[0m client: 211, train loss: 0.0144, loss: 0.0146, recall: 0.5625, mrr: 0.5194, time: 0.08929586410522461
[2m[36m(launch_and

[2m[36m(launch_and_fit pid=318487)[0m Start Epoch # 17
[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 73
[2m[36m(launch_and_fit pid=318482)[0m Start Epoch # 227
[2m[36m(launch_and_fit pid=318491)[0m Start Epoch # 80
[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 94
[2m[36m(launch_and_fit pid=318495)[0m Start Epoch # 64
[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 253
[2m[36m(launch_and_fit pid=318490)[0m client: 253, train loss: 0.0143, loss: 0.0142, recall: 0.6875, mrr: 0.6102, time: 0.09323787689208984
[2m[36m(launch_and_fit pid=318497)[0m Start Epoch # 81
[2m[36m(launch_and_fit pid=318488)[0m Start Epoch # 236
[2m[36m(launch_and_fit pid=318485)[0m client: 73, train loss: 0.0143, loss: 0.0148, recall: 0.5625, mrr: 0.4895, time: 0.11965322494506836
[2m[36m(launch_and_fit pid=318482)[0m client: 227, train loss: 0.0143, loss: 0.0147, recall: 0.6172, mrr: 0.6003, time: 0.14535069465637207
[2m[36m(launch_and_fit pid=318484)[0m Start 

[2m[36m(launch_and_fit pid=318482)[0m client: 286, train loss: 0.0143, loss: 0.0148, recall: 0.6094, mrr: 0.5820, time: 0.09827971458435059
[2m[36m(launch_and_fit pid=318491)[0m client: 178, train loss: 0.0144, loss: 0.0144, recall: 0.6406, mrr: 0.6190, time: 0.07909035682678223
[2m[36m(launch_and_fit pid=318486)[0m client: 14, train loss: 0.0143, loss: 0.0144, recall: 0.6510, mrr: 0.5925, time: 0.11878657341003418
[2m[36m(launch_and_fit pid=318494)[0m client: 123, train loss: 0.0144, loss: 0.0145, recall: 0.6641, mrr: 0.5559, time: 0.11269021034240723
[2m[36m(launch_and_fit pid=318489)[0m client: 152, train loss: 0.0144, loss: 0.0147, recall: 0.5859, mrr: 0.5428, time: 0.11505603790283203
[2m[36m(launch_and_fit pid=318488)[0m client: 48, train loss: 0.0141, loss: 0.0144, recall: 0.6797, mrr: 0.6253, time: 0.08141875267028809
[2m[36m(launch_and_fit pid=318492)[0m client: 277, train loss: 0.0144, loss: 0.0148, recall: 0.5469, mrr: 0.4754, time: 0.12880563735961914


[2m[36m(launch_and_fit pid=318483)[0m   out=out, **kwargs)
[2m[36m(launch_and_fit pid=318483)[0m   ret = ret.dtype.type(ret / rcount)


[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 108
[2m[36m(launch_and_fit pid=318483)[0m client: 108, train loss: 0.0143, loss: 0.0145, recall: 0.6302, mrr: 0.5568, time: 0.07587337493896484
[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 226
[2m[36m(launch_and_fit pid=318483)[0m client: 226, train loss: 0.0143, loss: 0.0146, recall: 0.5625, mrr: 0.5076, time: 0.05645418167114258
[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 110
[2m[36m(launch_and_fit pid=318483)[0m client: 110, train loss: 0.0145, loss: 0.0146, recall: 0.6172, mrr: 0.5598, time: 0.07000160217285156
[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 62
[2m[36m(launch_and_fit pid=318483)[0m client: 62, train loss: 0.0143, loss: 0.0145, recall: 0.6797, mrr: 0.6160, time: 0.057528018951416016


[2m[36m(raylet)[0m Spilled 32882 MiB, 663 objects, write throughput 48 MiB/s.


[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 200
[2m[36m(launch_and_fit pid=318483)[0m client: 200, train loss: 0.0143, loss: 0.0145, recall: 0.6094, mrr: 0.5898, time: 0.05363631248474121
[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 25
[2m[36m(launch_and_fit pid=318483)[0m client: 25, train loss: 0.0143, loss: 0.0145, recall: 0.7188, mrr: 0.6276, time: 0.05237007141113281
[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 167
[2m[36m(launch_and_fit pid=318483)[0m client: 167, train loss: 0.0143, loss: 0.0144, recall: 0.7500, mrr: 0.6945, time: 0.057466745376586914
[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 196
[2m[36m(launch_and_fit pid=318483)[0m client: 196, train loss: 0.0144, loss: 0.0145, recall: 0.6458, mrr: 0.6007, time: 0.046274662017822266
[2m[36m(launch_and_fit pid=318487)[0m Start Epoch # 36
[2m[36m(launch_and_fit pid=318487)[0m client: 36, train loss: 0.0143, loss: 0.0144, recall: 0.7422, mrr: 0.6885, time: 0.0576453208

DEBUG flower 2022-06-17 12:55:42,756 | server.py:281 | fit_round received 297 results and 3 failures


[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 103
[2m[36m(launch_and_fit pid=318483)[0m client: 103, train loss: 0.0143, loss: 0.0145, recall: 0.7188, mrr: 0.5661, time: 0.05695700645446777


DEBUG flower 2022-06-17 12:55:43,047 | server.py:215 | evaluate_round: strategy sampled 150 clients (out of 300)
DEBUG flower 2022-06-17 12:58:01,528 | server.py:227 | evaluate_round received 149 results and 1 failures
DEBUG flower 2022-06-17 12:58:01,539 | server.py:269 | fit_round: strategy sampled 300 clients (out of 300)


[2m[36m(launch_and_fit pid=318486)[0m Start Epoch # 293
[2m[36m(launch_and_fit pid=318496)[0m Start Epoch # 296
[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 29
[2m[36m(launch_and_fit pid=318482)[0m Start Epoch # 40
[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 85
[2m[36m(launch_and_fit pid=318491)[0m Start Epoch # 289
[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 290
[2m[36m(launch_and_fit pid=318494)[0m Start Epoch # 228
[2m[36m(launch_and_fit pid=318489)[0m Start Epoch # 270
[2m[36m(launch_and_fit pid=318493)[0m Start Epoch # 288
[2m[36m(launch_and_fit pid=318495)[0m Start Epoch # 99
[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 17
[2m[36m(launch_and_fit pid=318497)[0m Start Epoch # 216
[2m[36m(launch_and_fit pid=318488)[0m Start Epoch # 286
[2m[36m(launch_and_fit pid=318487)[0m Start Epoch # 299
[2m[36m(launch_and_fit pid=318492)[0m Start Epoch # 240


[2m[36m(launch_and_fit pid=318487)[0m   out=out, **kwargs)
[2m[36m(launch_and_fit pid=318487)[0m   ret = ret.dtype.type(ret / rcount)


[2m[36m(launch_and_fit pid=318485)[0m client: 29, train loss: 0.0145, loss: 0.0146, recall: 0.5781, mrr: 0.5310, time: 0.3903508186340332
[2m[36m(launch_and_fit pid=318482)[0m client: 40, train loss: 0.0142, loss: 0.0143, recall: 0.7656, mrr: 0.6987, time: 0.4155457019805908
[2m[36m(launch_and_fit pid=318484)[0m client: 85, train loss: 0.0143, loss: 0.0143, recall: 0.7188, mrr: 0.7047, time: 0.4255702495574951
[2m[36m(launch_and_fit pid=318491)[0m client: 289, train loss: 0.0143, loss: 0.0145, recall: 0.6875, mrr: 0.6410, time: 0.4191129207611084
[2m[36m(launch_and_fit pid=318486)[0m client: 293, train loss: 0.0143, loss: 0.0144, recall: 0.6667, mrr: 0.6050, time: 0.5004582405090332
[2m[36m(launch_and_fit pid=318483)[0m client: 290, train loss: 0.0143, loss: 0.0147, recall: 0.6250, mrr: 0.5657, time: 0.3609018325805664
[2m[36m(launch_and_fit pid=318496)[0m client: 296, train loss: 0.0143, loss: 0.0146, recall: 0.6562, mrr: 0.6305, time: 0.4531826972961426
[2m[36m

[2m[36m(launch_and_fit pid=318493)[0m Start Epoch # 170
[2m[36m(launch_and_fit pid=318493)[0m client: 170, train loss: 0.0143, loss: 0.0146, recall: 0.6094, mrr: 0.5098, time: 0.06359457969665527
[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 89
[2m[36m(launch_and_fit pid=318486)[0m Start Epoch # 161
[2m[36m(launch_and_fit pid=318484)[0m client: 87, train loss: 0.0143, loss: 0.0144, recall: 0.6797, mrr: 0.6105, time: 0.7437896728515625
[2m[36m(launch_and_fit pid=318489)[0m client: 61, train loss: 0.0143, loss: 0.0141, recall: 0.7266, mrr: 0.7000, time: 0.5904390811920166
[2m[36m(launch_and_fit pid=318490)[0m client: 86, train loss: 0.0143, loss: 0.0146, recall: 0.5990, mrr: 0.5689, time: 0.5220351219177246
[2m[36m(launch_and_fit pid=318492)[0m Start Epoch # 215
[2m[36m(launch_and_fit pid=318492)[0m client: 215, train loss: 0.0144, loss: 0.0145, recall: 0.6484, mrr: 0.6253, time: 0.06388711929321289
[2m[36m(launch_and_fit pid=318491)[0m client: 81, tra

[2m[36m(raylet)[0m Spilled 65648 MiB, 1165 objects, write throughput 55 MiB/s.


[2m[36m(launch_and_fit pid=318491)[0m Start Epoch # 162
[2m[36m(launch_and_fit pid=318491)[0m client: 162, train loss: 0.0144, loss: 0.0145, recall: 0.5938, mrr: 0.5670, time: 0.06045866012573242
[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 95
[2m[36m(launch_and_fit pid=318485)[0m client: 95, train loss: 0.0142, loss: 0.0145, recall: 0.5938, mrr: 0.5624, time: 0.0627436637878418
[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 212
[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 26
[2m[36m(launch_and_fit pid=318483)[0m client: 26, train loss: 0.0143, loss: 0.0144, recall: 0.5938, mrr: 0.5749, time: 0.05803418159484863
[2m[36m(launch_and_fit pid=318484)[0m client: 212, train loss: 0.0142, loss: 0.0147, recall: 0.5938, mrr: 0.5688, time: 0.06832742691040039
[2m[36m(launch_and_fit pid=318487)[0m Start Epoch # 283
[2m[36m(launch_and_fit pid=318487)[0m client: 283, train loss: 0.0144, loss: 0.0144, recall: 0.6172, mrr: 0.5931, time: 0.0523331165313

[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 135
[2m[36m(launch_and_fit pid=318484)[0m client: 135, train loss: 0.0143, loss: 0.0145, recall: 0.6328, mrr: 0.5501, time: 0.06537580490112305
[2m[36m(launch_and_fit pid=318491)[0m Start Epoch # 80
[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 97
[2m[36m(launch_and_fit pid=318487)[0m Start Epoch # 65
[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 242
[2m[36m(launch_and_fit pid=318491)[0m client: 80, train loss: 0.0143, loss: 0.0146, recall: 0.6615, mrr: 0.5911, time: 0.08198070526123047
[2m[36m(launch_and_fit pid=318486)[0m Start Epoch # 52
[2m[36m(launch_and_fit pid=318483)[0m client: 97, train loss: 0.0144, loss: 0.0144, recall: 0.6510, mrr: 0.6369, time: 0.11561226844787598
[2m[36m(launch_and_fit pid=318494)[0m Start Epoch # 73
[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 267
[2m[36m(launch_and_fit pid=318488)[0m Start Epoch # 34
[2m[36m(launch_and_fit pid=318487)[0m client:

[2m[36m(launch_and_fit pid=318494)[0m Start Epoch # 45
[2m[36m(launch_and_fit pid=318494)[0m client: 45, train loss: 0.0143, loss: 0.0148, recall: 0.6094, mrr: 0.5794, time: 0.06032443046569824
[2m[36m(launch_and_fit pid=318488)[0m Start Epoch # 203
[2m[36m(launch_and_fit pid=318496)[0m Start Epoch # 208
[2m[36m(launch_and_fit pid=318488)[0m client: 203, train loss: 0.0144, loss: 0.0144, recall: 0.6641, mrr: 0.5908, time: 0.06035351753234863
[2m[36m(launch_and_fit pid=318496)[0m client: 208, train loss: 0.0143, loss: 0.0145, recall: 0.6406, mrr: 0.5952, time: 0.06119871139526367
[2m[36m(launch_and_fit pid=318482)[0m Start Epoch # 4
[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 156
[2m[36m(launch_and_fit pid=318491)[0m Start Epoch # 174
[2m[36m(launch_and_fit pid=318486)[0m Start Epoch # 113
[2m[36m(launch_and_fit pid=318486)[0m client: 113, train loss: 0.0142, loss: 0.0147, recall: 0.6406, mrr: 0.6077, time: 0.06834721565246582
[2m[36m(launch_an

[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 101
[2m[36m(launch_and_fit pid=318484)[0m client: 101, train loss: 0.0142, loss: 0.0147, recall: 0.6328, mrr: 0.5452, time: 0.06108498573303223
[2m[36m(launch_and_fit pid=318491)[0m Start Epoch # 211
[2m[36m(launch_and_fit pid=318491)[0m client: 211, train loss: 0.0144, loss: 0.0146, recall: 0.5625, mrr: 0.5194, time: 0.06161761283874512
[2m[36m(launch_and_fit pid=318482)[0m Start Epoch # 193
[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 190
[2m[36m(launch_and_fit pid=318486)[0m Start Epoch # 131
[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 11
[2m[36m(launch_and_fit pid=318483)[0m client: 11, train loss: 0.0145, loss: 0.0146, recall: 0.5703, mrr: 0.5253, time: 0.08378124237060547
[2m[36m(launch_and_fit pid=318493)[0m Start Epoch # 38
[2m[36m(launch_and_fit pid=318495)[0m Start Epoch # 70
[2m[36m(launch_and_fit pid=318487)[0m Start Epoch # 175
[2m[36m(launch_and_fit pid=318492)[0m Sta

[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 214
[2m[36m(launch_and_fit pid=318484)[0m client: 214, train loss: 0.0144, loss: 0.0143, recall: 0.7734, mrr: 0.6698, time: 0.05048942565917969
[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 209
[2m[36m(launch_and_fit pid=318484)[0m client: 209, train loss: 0.0144, loss: 0.0144, recall: 0.6953, mrr: 0.6797, time: 0.05682635307312012
[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 141
[2m[36m(launch_and_fit pid=318484)[0m client: 141, train loss: 0.0143, loss: 0.0142, recall: 0.7109, mrr: 0.6385, time: 0.051212310791015625
[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 176
[2m[36m(launch_and_fit pid=318484)[0m client: 176, train loss: 0.0143, loss: 0.0141, recall: 0.7812, mrr: 0.7141, time: 0.04899764060974121
[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 36
[2m[36m(launch_and_fit pid=318484)[0m client: 36, train loss: 0.0143, loss: 0.0144, recall: 0.7422, mrr: 0.6885, time: 0.057253360

DEBUG flower 2022-06-17 13:08:54,045 | server.py:281 | fit_round received 297 results and 3 failures


[2m[36m(launch_and_fit pid=318492)[0m Start Epoch # 143
[2m[36m(launch_and_fit pid=318492)[0m client: 143, train loss: 0.0144, loss: 0.0150, recall: 0.5312, mrr: 0.4944, time: 0.054724693298339844


DEBUG flower 2022-06-17 13:08:54,411 | server.py:215 | evaluate_round: strategy sampled 150 clients (out of 300)
DEBUG flower 2022-06-17 13:10:52,278 | server.py:227 | evaluate_round received 150 results and 0 failures
DEBUG flower 2022-06-17 13:10:52,279 | server.py:269 | fit_round: strategy sampled 300 clients (out of 300)


[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 294
[2m[36m(launch_and_fit pid=318491)[0m Start Epoch # 214
[2m[36m(launch_and_fit pid=318486)[0m Start Epoch # 78
[2m[36m(launch_and_fit pid=318496)[0m Start Epoch # 150
[2m[36m(launch_and_fit pid=318494)[0m Start Epoch # 289
[2m[36m(launch_and_fit pid=318489)[0m Start Epoch # 231
[2m[36m(launch_and_fit pid=318493)[0m Start Epoch # 196
[2m[36m(launch_and_fit pid=318495)[0m Start Epoch # 234
[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 87
[2m[36m(launch_and_fit pid=318497)[0m Start Epoch # 102
[2m[36m(launch_and_fit pid=318488)[0m Start Epoch # 44
[2m[36m(launch_and_fit pid=318487)[0m Start Epoch # 90
[2m[36m(launch_and_fit pid=318492)[0m Start Epoch # 65
[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 52
[2m[36m(launch_and_fit pid=318482)[0m Start Epoch # 202
[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 69
[2m[36m(launch_and_fit pid=318493)[0m client: 196, train los

[2m[36m(launch_and_fit pid=318482)[0m Start Epoch # 110
[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 95
[2m[36m(launch_and_fit pid=318491)[0m Start Epoch # 147
[2m[36m(launch_and_fit pid=318494)[0m Start Epoch # 188
[2m[36m(launch_and_fit pid=318493)[0m Start Epoch # 254
[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 25
[2m[36m(launch_and_fit pid=318497)[0m Start Epoch # 278
[2m[36m(launch_and_fit pid=318488)[0m Start Epoch # 73
[2m[36m(launch_and_fit pid=318492)[0m Start Epoch # 206
[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 85
[2m[36m(launch_and_fit pid=318482)[0m client: 110, train loss: 0.0145, loss: 0.0146, recall: 0.6172, mrr: 0.5598, time: 0.18392682075500488
[2m[36m(launch_and_fit pid=318486)[0m Start Epoch # 249
[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 62
[2m[36m(launch_and_fit pid=318496)[0m Start Epoch # 109
[2m[36m(launch_and_fit pid=318489)[0m Start Epoch # 17
[2m[36m(launch_and_fit pid=318495)

[2m[36m(launch_and_fit pid=318496)[0m   out=out, **kwargs)
[2m[36m(launch_and_fit pid=318496)[0m   ret = ret.dtype.type(ret / rcount)


[2m[36m(launch_and_fit pid=318496)[0m Start Epoch # 28
[2m[36m(launch_and_fit pid=318496)[0m client: 28, train loss: 0.0143, loss: 0.0143, recall: 0.7344, mrr: 0.7188, time: 0.04999828338623047
[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 280
[2m[36m(launch_and_fit pid=318485)[0m client: 280, train loss: 0.0143, loss: 0.0145, recall: 0.6641, mrr: 0.6083, time: 0.06115269660949707
[2m[36m(launch_and_fit pid=318486)[0m Start Epoch # 236
[2m[36m(launch_and_fit pid=318486)[0m client: 236, train loss: 0.0144, loss: 0.0142, recall: 0.6719, mrr: 0.6602, time: 0.05760931968688965
[2m[36m(launch_and_fit pid=318482)[0m Start Epoch # 288
[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 141
[2m[36m(launch_and_fit pid=318484)[0m client: 141, train loss: 0.0143, loss: 0.0142, recall: 0.7109, mrr: 0.6385, time: 0.06617474555969238
[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 283
[2m[36m(launch_and_fit pid=318490)[0m client: 283, train loss: 0.0144, lo

[2m[36m(launch_and_fit pid=318486)[0m Start Epoch # 205
[2m[36m(launch_and_fit pid=318497)[0m Start Epoch # 118
[2m[36m(launch_and_fit pid=318488)[0m Start Epoch # 127
[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 265
[2m[36m(launch_and_fit pid=318486)[0m client: 205, train loss: 0.0142, loss: 0.0144, recall: 0.7578, mrr: 0.6117, time: 0.07813620567321777
[2m[36m(launch_and_fit pid=318494)[0m Start Epoch # 286
[2m[36m(launch_and_fit pid=318497)[0m client: 118, train loss: 0.0142, loss: 0.0143, recall: 0.7031, mrr: 0.6599, time: 0.06757426261901855
[2m[36m(launch_and_fit pid=318488)[0m client: 127, train loss: 0.0143, loss: 0.0145, recall: 0.6484, mrr: 0.6253, time: 0.07772088050842285
[2m[36m(launch_and_fit pid=318487)[0m Start Epoch # 244
[2m[36m(launch_and_fit pid=318487)[0m client: 244, train loss: 0.0142, loss: 0.0144, recall: 0.6458, mrr: 0.5933, time: 0.0853269100189209
[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 96
[2m[36m(launch_

[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 133
[2m[36m(launch_and_fit pid=318495)[0m Start Epoch # 116
[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 269
[2m[36m(launch_and_fit pid=318485)[0m client: 133, train loss: 0.0143, loss: 0.0144, recall: 0.7396, mrr: 0.6594, time: 0.08570432662963867
[2m[36m(launch_and_fit pid=318482)[0m Start Epoch # 57
[2m[36m(launch_and_fit pid=318495)[0m client: 116, train loss: 0.0144, loss: 0.0148, recall: 0.5833, mrr: 0.5042, time: 0.07944130897521973
[2m[36m(launch_and_fit pid=318490)[0m client: 269, train loss: 0.0142, loss: 0.0142, recall: 0.7031, mrr: 0.6773, time: 0.07519197463989258
[2m[36m(launch_and_fit pid=318482)[0m client: 57, train loss: 0.0143, loss: 0.0145, recall: 0.5938, mrr: 0.5316, time: 0.05616903305053711
[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 22
[2m[36m(launch_and_fit pid=318483)[0m client: 22, train loss: 0.0143, loss: 0.0142, recall: 0.6875, mrr: 0.6507, time: 0.060941696166

[2m[36m(raylet)[0m Spilled 131339 MiB, 2515 objects, write throughput 63 MiB/s.


[2m[36m(launch_and_fit pid=318496)[0m Start Epoch # 154
[2m[36m(launch_and_fit pid=318494)[0m Start Epoch # 13
[2m[36m(launch_and_fit pid=318497)[0m Start Epoch # 72
[2m[36m(launch_and_fit pid=318492)[0m Start Epoch # 268
[2m[36m(launch_and_fit pid=318486)[0m Start Epoch # 121
[2m[36m(launch_and_fit pid=318486)[0m client: 121, train loss: 0.0144, loss: 0.0142, recall: 0.7500, mrr: 0.7164, time: 0.07020211219787598
[2m[36m(launch_and_fit pid=318496)[0m client: 154, train loss: 0.0143, loss: 0.0147, recall: 0.5469, mrr: 0.5172, time: 0.08549094200134277
[2m[36m(launch_and_fit pid=318494)[0m client: 13, train loss: 0.0142, loss: 0.0145, recall: 0.6562, mrr: 0.5214, time: 0.07243108749389648
[2m[36m(launch_and_fit pid=318497)[0m client: 72, train loss: 0.0143, loss: 0.0148, recall: 0.5938, mrr: 0.5529, time: 0.07860898971557617
[2m[36m(launch_and_fit pid=318492)[0m client: 268, train loss: 0.0143, loss: 0.0146, recall: 0.6172, mrr: 0.5504, time: 0.076600790023

[2m[36m(launch_and_fit pid=318493)[0m Start Epoch # 106
[2m[36m(launch_and_fit pid=318493)[0m client: 106, train loss: 0.0142, loss: 0.0149, recall: 0.5625, mrr: 0.4875, time: 0.057646751403808594
[2m[36m(launch_and_fit pid=318487)[0m Start Epoch # 155
[2m[36m(launch_and_fit pid=318487)[0m client: 155, train loss: 0.0143, loss: 0.0145, recall: 0.6406, mrr: 0.5803, time: 0.05558061599731445
[2m[36m(launch_and_fit pid=318487)[0m Start Epoch # 60
[2m[36m(launch_and_fit pid=318487)[0m client: 60, train loss: 0.0143, loss: 0.0145, recall: 0.5938, mrr: 0.5029, time: 0.0537111759185791
[2m[36m(launch_and_fit pid=318487)[0m Start Epoch # 274
[2m[36m(launch_and_fit pid=318487)[0m client: 274, train loss: 0.0143, loss: 0.0147, recall: 0.5625, mrr: 0.5203, time: 0.06423330307006836
[2m[36m(launch_and_fit pid=318487)[0m Start Epoch # 292
[2m[36m(launch_and_fit pid=318487)[0m client: 292, train loss: 0.0143, loss: 0.0148, recall: 0.5885, mrr: 0.5603, time: 0.0566966533

DEBUG flower 2022-06-17 13:20:11,514 | server.py:281 | fit_round received 297 results and 3 failures


[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 295
[2m[36m(launch_and_fit pid=318487)[0m Start Epoch # 134
[2m[36m(launch_and_fit pid=318487)[0m client: 134, train loss: 0.0143, loss: 0.0145, recall: 0.6354, mrr: 0.5526, time: 0.056313276290893555
[2m[36m(launch_and_fit pid=318483)[0m client: 295, train loss: 0.0143, loss: 0.0145, recall: 0.6510, mrr: 0.5953, time: 0.0575098991394043


DEBUG flower 2022-06-17 13:20:11,968 | server.py:215 | evaluate_round: strategy sampled 150 clients (out of 300)
DEBUG flower 2022-06-17 13:22:20,172 | server.py:227 | evaluate_round received 149 results and 1 failures
DEBUG flower 2022-06-17 13:22:20,415 | server.py:269 | fit_round: strategy sampled 300 clients (out of 300)


[2m[36m(launch_and_fit pid=318496)[0m Start Epoch # 121
[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 78
[2m[36m(launch_and_fit pid=318482)[0m Start Epoch # 189
[2m[36m(launch_and_fit pid=318491)[0m Start Epoch # 231
[2m[36m(launch_and_fit pid=318486)[0m Start Epoch # 128
[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 178
[2m[36m(launch_and_fit pid=318494)[0m Start Epoch # 212
[2m[36m(launch_and_fit pid=318489)[0m Start Epoch # 273
[2m[36m(launch_and_fit pid=318493)[0m Start Epoch # 250
[2m[36m(launch_and_fit pid=318495)[0m Start Epoch # 38
[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 245
[2m[36m(launch_and_fit pid=318497)[0m Start Epoch # 84
[2m[36m(launch_and_fit pid=318488)[0m Start Epoch # 149
[2m[36m(launch_and_fit pid=318487)[0m Start Epoch # 97
[2m[36m(launch_and_fit pid=318492)[0m Start Epoch # 280
[2m[36m(launch_and_fit pid=318486)[0m client: 128, train loss: 0.0143, loss: 0.0146, recall: 0.6328, mrr: 0.5388, ti

[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 143
[2m[36m(launch_and_fit pid=318490)[0m client: 143, train loss: 0.0144, loss: 0.0150, recall: 0.5312, mrr: 0.4944, time: 0.06486296653747559
[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 257
[2m[36m(launch_and_fit pid=318490)[0m client: 257, train loss: 0.0142, loss: 0.0144, recall: 0.7344, mrr: 0.5729, time: 0.06251287460327148
[2m[36m(launch_and_fit pid=318486)[0m Start Epoch # 161
[2m[36m(launch_and_fit pid=318486)[0m client: 161, train loss: 0.0142, loss: 0.0149, recall: 0.4844, mrr: 0.4410, time: 0.07130217552185059
[2m[36m(launch_and_fit pid=318495)[0m Start Epoch # 30
[2m[36m(launch_and_fit pid=318495)[0m client: 30, train loss: 0.0143, loss: 0.0146, recall: 0.5859, mrr: 0.5716, time: 0.0874474048614502
[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 120
[2m[36m(launch_and_fit pid=318490)[0m client: 120, train loss: 0.0142, loss: 0.0145, recall: 0.5938, mrr: 0.5417, time: 0.06788778305

[2m[36m(launch_and_fit pid=318482)[0m Start Epoch # 266
[2m[36m(launch_and_fit pid=318482)[0m client: 266, train loss: 0.0142, loss: 0.0143, recall: 0.7344, mrr: 0.6526, time: 0.061713218688964844
[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 154
[2m[36m(launch_and_fit pid=318494)[0m Start Epoch # 147
[2m[36m(launch_and_fit pid=318489)[0m Start Epoch # 108
[2m[36m(launch_and_fit pid=318493)[0m Start Epoch # 167
[2m[36m(launch_and_fit pid=318497)[0m Start Epoch # 104
[2m[36m(launch_and_fit pid=318488)[0m Start Epoch # 46
[2m[36m(launch_and_fit pid=318492)[0m Start Epoch # 57
[2m[36m(launch_and_fit pid=318484)[0m client: 154, train loss: 0.0143, loss: 0.0147, recall: 0.5469, mrr: 0.5172, time: 0.10645341873168945
[2m[36m(launch_and_fit pid=318494)[0m client: 147, train loss: 0.0143, loss: 0.0143, recall: 0.6562, mrr: 0.6409, time: 0.13157272338867188
[2m[36m(launch_and_fit pid=318489)[0m client: 108, train loss: 0.0143, loss: 0.0145, recall: 0.63

[2m[36m(launch_and_fit pid=318484)[0m client: 91, train loss: 0.0143, loss: 0.0147, recall: 0.6615, mrr: 0.5559, time: 0.08449435234069824
[2m[36m(launch_and_fit pid=318486)[0m client: 175, train loss: 0.0142, loss: 0.0144, recall: 0.6719, mrr: 0.6335, time: 0.07017207145690918
[2m[36m(launch_and_fit pid=318496)[0m client: 114, train loss: 0.0144, loss: 0.0147, recall: 0.6250, mrr: 0.5761, time: 0.09632086753845215
[2m[36m(launch_and_fit pid=318497)[0m client: 102, train loss: 0.0144, loss: 0.0145, recall: 0.6615, mrr: 0.6044, time: 0.0913083553314209
[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 288
[2m[36m(launch_and_fit pid=318490)[0m client: 288, train loss: 0.0142, loss: 0.0146, recall: 0.6458, mrr: 0.5590, time: 0.06552767753601074
[2m[36m(launch_and_fit pid=318482)[0m client: 36, train loss: 0.0143, loss: 0.0144, recall: 0.7422, mrr: 0.6885, time: 0.7358818054199219
[2m[36m(launch_and_fit pid=318487)[0m client: 98, train loss: 0.0144, loss: 0.0146, 

[2m[36m(launch_and_fit pid=318485)[0m   out=out, **kwargs)
[2m[36m(launch_and_fit pid=318485)[0m   ret = ret.dtype.type(ret / rcount)


[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 187
[2m[36m(launch_and_fit pid=318485)[0m client: 187, train loss: 0.0143, loss: 0.0145, recall: 0.6719, mrr: 0.6389, time: 0.0726475715637207
[2m[36m(launch_and_fit pid=318497)[0m Start Epoch # 255
[2m[36m(launch_and_fit pid=318497)[0m client: 255, train loss: 0.0142, loss: 0.0146, recall: 0.6328, mrr: 0.5983, time: 0.05841875076293945
[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 222
[2m[36m(launch_and_fit pid=318495)[0m Start Epoch # 164
[2m[36m(launch_and_fit pid=318484)[0m client: 222, train loss: 0.0143, loss: 0.0144, recall: 0.7500, mrr: 0.6531, time: 0.06215095520019531
[2m[36m(launch_and_fit pid=318495)[0m client: 164, train loss: 0.0144, loss: 0.0146, recall: 0.5859, mrr: 0.5257, time: 0.06441950798034668
[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 249
[2m[36m(launch_and_fit pid=318490)[0m client: 249, train loss: 0.0144, loss: 0.0148, recall: 0.5859, mrr: 0.5083, time: 0.059416055

[2m[36m(launch_and_fit pid=318489)[0m Start Epoch # 208
[2m[36m(launch_and_fit pid=318489)[0m client: 208, train loss: 0.0143, loss: 0.0145, recall: 0.6406, mrr: 0.5952, time: 0.05688834190368652
[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 272
[2m[36m(launch_and_fit pid=318485)[0m client: 272, train loss: 0.0143, loss: 0.0143, recall: 0.6667, mrr: 0.6201, time: 0.07343149185180664
[2m[36m(launch_and_fit pid=318493)[0m Start Epoch # 69
[2m[36m(launch_and_fit pid=318493)[0m client: 69, train loss: 0.0143, loss: 0.0143, recall: 0.6719, mrr: 0.6380, time: 0.06268477439880371
[2m[36m(launch_and_fit pid=318491)[0m Start Epoch # 77
[2m[36m(launch_and_fit pid=318491)[0m client: 77, train loss: 0.0143, loss: 0.0146, recall: 0.6562, mrr: 0.5484, time: 0.06964755058288574
[2m[36m(launch_and_fit pid=318494)[0m Start Epoch # 220
[2m[36m(launch_and_fit pid=318494)[0m client: 220, train loss: 0.0143, loss: 0.0144, recall: 0.7031, mrr: 0.6339, time: 0.069400548934

[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 14
[2m[36m(launch_and_fit pid=318485)[0m client: 14, train loss: 0.0143, loss: 0.0144, recall: 0.6510, mrr: 0.5925, time: 0.05428004264831543
[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 118
[2m[36m(launch_and_fit pid=318485)[0m client: 118, train loss: 0.0142, loss: 0.0143, recall: 0.7031, mrr: 0.6599, time: 0.046053171157836914
[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 90
[2m[36m(launch_and_fit pid=318485)[0m client: 90, train loss: 0.0144, loss: 0.0145, recall: 0.6328, mrr: 0.5740, time: 0.0641636848449707
[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 293
[2m[36m(launch_and_fit pid=318490)[0m client: 293, train loss: 0.0143, loss: 0.0144, recall: 0.6667, mrr: 0.6050, time: 0.05659985542297363
[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 221
[2m[36m(launch_and_fit pid=318490)[0m client: 221, train loss: 0.0143, loss: 0.0144, recall: 0.7135, mrr: 0.6486, time: 0.192894935607

DEBUG flower 2022-06-17 13:33:16,371 | server.py:281 | fit_round received 297 results and 3 failures


[2m[36m(launch_and_fit pid=318497)[0m Start Epoch # 260
[2m[36m(launch_and_fit pid=318497)[0m client: 260, train loss: 0.0143, loss: 0.0142, recall: 0.7500, mrr: 0.7233, time: 0.049485206604003906


DEBUG flower 2022-06-17 13:33:16,839 | server.py:215 | evaluate_round: strategy sampled 150 clients (out of 300)
DEBUG flower 2022-06-17 13:35:49,964 | server.py:227 | evaluate_round received 150 results and 0 failures
DEBUG flower 2022-06-17 13:35:50,098 | server.py:269 | fit_round: strategy sampled 300 clients (out of 300)


[2m[36m(launch_and_fit pid=318491)[0m Start Epoch # 192
[2m[36m(launch_and_fit pid=318486)[0m Start Epoch # 128
[2m[36m(launch_and_fit pid=318483)[0m Start Epoch # 94
[2m[36m(launch_and_fit pid=318496)[0m Start Epoch # 189
[2m[36m(launch_and_fit pid=318494)[0m Start Epoch # 34
[2m[36m(launch_and_fit pid=318489)[0m Start Epoch # 131
[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 205
[2m[36m(launch_and_fit pid=318497)[0m Start Epoch # 167
[2m[36m(launch_and_fit pid=318487)[0m Start Epoch # 16
[2m[36m(launch_and_fit pid=318492)[0m Start Epoch # 223
[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 68
[2m[36m(launch_and_fit pid=318482)[0m Start Epoch # 213
[2m[36m(launch_and_fit pid=318485)[0m client: 68, train loss: 0.0141, loss: 0.0145, recall: 0.6250, mrr: 0.5677, time: 0.27065372467041016
[2m[36m(launch_and_fit pid=318482)[0m client: 213, train loss: 0.0144, loss: 0.0148, recall: 0.5781, mrr: 0.4879, time: 0.2768876552581787
[2m[36m(la

[2m[36m(launch_and_fit pid=318486)[0m Start Epoch # 50
[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 148
[2m[36m(launch_and_fit pid=318490)[0m client: 148, train loss: 0.0144, loss: 0.0144, recall: 0.7083, mrr: 0.6253, time: 0.3714451789855957
[2m[36m(launch_and_fit pid=318497)[0m Start Epoch # 267
[2m[36m(launch_and_fit pid=318488)[0m Start Epoch # 2
[2m[36m(launch_and_fit pid=318497)[0m client: 267, train loss: 0.0142, loss: 0.0146, recall: 0.5833, mrr: 0.5612, time: 0.061885833740234375
[2m[36m(launch_and_fit pid=318486)[0m client: 50, train loss: 0.0142, loss: 0.0147, recall: 0.6094, mrr: 0.5639, time: 0.3297388553619385
[2m[36m(launch_and_fit pid=318493)[0m Start Epoch # 197
[2m[36m(launch_and_fit pid=318493)[0m client: 197, train loss: 0.0142, loss: 0.0143, recall: 0.6979, mrr: 0.6350, time: 0.08722400665283203
[2m[36m(launch_and_fit pid=318488)[0m client: 2, train loss: 0.0144, loss: 0.0145, recall: 0.6354, mrr: 0.5961, time: 0.382770538330078

[2m[36m(launch_and_fit pid=318493)[0m Start Epoch # 101
[2m[36m(launch_and_fit pid=318488)[0m Start Epoch # 136
[2m[36m(launch_and_fit pid=318488)[0m client: 136, train loss: 0.0143, loss: 0.0146, recall: 0.5469, mrr: 0.5071, time: 0.06375718116760254
[2m[36m(launch_and_fit pid=318493)[0m client: 101, train loss: 0.0142, loss: 0.0147, recall: 0.6328, mrr: 0.5452, time: 0.08046388626098633
[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 188
[2m[36m(launch_and_fit pid=318484)[0m client: 188, train loss: 0.0144, loss: 0.0149, recall: 0.5000, mrr: 0.4807, time: 0.07211470603942871
[2m[36m(launch_and_fit pid=318496)[0m Start Epoch # 73
[2m[36m(launch_and_fit pid=318496)[0m client: 73, train loss: 0.0143, loss: 0.0148, recall: 0.5625, mrr: 0.4895, time: 0.08146905899047852
[2m[36m(launch_and_fit pid=318486)[0m Start Epoch # 231
[2m[36m(launch_and_fit pid=318486)[0m client: 231, train loss: 0.0143, loss: 0.0147, recall: 0.5885, mrr: 0.4687, time: 0.0715169906

[2m[36m(launch_and_fit pid=318490)[0m Start Epoch # 84
[2m[36m(launch_and_fit pid=318490)[0m client: 84, train loss: 0.0144, loss: 0.0146, recall: 0.6406, mrr: 0.5408, time: 0.06715106964111328
[2m[36m(launch_and_fit pid=318484)[0m Start Epoch # 71
[2m[36m(launch_and_fit pid=318484)[0m client: 71, train loss: 0.0143, loss: 0.0146, recall: 0.6354, mrr: 0.5727, time: 0.07934069633483887
[2m[36m(launch_and_fit pid=318494)[0m Start Epoch # 289
[2m[36m(launch_and_fit pid=318494)[0m client: 289, train loss: 0.0143, loss: 0.0145, recall: 0.6875, mrr: 0.6410, time: 0.07475996017456055
[2m[36m(launch_and_fit pid=318493)[0m Start Epoch # 9
[2m[36m(launch_and_fit pid=318493)[0m client: 9, train loss: 0.0143, loss: 0.0143, recall: 0.6797, mrr: 0.6510, time: 0.06743764877319336
[2m[36m(launch_and_fit pid=318495)[0m Start Epoch # 199
[2m[36m(launch_and_fit pid=318495)[0m client: 199, train loss: 0.0144, loss: 0.0144, recall: 0.6406, mrr: 0.5859, time: 0.0791499614715576

[2m[36m(launch_and_fit pid=318486)[0m Start Epoch # 207
[2m[36m(launch_and_fit pid=318486)[0m client: 207, train loss: 0.0143, loss: 0.0145, recall: 0.6875, mrr: 0.6630, time: 0.062497615814208984
[2m[36m(launch_and_fit pid=318496)[0m Start Epoch # 11
[2m[36m(launch_and_fit pid=318496)[0m client: 11, train loss: 0.0145, loss: 0.0146, recall: 0.5703, mrr: 0.5253, time: 0.061151981353759766
[2m[36m(launch_and_fit pid=318482)[0m Start Epoch # 251
[2m[36m(launch_and_fit pid=318482)[0m client: 251, train loss: 0.0142, loss: 0.0142, recall: 0.7031, mrr: 0.6435, time: 0.06111264228820801
[2m[36m(launch_and_fit pid=318482)[0m Start Epoch # 269
[2m[36m(launch_and_fit pid=318496)[0m Start Epoch # 292
[2m[36m(launch_and_fit pid=318487)[0m Start Epoch # 121
[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 208
[2m[36m(launch_and_fit pid=318482)[0m client: 269, train loss: 0.0142, loss: 0.0142, recall: 0.7031, mrr: 0.6773, time: 0.06812572479248047
[2m[36m(launc

[2m[36m(launch_and_fit pid=318491)[0m   out=out, **kwargs)
[2m[36m(launch_and_fit pid=318491)[0m   ret = ret.dtype.type(ret / rcount)



[2m[36m(launch_and_fit pid=318487)[0m Start Epoch # 22
[2m[36m(launch_and_fit pid=318487)[0m client: 22, train loss: 0.0143, loss: 0.0142, recall: 0.6875, mrr: 0.6507, time: 0.06149172782897949
[2m[36m(launch_and_fit pid=318482)[0m Start Epoch # 217
[2m[36m(launch_and_fit pid=318482)[0m client: 217, train loss: 0.0142, loss: 0.0147, recall: 0.6016, mrr: 0.5534, time: 0.059189558029174805
[2m[36m(launch_and_fit pid=318495)[0m Start Epoch # 15
[2m[36m(launch_and_fit pid=318495)[0m client: 15, train loss: 0.0142, loss: 0.0145, recall: 0.6641, mrr: 0.5732, time: 0.06291985511779785
[2m[36m(launch_and_fit pid=318491)[0m Start Epoch # 120
[2m[36m(launch_and_fit pid=318491)[0m client: 120, train loss: 0.0142, loss: 0.0145, recall: 0.5938, mrr: 0.5417, time: 0.05242919921875
[2m[36m(launch_and_fit pid=318494)[0m Start Epoch # 276
[2m[36m(launch_and_fit pid=318494)[0m client: 276, train loss: 0.0144, loss: 0.0146, recall: 0.6302, mrr: 0.5547, time: 0.0662605762481

[2m[36m(launch_and_fit pid=318497)[0m Start Epoch # 279
[2m[36m(launch_and_fit pid=318497)[0m client: 279, train loss: 0.0141, loss: 0.0146, recall: 0.7422, mrr: 0.6980, time: 0.05710339546203613
[2m[36m(launch_and_fit pid=318497)[0m Start Epoch # 1
[2m[36m(launch_and_fit pid=318485)[0m Start Epoch # 35
[2m[36m(launch_and_fit pid=318485)[0m client: 35, train loss: 0.0143, loss: 0.0143, recall: 0.6823, mrr: 0.6280, time: 0.06157565116882324
[2m[36m(launch_and_fit pid=318495)[0m Start Epoch # 102
[2m[36m(launch_and_fit pid=318497)[0m client: 1, train loss: 0.0143, loss: 0.0143, recall: 0.6797, mrr: 0.5926, time: 0.06122112274169922
[2m[36m(launch_and_fit pid=318495)[0m client: 102, train loss: 0.0144, loss: 0.0145, recall: 0.6615, mrr: 0.6044, time: 0.0612177848815918
[2m[36m(launch_and_fit pid=318495)[0m Start Epoch # 172
[2m[36m(launch_and_fit pid=318495)[0m client: 172, train loss: 0.0143, loss: 0.0144, recall: 0.6719, mrr: 0.6219, time: 0.051888227462768

DEBUG flower 2022-06-17 13:46:02,125 | server.py:281 | fit_round received 297 results and 3 failures


[2m[36m(launch_and_fit pid=318495)[0m Start Epoch # 147
[2m[36m(launch_and_fit pid=318495)[0m client: 147, train loss: 0.0143, loss: 0.0143, recall: 0.6562, mrr: 0.6409, time: 0.0546727180480957


DEBUG flower 2022-06-17 13:46:03,365 | server.py:215 | evaluate_round: strategy sampled 150 clients (out of 300)
DEBUG flower 2022-06-17 13:48:02,891 | server.py:227 | evaluate_round received 149 results and 1 failures
INFO flower 2022-06-17 13:48:02,892 | server.py:182 | FL finished in 3911.759077187002
INFO flower 2022-06-17 13:48:02,894 | app.py:149 | app_fit: losses_distributed [(1, 0.01565447713119567), (2, 0.015653786061830377), (3, 0.01565523891953344), (4, 0.015654583338309897), (5, 0.015654574699045052)]
INFO flower 2022-06-17 13:48:02,894 | app.py:150 | app_fit: metrics_distributed {}
INFO flower 2022-06-17 13:48:02,895 | app.py:151 | app_fit: losses_centralized []
INFO flower 2022-06-17 13:48:02,895 | app.py:152 | app_fit: metrics_centralized {}


History (loss, distributed):
	round 1: 0.01565447713119567
	round 2: 0.015653786061830377
	round 3: 0.01565523891953344
	round 4: 0.015654583338309897
	round 5: 0.015654574699045052

In [147]:
import os
os.system('jupyter nbconvert --to html gru-FL-CategoryRecommendation-300.ipynb')

[NbConvertApp] Converting notebook gru-FL-CategoryRecommendation-300.ipynb to html
[NbConvertApp] Writing 1123120 bytes to gru-FL-CategoryRecommendation-300.html


0