# Federated CARLA with Director example

In [1]:
# install requirements
!pip install -r requirements.txt

Collecting numpy==1.23.1
  Using cached numpy-1.23.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.23.2
    Uninstalling numpy-1.23.2:
      Successfully uninstalled numpy-1.23.2
Successfully installed numpy-1.23.1
You should consider upgrading via the '/home/inteluser/anaconda3/envs/openfl/bin/python -m pip install --upgrade pip' command.[0m


In [2]:
import numpy as np
np.random.seed(0)
import os
# disable GPUs due to Tensoflow not supporting CUDA 11
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

# Connect to the Federation

In [3]:
# Create a federation
from openfl.interface.interactive_api.federation import Federation

# please use the same identificator that was used in signed certificate
cliend_id = 'frontend'

# 1) Run with API layer - Director mTLS
# If the user wants to enable mTLS their must provide CA root chain, and signed key pair to the federation interface
# cert_chain = 'cert/root_ca.crt'
# API_certificate = 'cert/frontend.crt'
# API_private_key = 'cert/frontend.key'

# federation = Federation(client_id='frontend', director_node_fqdn='localhost', director_port='50051', disable_tls=False,
#                        cert_chain=cert_chain, api_cert=API_certificate, api_private_key=API_private_key)

# --------------------------------------------------------------------------------------------------------------------

# 2) Run with TLS disabled (trusted environment)
# Federation can also determine local fqdn automatically
federation = Federation(client_id='frontend', director_node_fqdn='localhost', director_port='50051', tls=False)

In [4]:
shard_registry = federation.get_shard_registry()
shard_registry

{'env_one': {'shard_info': node_info {
    name: "env_one"
  }
  shard_description: "Dataset from env_one by CARLA"
  sample_shape: "600"
  sample_shape: "800"
  sample_shape: "3"
  target_shape: "1",
  'is_online': True,
  'is_experiment_running': False,
  'last_updated': '2022-08-15 19:53:18',
  'current_time': '2022-08-15 19:53:37',
  'valid_duration': seconds: 120,
  'experiment_name': 'ExperimentName Mock'},
 'env_two': {'shard_info': node_info {
    name: "env_two"
  }
  shard_description: "Dataset from env_two by CARLA"
  sample_shape: "600"
  sample_shape: "800"
  sample_shape: "3"
  target_shape: "1",
  'is_online': True,
  'is_experiment_running': False,
  'last_updated': '2022-08-15 19:52:42',
  'current_time': '2022-08-15 19:53:37',
  'valid_duration': seconds: 120,
  'experiment_name': 'ExperimentName Mock'}}

In [5]:
federation.target_shape

['1']

In [6]:
# First, request a dummy_shard_desc that holds information about the federated dataset 
dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)
sample, target = dummy_shard_desc.get_dataset(dataset_type='')[0]
f"Sample shape: {sample.shape}, target shape: {target.shape}"

'Sample shape: (600, 800, 3), target shape: (1,)'

## Creating a FL experiment using Interactive API

In [7]:
from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment

2022-08-15 19:53:38.281210: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
2022-08-15 19:53:38.281231: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


### Register dataset

In [8]:
from tensorflow.keras.utils import Sequence

class DataGenerator(Sequence):

    def __init__(self, train_dataset, train_labels):
        self.train_dataset = train_dataset
        self.train_labels = train_labels

    def __getitem__(self):
        return self.train_dataset, self.train_labels

# Now you can implement you data loaders using dummy_shard_desc
class CARLAFedDataset(DataInterface):

    def __init__(self, train_dataset=[], val_dataset=[], train_val_split=0.8, **kwargs):
        super().__init__(**kwargs)
        self.train_dataset = train_dataset
        self.val_dataset = val_dataset
        self.train_val_split = train_val_split

    @property
    def shard_descriptor(self):
        return self._shard_descriptor

    @shard_descriptor.setter
    def shard_descriptor(self, shard_descriptor):
        """
        Describe per-collaborator procedures or sharding.

        This method will be called during a collaborator initialization.
        Local shard_descriptor will be set by Envoy.
        """
        self._shard_descriptor = shard_descriptor

    def __getitem__(self, index):
        return self.shard_descriptor[index]

    def __len__(self):
        return len(self.shard_descriptor)

    def get_train_loader(self):
        """
        Output of this method will be provided to tasks with optimizer in contract
        """
        print('tttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttt')
        # if self.kwargs['train_bs']:
        #     batch_size = self.kwargs['train_bs']
        # else:
        #     batch_size = 64

        self.train_dataset, _ = self.shard_descriptor.get_dataset('train', labels=LABELS)
        # train_valid_split = int(self.train_val_split*len(train_dataset))
        # np.random.shuffle(train_dataset)
        # self.train_dataset = train_dataset[:train_valid_split]
        # self.val_dataset = train_dataset[train_valid_split:]
        return self.train_dataset

    def get_valid_loader(self):
        """
        Output of this method will be provided to tasks without optimizer in contract
        """
        print('vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv')
#         if self.kwargs['valid_bs']:
#             batch_size = self.kwargs['valid_bs']
#         else:
#             batch_size = 512

#         self.val_dataset, self.val_labels = self.shard_descriptor.get_dataset('val', self.train_val_split, labels=LABELS)
        self.val_dataset, _ = self.shard_descriptor.get_dataset('val', labels=LABELS)
        return self.val_dataset
        # return DataGenerator(self.val_dataset, batch_size=batch_size)

    def get_train_data_size(self):
        """
        Information for aggregation
        """
        return len(self.train_dataset)

    def get_valid_data_size(self):
        """
        Information for aggregation
        """
        return len(self.val_dataset)


### Describe a model and optimizer

In [9]:
import tensorflow as tf
print('TensorFlow', tf.__version__)
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from tensorflow.keras.layers import concatenate
import matplotlib.pyplot as plt
import tensorflow.keras.backend as K
import imgaug as ia
from tqdm import tqdm
from imgaug import augmenters as iaa
import pickle
import os, cv2
from preprocessing import BatchGenerator
from utils import decode_netout, draw_boxes

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

%matplotlib inline

TensorFlow 2.9.1


In [10]:
LABELS = ['vehicle']

IMAGE_H, IMAGE_W = 416, 416
GRID_H,  GRID_W  = 13 , 13
BOX              = 5
CLASS            = len(LABELS)
CLASS_WEIGHTS    = np.ones(CLASS, dtype='float32')
OBJ_THRESHOLD    = 0.3#0.5
NMS_THRESHOLD    = 0.3#0.45
ANCHORS          = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828]

NO_OBJECT_SCALE  = 1.0
OBJECT_SCALE     = 5.0
COORD_SCALE      = 1.0
CLASS_SCALE      = 0.0

BATCH_SIZE       = 16
WARM_UP_BATCHES  = 0
TRUE_BOX_BUFFER  = 10

In [11]:
input_image = Input(shape=(IMAGE_H, IMAGE_W, 3))
true_boxes  = Input(shape=(1, 1, 1, TRUE_BOX_BUFFER , 4))

# Layer 1
x = Conv2D(16, (3,3), strides=(1,1), padding='same', name='conv_1', use_bias=False)(input_image)
x = BatchNormalization(name='norm_1')(x)
x = LeakyReLU(alpha=0.1)(x)
x = MaxPooling2D(pool_size=(2, 2))(x)

# Layer 2 - 5
for i in range(0,4):
    x = Conv2D(32*(2**i), (3,3), strides=(1,1), padding='same', name='conv_' + str(i+2), use_bias=False)(x)
    x = BatchNormalization(name='norm_' + str(i+2))(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

# Layer 6
x = Conv2D(512, (3,3), strides=(1,1), padding='same', name='conv_6', use_bias=False)(x)
x = BatchNormalization(name='norm_6')(x)
x = LeakyReLU(alpha=0.1)(x)
x = MaxPooling2D(pool_size=(2, 2), strides=(1,1), padding='same')(x)

# Layer 7 - 8
for i in range(0,2):
    x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_' + str(i+7), use_bias=False)(x)
    x = BatchNormalization(name='norm_' + str(i+7))(x)
    x = LeakyReLU(alpha=0.1)(x)

feature_extractor = Model(input_image, x)
feature_extractor.load_weights('tiny_yolo_backend.h5')

# Layer 9
x = Conv2D(BOX * (4 + 1 + CLASS), (1,1), strides=(1,1), padding='same', name='conv_9')(x)
output = Reshape((GRID_H, GRID_W, BOX, 4 + 1 + CLASS))(x)

# small hack to allow true_boxes to be registered when Keras build the model 
# for more information: https://github.com/fchollet/keras/issues/2790
output = Lambda(lambda args: args[0])([output, true_boxes])

model = Model([input_image, true_boxes], output)

2022-08-15 19:53:40.250788: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-08-15 19:53:40.250823: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: netcompute-testbed-node1
2022-08-15 19:53:40.250831: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: netcompute-testbed-node1
2022-08-15 19:53:40.250963: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 510.85.2
2022-08-15 19:53:40.250987: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 510.85.2
2022-08-15 19:53:40.250994: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 510.85.2
2022-08-15 19:53:40.251441: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use th

In [12]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 416, 416, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv_1 (Conv2D)                (None, 416, 416, 16  432         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 norm_1 (BatchNormalization)    (None, 416, 416, 16  64          ['conv_1[0][0]']                 
                                )                                                           

In [13]:
layer   = model.layers[-4] # the last convolutional layer
weights = layer.get_weights()

new_kernel = np.random.normal(size=weights[0].shape)/(GRID_H*GRID_W)
new_bias   = np.random.normal(size=weights[1].shape)/(GRID_H*GRID_W)

layer.set_weights([new_kernel, new_bias])

In [14]:
def custom_loss(b_batch, y_true, y_pred):
    tf.to_float = lambda x: tf.cast(x, tf.float32)
    y_true = tf.to_float(y_true)
    mask_shape = tf.shape(y_true)[:4]
    cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(GRID_W), [GRID_H]), (1, GRID_H, GRID_W, 1, 1)))
    cell_y = tf.transpose(cell_x, (0,2,1,3,4))

    cell_grid = tf.tile(tf.concat([cell_x,cell_y], -1), [BATCH_SIZE, 1, 1, 5, 1])
    
    coord_mask = tf.zeros(mask_shape)
    conf_mask  = tf.zeros(mask_shape)
    class_mask = tf.zeros(mask_shape)
    
    seen = tf.Variable(0.)
    total_recall = tf.Variable(0.)
    
    """
    Adjust prediction
    """
    ### adjust x and y      
    pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid
    ### adjust w and h
    pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(ANCHORS, [1,1,1,BOX,2])
    
    ### adjust confidence
    pred_box_conf = tf.sigmoid(y_pred[..., 4])
    ### adjust class probabilities
    pred_box_class = y_pred[..., 5:]
    
    """
    Adjust ground truth
    """
    ### adjust x and y
    true_box_xy = y_true[..., 0:2] # relative position to the containing cell
    
    ### adjust w and h
    true_box_wh = y_true[..., 2:4] # number of cells accross, horizontally and vertically
    
    ### adjust confidence
    true_wh_half = true_box_wh / 2.
    true_mins    = true_box_xy - true_wh_half
    true_maxes   = true_box_xy + true_wh_half
    
    pred_wh_half = pred_box_wh / 2.
    pred_mins    = pred_box_xy - pred_wh_half
    pred_maxes   = pred_box_xy + pred_wh_half       
    
    intersect_mins  = tf.maximum(pred_mins,  true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
    
    true_areas = true_box_wh[..., 0] * true_box_wh[..., 1]
    pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores  = tf.truediv(intersect_areas, union_areas)
    
    true_box_conf = iou_scores * y_true[..., 4]
    
    ### adjust class probabilities
    true_box_class = tf.argmax(y_true[..., 5:], -1)
    
    """
    Determine the masks
    """
    ### coordinate mask: simply the position of the ground truth boxes (the predictors)
    coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * COORD_SCALE
    
    ### confidence mask: penelize predictors + penalize boxes with low IOU
    # penalize the confidence of the boxes, which have IOU with some ground truth box < 0.6
    # true_xy = true_boxes[..., 0:2]
    # true_wh = true_boxes[..., 2:4]
    b_batch = tf.to_float(b_batch)
    true_xy = b_batch[..., 0:2]
    true_wh = b_batch[..., 2:4]
    
    true_wh_half = true_wh / 2.
    true_mins    = true_xy - true_wh_half
    true_maxes   = true_xy + true_wh_half
    
    pred_xy = tf.expand_dims(pred_box_xy, 4)
    pred_wh = tf.expand_dims(pred_box_wh, 4)
    
    pred_wh_half = pred_wh / 2.
    pred_mins    = pred_xy - pred_wh_half
    pred_maxes   = pred_xy + pred_wh_half    
    
    intersect_mins  = tf.maximum(pred_mins,  true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
    
    true_areas = true_wh[..., 0] * true_wh[..., 1]
    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores  = tf.truediv(intersect_areas, union_areas)

    best_ious = tf.reduce_max(iou_scores, axis=4)
    conf_mask = conf_mask + tf.to_float(best_ious < 0.6) * (1 - y_true[..., 4]) * NO_OBJECT_SCALE
    
    # penalize the confidence of the boxes, which are reponsible for corresponding ground truth box
    conf_mask = conf_mask + y_true[..., 4] * OBJECT_SCALE
    
    
    ### class mask: simply the position of the ground truth boxes (the predictors)
    class_mask = y_true[..., 4] * tf.gather(CLASS_WEIGHTS, true_box_class) * CLASS_SCALE       
    
    """
    Warm-up training
    """
    no_boxes_mask = tf.to_float(coord_mask < COORD_SCALE/2.)
    seen = tf.compat.v1.assign_add(seen, 1.)
    
    true_box_xy, true_box_wh, coord_mask = tf.cond(tf.less(seen, WARM_UP_BATCHES), 
                          lambda: [true_box_xy + (0.5 + cell_grid) * no_boxes_mask, 
                                   true_box_wh + tf.ones_like(true_box_wh) * np.reshape(ANCHORS, [1,1,1,BOX,2]) * no_boxes_mask, 
                                   tf.ones_like(coord_mask)],
                          lambda: [true_box_xy, 
                                   true_box_wh,
                                   coord_mask])
    
    """
    Finalize the loss
    """
    nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))
    nb_conf_box  = tf.reduce_sum(tf.to_float(conf_mask  > 0.0))
    nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))
    
    true_box_xy = tf.to_float(true_box_xy)
    coord_mask = tf.to_float(coord_mask)
    
    loss_xy    = tf.reduce_sum(tf.square(true_box_xy-pred_box_xy)     * coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_wh    = tf.reduce_sum(tf.square(true_box_wh-pred_box_wh)     * coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_conf  = tf.reduce_sum(tf.square(true_box_conf-pred_box_conf) * conf_mask)  / (nb_conf_box  + 1e-6) / 2.
    loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)
    loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6)
    
    loss = loss_xy + loss_wh + loss_conf + loss_class
    
#     nb_true_box = tf.reduce_sum(y_true[..., 4])
#     nb_pred_box = tf.reduce_sum(tf.to_float(true_box_conf > 0.5) * tf.to_float(pred_box_conf > 0.3))

#     """
#     Debugging code
#     """    
#     current_recall = nb_pred_box/(nb_true_box + 1e-6)
#     total_recall = tf.compat.v1.assign_add(total_recall, current_recall) 

    # loss = tf.Print(loss, [tf.zeros((1))], message='Dummy Line \t', summarize=1000)
    # loss = tf.Print(loss, [loss_xy], message='Loss XY \t', summarize=1000)
    # loss = tf.Print(loss, [loss_wh], message='Loss WH \t', summarize=1000)
    # loss = tf.Print(loss, [loss_conf], message='Loss Conf \t', summarize=1000)
    # loss = tf.Print(loss, [loss_class], message='Loss Class \t', summarize=1000)
    # loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000)
    # loss = tf.Print(loss, [current_recall], message='Current Recall \t', summarize=1000)
    # loss = tf.Print(loss, [total_recall/seen], message='Average Recall \t', summarize=1000)
    # tf.print(loss)
    
    return loss

In [15]:
generator_config = {
    'IMAGE_H'         : IMAGE_H, 
    'IMAGE_W'         : IMAGE_W,
    'GRID_H'          : GRID_H,  
    'GRID_W'          : GRID_W,
    'BOX'             : BOX,
    'LABELS'          : LABELS,
    'CLASS'           : len(LABELS),
    'ANCHORS'         : ANCHORS,
    'BATCH_SIZE'      : BATCH_SIZE,
    'TRUE_BOX_BUFFER' : 10,
}

In [16]:
def normalize(image):
    return image / 255.

In [17]:
optimizer = Adam(lr=1e-4)
# optimizer = Adam(lr=0.5e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
#optimizer = SGD(lr=1e-4, decay=0.0005, momentum=0.9)
#optimizer = RMSprop(lr=1e-4, rho=0.9, epsilon=1e-08, decay=0.0)

  super(Adam, self).__init__(name, **kwargs)


In [18]:
fed_dataset = CARLAFedDataset(train_bs=64, valid_bs=512)

### Define and register FL tasks

In [19]:
import matplotlib.pyplot as plt
import os
from utils import evaluate

TI = TaskInterface()

# https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit
@TI.register_fl_task(model='model', data_loader='train_loader', device='device', optimizer='optimizer')

def train(model, train_loader, device, optimizer):
    # train_dataset, train_labels = train_loader
    train_batch = BatchGenerator(train_loader, generator_config, norm=normalize, jitter=False)

    # Iterate over the batches of the dataset.
    batch_loss = []
    for step, (x_batch_train, y_batch_train) in enumerate(train_batch):
        y = tf.convert_to_tensor(y_batch_train)
        _, b_batch = x_batch_train
        b_batch = tf.convert_to_tensor(b_batch)
        with tf.GradientTape() as tape:
            y_pred = model(x_batch_train, training=True)  # Forward pass
            # Compute the loss value
            # (the loss function is configured in `compile()`)
            loss = custom_loss(b_batch, y, y_pred)

        # Compute gradients
        trainable_vars = model.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights
        optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics
        batch_loss.append(loss)
    train_loss = sum(batch_loss)/len(batch_loss)
    print('Training loss over epoch: %.4f' % (float(train_loss)))
    return {'train_loss': train_loss}


@TI.register_fl_task(model='model', data_loader='val_loader', device='device')
def validate(model, val_loader, device=''):
    # val_dataset, val_labels = val_loader
    val_batch = BatchGenerator(val_loader, generator_config, norm=normalize, jitter=False)
    
    ############################################
    # Compute mAP on the validation set
    ############################################
    average_precisions = evaluate(model, val_batch)     

    # print evaluation
    for label, average_precision in average_precisions.items():
        print(LABELS[label], '{:.4f}'.format(average_precision))
    mAP = sum(average_precisions.values()) / len(average_precisions)
    return {'mAP': tf.convert_to_tensor(mAP)}

    # for x_batch_val, y_batch_val in val_loader:
    #     y = tf.convert_to_tensor(y_batch_val)
    #     # Compute predictions
    #     y_pred = model(x_batch_val, training=False)
    #     # Update the metrics.
    #     val_acc_metric.update_state(y, y_pred)
    # val_acc = val_acc_metric.result()
    # val_acc_metric.reset_states()
    # return {'validation_accuracy': val_acc}


#### Register model

In [20]:
from copy import deepcopy

framework_adapter = 'openfl.plugins.frameworks_adapters.keras_adapter.FrameworkAdapterPlugin'
MI = ModelInterface(model=model, optimizer=optimizer, framework_plugin=framework_adapter)
# Save the initial model state
initial_model = deepcopy(model)





INFO:tensorflow:Assets written to: ram://92a8ee7c-3d64-4219-9cad-758dfa3f9bea/assets


INFO:tensorflow:Assets written to: ram://92a8ee7c-3d64-4219-9cad-758dfa3f9bea/assets






## Time to start a federated learning experiment

In [21]:
# create an experimnet in federation
experiment_name = 'CARLA_experiment'
fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)

In [22]:
# If I use autoreload I got a pickling error
# The following command zips the workspace and python requirements to be transfered to collaborator nodes
fl_experiment.start(model_provider=MI, 
                    task_keeper=TI,
                    data_loader=fed_dataset,
                    rounds_to_train=70,
                    opt_treatment='RESET')





INFO:tensorflow:Assets written to: ram://dbf42ad2-6cf8-4391-b1f6-4deefc2fb813/assets


INFO:tensorflow:Assets written to: ram://dbf42ad2-6cf8-4391-b1f6-4deefc2fb813/assets


In [None]:
# If user want to stop IPython session, then reconnect and check how experiment is going 
# fl_experiment.restore_experiment_state(MI)

fl_experiment.stream_metrics()

## Testing the best model

In [None]:
!pip install -r ../envoy/sd_requirements.txt

In [None]:
import sys
sys.path.insert(1, '../envoy')

In [None]:
from shard_descriptor import NextWordShardDescriptor

# https://www.gutenberg.org/files/2892/2892-h/2892-h.htm
fed_dataset = NextWordSD(train_bs=64, valid_bs=512, train_val_split=0)
fed_dataset.shard_descriptor = NextWordShardDescriptor(title='Irish Fairy Tales', author='James Stephens')

In [None]:
best_model = fl_experiment.get_best_model()

# We remove data from director
fl_experiment.remove_experiment_data()

# Validating initial model
validate(initial_model, fed_dataset.get_valid_loader())

In [None]:
# Validating trained model
validate(best_model, fed_dataset.get_valid_loader())