## Collaborative Hybrid Federated Learning example workflow

In this example, we collaboratively train a model with three nodes through a Hybrid Federated Learning workflow. Models are trained locally using the PyTorch library and are aggregated across parties under homomorphic encryption.

### Setup

In [None]:
import pandas as pd
import random

from tuneinsight import Diapason, models
from tuneinsight.computations import HybridFL
import tuneinsight.utils.time_tools as time

from ti_models.factories.ti_trainer_factory import get_premade_ti_trainer

#### Create clients

In [None]:
# Connection settings exported as environment variables for this notebook session.
# OIDC_CLIENT_ID, TI_USERNAME and TI_PASSWORD are left blank here; fill them in before running.
# NOTE(review): TI_VERIFY_SSL=False presumably turns off TLS certificate verification;
# confirm this is only used against the demo node.
%env NODE_URL=https://amld-workshop-node1.demo.tuneinsight.net
%env OIDC_URL=https://auth.tuneinsight.com/auth/
%env OIDC_CLIENT_ID=
%env TI_USERNAME=
%env TI_PASSWORD=
%env TI_VERIFY_SSL=False

In [None]:
# Build the SDK client from the environment (presumably the variables set with %env — confirm against the SDK docs).
client = Diapason.from_env()

In [None]:
# Call the client's health check; presumably confirms the node is reachable before going further.
client.healthcheck()

#### Create and share the project

In [None]:
# A random five-digit suffix makes project-name collisions between runs unlikely.
suffix = random.randrange(10000, 100000)
PROJECT_NAME = f"hybrid_fl_{suffix}"

# Create the project (clearing any existing one with the same name), then share it.
project = client.new_project(name=PROJECT_NAME, clear_if_exists=True)
project.share()

print(project)

## Explore the dataset

In [None]:
# CSV file used below for exploration and as the uploaded datasource.
data_path = "data/data_0.csv"

In [None]:
# Load the raw CSV (no index column specified) and display it as the cell output.
df = pd.read_csv(data_path)
df

In [None]:
# Both the training and the validation split are read from the same CSV file here.
TRAIN_DATA_PATH = TEST_DATA_PATH = data_path

# The first CSV column is used as the index for both frames.
train_data = pd.read_csv(TRAIN_DATA_PATH, index_col=0)
test_data = pd.read_csv(TEST_DATA_PATH, index_col=0)

# Tag each row with the split it belongs to; assign() returns a new frame,
# so train_data / test_data stay untouched.
train_df = train_data.assign(split="train")
test_df = test_data.assign(split="val")

# Stack both splits into one frame with a fresh 0..n-1 index and display it.
full_df = pd.concat([train_df, test_df], ignore_index=True)
full_df

Upload the data to the instance and set it on the project.

In [None]:
# Create a CSV datasource from the local file; the random four-digit suffix
# makes datasource-name clashes unlikely.
datasource_name = f"patient_data_{random.randint(1000, 9999)}"
datasource = client.new_csv_datasource(
    csv=data_path,
    name=datasource_name,
    clear_if_exists=True,
)

In [None]:
# Set the datasource created above on the project.
project.set_datasource(datasource)

### Machine Learning Task Definition

In [None]:
# This is a premade trainer requested by name ("logreg", input_dim=5, n_classes=2);
# any PyTorch model can be put into the same format.
trainer = get_premade_ti_trainer("logreg", input_dim=5, n_classes=2)

In [None]:
# Print the trainer's string representation.
print(trainer)

### Secure Collaborative training

In [None]:
# Differential Privacy parameters: epsilon and delta.
dp_epsilon, dp_delta = 1, 1e-4

In [None]:
# Generic Hybrid FL settings: rounds, workers and the aggregation strategy.
params = models.HybridFLGenericParams(
    fl_rounds=2,
    num_workers=2,
    strategy=models.aggregation_strategy.AggregationStrategy.CONSTANT,
)

# Hyper-parameters for the local training.
ml_params = models.HybridFLMachineLearningParams(
    local_epochs=1,
    batch_size=64,
    learning_rate=0.02,
    momentum=0.9,
)

# Differential-privacy settings; delta is taken from dp_delta.
dp_params = models.HybridFLDpParams(
    delta=dp_delta,
    gradient_clipping=0.1,
)

Define the computation (Hybrid Federated Learning) on the project.

In [None]:
# Tie the trainer and the three parameter sets to the project as one
# Hybrid FL computation.
hybrid_fl = HybridFL(
    project=project,
    task_id="logreg",
    trainer=trainer,
    params=params,
    spec_params=ml_params,
    dp_params=dp_params,
    dp_epsilon=dp_epsilon,
)
# Timeout of 300 * 60 times time.SECOND (300 minutes if SECOND is one second — confirm).
hybrid_fl.max_timeout = 300 * 60 * time.SECOND

Set the project policy with Differential Privacy

In [None]:
# Fetch the project's policy, switch on differential privacy and set the
# budget to a multiple of the per-run epsilon.
policy = project.get_policy()
policy.enable_differential_privacy()
allowed_runs = 10  # You are allowed to run the same project 10 times.
policy.data_policy.set_budget(dp_epsilon * allowed_runs)

# Write the modified policy back to the project.
project.set_policy(policy)

Clients authorize the project

In [None]:
# Request authorization for the project (presumably needed before the computation may run — confirm).
project.request_authorization()

Here you can get a quick summary of the project:

In [None]:
# Display the project overview.
project.display_overview()

In [None]:
# Display the project's datasources.
project.display_datasources()

## Run the training

You will get an error here.

In [None]:
# Run the Hybrid FL computation configured above.
hybrid_fl.run()

### Final aggregated model path

In [None]:
# fetch_results() is indexed with [-1], so only its last entry is kept.
results = project.fetch_results()[-1]

In [None]:
# Display the result_path attribute of the result selected above.
results.result_path