# Data Scientist (DS)

Set `LOCAL_TEST` to `True` if you want to run the clients locally for testing.

In [16]:
# Selects how the session cell connects to the data owners:
#   True  -> sets up a local RDS server with `syft_rds.orchestra.setup_rds_server`
#            and opens sessions to the `do1@` / `do2@openmined.org` test identities.
#   False -> reads the DS email from the SyftBox client config (`Client.load()`) and
#            opens sessions to the `flower-test-group-*@openmined.org` datasites.
LOCAL_TEST = False

## Some paths and constants 

In [17]:
from pathlib import Path

# Dataset name looked up on every data owner's datasite (`client.dataset.get(name=...)`)
# and attached to each submitted job (`dataset_name=...`).
SYFTBOX_DATASET_NAME = "pima-indians-diabetes-database"
# Gates the optional `flwr run` and `syft_flwr.run` simulation cells.
RUN_SIMULATION = True

## Log into the data owners' datasites

In [18]:
# Open one RDS session per data owner and expose them as `do_clients`
# (with the matching addresses in `do_emails`).
if not LOCAL_TEST:
    import syft_rds as sy
    from syft_core import Client

    # The DS identity is taken from the SyftBox client configuration.
    DS = Client.load().email
    print("DS email: ", DS)

    DO1, DO2 = (
        "flower-test-group-1@openmined.org",
        "flower-test-group-2@openmined.org",
    )

    do_client_1 = sy.init_session(host=DO1)
    print("Logged into: ", do_client_1.host)

    do_client_2 = sy.init_session(host=DO2)
    print("Logged into: ", do_client_2.host)
else:
    from syft_rds.orchestra import setup_rds_server

    print("Running locally!")

    DS = "ds@openmined.org"
    print("DS email: ", DS)

    DO1, DO2 = "do1@openmined.org", "do2@openmined.org"

    # Local mode: a single DS-side stack is created in the current directory
    # and both data-owner sessions are opened through it.
    ds_stack = setup_rds_server(email=DS, key="flwr", root_dir=Path("."))
    do_client_1 = ds_stack.init_session(host=DO1)
    do_client_2 = ds_stack.init_session(host=DO2)

# Parallel lists: do_emails[i] is the host address of do_clients[i].
do_emails = [DO1, DO2]
do_clients = [do_client_1, do_client_2]

[32m2025-06-30 12:01:16.226[0m | [34m[1mDEBUG   [0m | [36msyft_rds.client.rds_client[0m:[36m_start_job_polling[0m:[36m312[0m - [34m[1mStarting thread to poll jobs.[0m
[32m2025-06-30 12:01:16.229[0m | [34m[1mDEBUG   [0m | [36msyft_rds.client.rds_client[0m:[36m_start_job_polling[0m:[36m312[0m - [34m[1mStarting thread to poll jobs.[0m


DS email:  borja.arroyog@upm.es
Logged into:  flower-test-group-1@openmined.org
Logged into:  flower-test-group-2@openmined.org


## Explore the datasets

In [19]:
# Display the dataset name that is requested from each datasite in the next cell.
SYFTBOX_DATASET_NAME

'pima-indians-diabetes-database'

In [20]:
# Look up the shared dataset on every datasite, remember where its mock copy
# lives (used later by the `syft_flwr` simulation) and show its metadata.
mock_paths = []
for client in do_clients:
    dataset = client.dataset.get(name=SYFTBOX_DATASET_NAME)
    mock_path = dataset.get_mock_path()
    mock_paths += [mock_path]
    print(f"Client {client.host}'s dataset: \n{dataset}\n")

Client flower-test-group-1@openmined.org's dataset: 
Dataset
  uid: e9295048-0400-428b-96ec-10722b0bea82
  created_by: None
  created_at: 2025-05-26T03:28:12.354767Z
  updated_at: 2025-05-26T03:28:12.354773Z
  client_id: a68be9a5-8b06-4446-84cc-45bd0db53206
  name: pima-indians-diabetes-database
  private: syft://flower-test-group-1@openmined.org/private/datasets/pima-indians-diabetes-database
  mock: syft://flower-test-group-1@openmined.org/public/datasets/pima-indians-diabetes-database
  summary: Pima Indians Diabetes Database.
  readme: syft://flower-test-group-1@openmined.org/public/datasets/pima-indians-diabetes-database/README.md
  tags: []
  runtime_id: None

Client flower-test-group-2@openmined.org's dataset: 
Dataset
  uid: 245bd016-7abd-4731-ad95-1f9e5adbdc0d
  created_by: None
  created_at: 2025-05-26T03:28:14.559856Z
  updated_at: 2025-05-26T03:28:14.559861Z
  client_id: 0d4df529-ae9c-4e7a-ad90-86ea857b968b
  name: pima-indians-diabetes-database
  private: syft://flower-tes

## Bootstrap and run the `syft_flwr` simulation

In [21]:
# Directory of the Flower project that is bootstrapped, simulated and submitted
# as job code in the cells below. The path is relative, so it is resolved
# against the notebook's current working directory.
SYFT_FLWR_PROJECT_PATH = Path("./fl-diabetes-prediction")
# Fail fast with an actionable message: a bare assert would only show
# "AssertionError" when the notebook is started from the wrong directory.
assert SYFT_FLWR_PROJECT_PATH.exists(), (
    f"Flower project not found at {SYFT_FLWR_PROJECT_PATH.resolve()}; "
    "run this notebook from the directory that contains 'fl-diabetes-prediction'."
)

In [22]:
import syft_flwr

try:
    !rm -rf {SYFT_FLWR_PROJECT_PATH / "main.py"}
    syft_flwr.bootstrap(SYFT_FLWR_PROJECT_PATH, aggregator=DS, datasites=do_emails)
    print("Bootstrapped project successfully ✅")
except Exception as e:
    print(e)

[32m2025-06-30 12:01:25.717[0m | [1mINFO    [0m | [36msyft_flwr.bootstrap[0m:[36mbootstrap[0m:[36m103[0m - [1mSuccessfully bootstrapped syft-flwr project at fl-diabetes-prediction with datasites ['flower-test-group-1@openmined.org', 'flower-test-group-2@openmined.org'] and aggregator borja.arroyog@upm.es ✅[0m


Bootstrapped project successfully ✅


## Run `flwr` and `syft_flwr` simulations (optional)

In [None]:
# Toggle for the optional simulation cells in this section (`flwr run` and
# `syft_flwr.run`). Note: this re-assigns the flag already set to the same
# value in the constants cell at the top of the notebook.
RUN_SIMULATION = True

In [26]:
# Run the project through the Flower CLI (`flwr run`) when simulations are enabled.
if RUN_SIMULATION:
    !flwr run {SYFT_FLWR_PROJECT_PATH}

In [27]:
# Clean up: remove the package's `__pycache__` directory and the local
# `weights/` directory (presumably left behind by the run above — TODO confirm).
!rm -rf {SYFT_FLWR_PROJECT_PATH / "fl_diabetes_prediction" / "__pycache__"}
!rm -rf weights/

In [28]:
# Display the mock dataset paths collected per datasite; they are passed to
# `syft_flwr.run` in the next cell.
mock_paths

[PosixPath('/Users/borja/SyftBox/datasites/flower-test-group-1@openmined.org/public/datasets/pima-indians-diabetes-database'),
 PosixPath('/Users/borja/SyftBox/datasites/flower-test-group-2@openmined.org/public/datasets/pima-indians-diabetes-database')]

In [29]:
# Run the `syft_flwr` simulation of the project against the datasites' mock
# datasets when simulations are enabled.
if RUN_SIMULATION:
    print(f"running syft_flwr simulation with mock paths: {mock_paths}")
    syft_flwr.run(SYFT_FLWR_PROJECT_PATH, mock_paths)

## Submit jobs

<img src="./images/dsSendsJobs.png" width="80%" alt="DS Submits Jobs">

In [30]:
# Clean up before submitting jobs: the project directory itself is sent as the
# job's code (`user_code_path`), so remove `__pycache__` and `simulation_logs`
# from it, plus the local `weights/` directory.
!rm -rf {SYFT_FLWR_PROJECT_PATH / "fl_diabetes_prediction" / "__pycache__"}
!rm -rf {SYFT_FLWR_PROJECT_PATH / "simulation_logs"}
!rm -rf weights/

In [31]:
# Settings shared by the job sent to every data owner; only the target
# datasite (the client the job is submitted through) differs.
job_settings = {
    "name": "Syft Flower Experiment",
    "description": "Syft Flower Federated Learning Experiment",
    "user_code_path": SYFT_FLWR_PROJECT_PATH,
    "dataset_name": SYFTBOX_DATASET_NAME,
    "entrypoint": "main.py",
}
job_tags = ("federated learning", "fl", "syft_flwr", "flwr")

for client in do_clients:
    print(f"sending job to {client.host}")
    # Each submission receives its own fresh list of tags.
    job = client.jobs.submit(**job_settings, tags=list(job_tags))
    print(job)

sending job to flower-test-group-1@openmined.org


  job = client.jobs.submit(
[32m2025-06-30 12:01:56.696[0m | [34m[1mDEBUG   [0m | [36msyft_rds.client.rds_clients.runtime[0m:[36mget_or_create[0m:[36m69[0m - [34m[1mRuntime 'syft_default_python_runtime' already exists. Returning existing runtime.[0m


Job
  uid: e4e6b3be-932b-46a6-a8c6-463dee472701
  created_by: borja.arroyog@upm.es
  created_at: 2025-06-30T10:01:56.781436Z
  updated_at: 2025-06-30T10:01:56.781439Z
  client_id: a68be9a5-8b06-4446-84cc-45bd0db53206
  name: Syft Flower Experiment
  dataset_name: pima-indians-diabetes-database
  runtime_id: e7f6abe6-fa41-41c8-8e83-a5bcb248f2d5
  user_code_id: 570c436e-576c-4c09-a485-c9b573948975
  custom_function_id: None
  description: Syft Flower Federated Learning Experiment
  tags: ['federated learning', 'fl', 'syft_flwr', 'flwr']
  user_metadata: {}
  status: pending_code_review
  error: no_error
  error_message: None
  output_url: syft://flower-test-group-1@openmined.org/app_data/RDS/user_files/borja.arroyog@upm.es/Job/e4e6b3be-932b-46a6-a8c6-463dee472701
sending job to flower-test-group-2@openmined.org


[32m2025-06-30 12:01:57.015[0m | [34m[1mDEBUG   [0m | [36msyft_rds.client.rds_clients.runtime[0m:[36mget_or_create[0m:[36m69[0m - [34m[1mRuntime 'syft_default_python_runtime' already exists. Returning existing runtime.[0m


Job
  uid: 77c32e3b-39cf-4673-b2f4-2faab548613b
  created_by: borja.arroyog@upm.es
  created_at: 2025-06-30T10:01:57.106010Z
  updated_at: 2025-06-30T10:01:57.106011Z
  client_id: 0d4df529-ae9c-4e7a-ad90-86ea857b968b
  name: Syft Flower Experiment
  dataset_name: pima-indians-diabetes-database
  runtime_id: b1f1cb23-e932-41a3-a054-c513f3dcb45c
  user_code_id: 77730b98-d3f0-4b88-8cb4-f668c1702cbe
  custom_function_id: None
  description: Syft Flower Federated Learning Experiment
  tags: ['federated learning', 'fl', 'syft_flwr', 'flwr']
  user_metadata: {}
  status: pending_code_review
  error: no_error
  error_message: None
  output_url: syft://flower-test-group-2@openmined.org/app_data/RDS/user_files/borja.arroyog@upm.es/Job/77c32e3b-39cf-4673-b2f4-2faab548613b


<img src="./images/dsDoneSubmittingJobs.png" width="40%" alt="DS waits for jobs to be approved">

## DS starts the FL server code

In [32]:
import os

# In local test mode, point SyftBox at the config file of the locally created
# DS stack (`ds_stack` only exists when LOCAL_TEST is True).
if LOCAL_TEST:
    os.environ["SYFTBOX_CLIENT_CONFIG_PATH"] = str(ds_stack.client.config_path)

# Settings handed over through environment variables (presumably read by
# loguru / syft_flwr in the launched process; the timeout is presumably in
# seconds — TODO confirm).
os.environ["LOGURU_LEVEL"] = "DEBUG"
os.environ["SYFT_FLWR_MSG_TIMEOUT"] = "60"

# Launch the project's `main.py` entrypoint with `uv run`.
# NOTE(review): `--active` comes after the script path — confirm whether it is
# meant for `uv` or for `main.py`.
!uv run {str(SYFT_FLWR_PROJECT_PATH / "main.py")} --active

/Users/borja/Projects/syft/fl-diabetes-prediction/.venv/lib/python3.13/site-packages/pydantic/_internal/_generate_schema.py:923: PydanticDeprecatedSince20: `__get_validators__` is deprecated and will be removed, use `__get_pydantic_core_schema__` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  warn(
/Users/borja/Projects/syft/fl-diabetes-prediction/.venv/lib/python3.13/site-packages/pydantic/_internal/_config.py:323: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
/Users/borja/Projects/syft/fl-diabetes-prediction/.venv/lib/python3.13/site-packages/pydantic/_internal/_generate_schema.py:923: PydanticDeprecatedSince20: `__get_validators__` is deprecated and will be removed, use `__get_pydantic_core_schema__` instead. D

By running the FL server code, the DS aggregates the models trained on the DOs' private local data into an improved global model.

<img src="./images/dsAggregateModels.png" width="30%" alt="DS Aggregates Models">

## DS Observes the Results

Now the DS can monitor the aggregated models, trained on the DOs' private datasets, in the `weights` folder.