# Data Owner 01

Install dependencies

In [None]:
!uv pip install \
    "flwr[simulation]==1.17.0" "flwr-datasets>=0.5.0" \
    "scikit-learn==1.6.1" "torch==2.7.0" \
    syft_flwr imblearn pandas loguru --upgrade

Set `LOCAL_TEST` to `True` to run the clients locally for testing, or to `False` to log in to your own SyftBox datasite.

In [None]:
# True: later cells set up a local RDS stack for testing.
# False: later cells install the rds-server app and log in with the email from `Client.load()`.
LOCAL_TEST: bool = True

Install the `rds-server` SyftBox app that this workflow needs (skipped when `LOCAL_TEST` is `True`)

In [None]:
# The rds-server app is installed through the syftbox CLI only for non-local runs.
if not LOCAL_TEST:
    !syftbox app install https://github.com/OpenMined/rds-server

## DO logs in to their datasite

In [None]:
from pathlib import Path

# Produces DO_EMAIL and do_client for the rest of the notebook
# (plus do_stack when running the local test setup).
if not LOCAL_TEST:
    # Non-local run: take the email from the loaded client config and open a session with it.
    import syft_rds as sy
    from syft_core import Client

    DO_EMAIL = Client.load().email
    print("DO email: ", DO_EMAIL)

    do_client = sy.init_session(host=DO_EMAIL)
else:
    # Local test: use a fixed test identity and an RDS stack rooted in the current directory.
    from syft_rds.orchestra import remove_rds_stack_dir, setup_rds_server

    DO_EMAIL = "do1@openmined.org"

    # Clear the "flwr" stack directory before setting the server up again
    # (going by the helper's name; confirm against syft_rds).
    remove_rds_stack_dir(root_dir=Path("."), key="flwr")
    do_stack = setup_rds_server(email=DO_EMAIL, root_dir=Path("."), key="flwr")
    do_client = do_stack.init_session(host=DO_EMAIL)

## Upload dataset (skip if it's already uploaded)

In [None]:
from huggingface_hub import snapshot_download

# Absolute path of the local dataset folder, anchored at the current working directory.
DATASET_DIR = Path("./dataset/").expanduser().absolute()

# Download the partitioned dataset only when the folder is not already present.
if not DATASET_DIR.exists():
    snapshot_download(
        repo_id="khoaguin/pima-indians-diabetes-database-partitions",
        repo_type="dataset",
        # Download into the same directory that was just checked, instead of
        # repeating the "./dataset/" literal and risking the two drifting apart.
        local_dir=DATASET_DIR,
        # `local_dir_use_symlinks` is intentionally not passed: it is deprecated and
        # ignored by current huggingface_hub, where `local_dir` downloads hold real files.
    )

In [None]:
from pathlib import Path

partition_number = 0  # please change this to any number from 0 to 4
DATASET_PATH = Path(f"./dataset/pima-indians-diabetes-database-{partition_number}")

# Fail early with an actionable message when the partition folder is missing
# (download cell skipped, or partition_number outside the available range).
if not DATASET_PATH.is_dir():
    raise FileNotFoundError(
        f"Partition directory not found: {DATASET_PATH}. "
        "Run the download cell first and set partition_number to a value from 0 to 4."
    )

# Register the partition with the datasite: README as description, plus the
# `private` and `mock` sub-folders of the partition directory.
dataset = do_client.dataset.create(
    name="pima-indians-diabetes-database",
    summary="Pima Indians Diabetes Database.",
    description_path=DATASET_PATH / "README.md",
    path=DATASET_PATH / "private",
    mock_path=DATASET_PATH / "mock",
)
dataset.describe()

<img src="./images/doWaitsForJobs.png" width="40%" alt="DO waits for jobs">

## Review and Run Jobs

In [None]:
# Fetch only the jobs whose status is "pending_code_review" and display them.
jobs = do_client.jobs.get_all(status="pending_code_review")
jobs

In [None]:
# Select the last entry of the pending-review list for inspection.
# An empty list would otherwise raise a bare "list index out of range"; keep the
# same exception type but explain what to do.
if len(jobs) == 0:
    raise IndexError(
        "No jobs are pending code review yet; re-run the previous cell "
        "after a job has been submitted."
    )
job = jobs[-1]
job

In [None]:
# Show the selected job's user code so it can be reviewed before running.
# same as job.code.describe()
job.show_user_code()

In [None]:
import os

# Local test only: export the local stack client's config path through the
# SYFTBOX_CLIENT_CONFIG_PATH environment variable before the job runs
# (presumably read during run_private; confirm against syft_rds).
if LOCAL_TEST:
    os.environ.update(SYFTBOX_CLIENT_CONFIG_PATH=str(do_stack.client.config_path))

# Run the reviewed job and keep the returned handle.
res_job = do_client.run_private(job)

By running the job privately, the data owner (DO) trains the model on their local data and then sends the trained model back to the data scientist (DS).

<img src="./images/doSendModels.png" width="80%" alt="DO sends models">