## Setup the local stack (for local testing)

In [None]:
import os
from loguru import logger

import syft_runtimes.high_low as syhl
from syft_rds.orchestra import setup_rds_server, remove_rds_stack_dir
from syft_core.config import CONFIG_PATH_ENV
import syft_datasets as syd

In [None]:
import shutil

# Remove the "low_datasites" stack directory under ./ via the orchestra helper.
remove_rds_stack_dir(key="low_datasites", root_dir="./")
# Pure-Python replacement for `!rm -rf ./high_datasites`: it needs neither
# IPython nor a POSIX shell, and it is a no-op when the directory is absent.
shutil.rmtree("./high_datasites", ignore_errors=True)

In [None]:
# Email identity of the data owner (DO); reused by later cells.
DO = "do1@openmined.org"
# Set up the RDS server stack for the DO under ./ with key "low_datasites".
do_stack_1 = setup_rds_server(email=DO, key="low_datasites", root_dir="./")

In [None]:
# Low-side client and its SyftBox data directory, taken from the DO stack.
lowside_client = do_stack_1.client
lowside_syftbox_dir = do_stack_1.client.workspace.data_dir
# The high-side directory is a sibling of the low-side data directory.
highside_syftbox_dir = lowside_syftbox_dir.parent / "high_datasites"
lowside_email = DO

In [None]:
# Display the email attribute of the low-side client.
lowside_client.email

## DO (High): Initializing the high datasite

In [None]:
# Show the low-side and high-side SyftBox directories computed earlier
print(f"{lowside_syftbox_dir = }")
print(f"{highside_syftbox_dir = }")

In [None]:
# Identifier passed to both initialize_high_datasite and lowside_connect
# in the cells below
highlow_identifier = "highlow-1234"
highlow_identifier

In [None]:
# First, initialize the high datasite - runs on the low side
# NOTE(review): force_overwrite=True presumably replaces an existing high
# datasite at data_dir — confirm before pointing this at real data.
highside_client: syhl.HighSideClient = syhl.initialize_high_datasite(
    email=DO,
    highlow_identifier=highlow_identifier,
    data_dir=highside_syftbox_dir,
    force_overwrite=True,
)

In [None]:
# Display the data directory of the high-side client's workspace.
highside_client.workspace.data_dir

## DO (High): Create a high-side dataset

In [None]:
# Build random mock and private tables with the same schema, write them to
# ./data as CSV, and add a small README next to them.
from pathlib import Path
import random
import pandas as pd

num_mock_rows = 100
num_private_rows = 100


def _random_people(num_rows):
    """Return a column dict with `num_rows` random age/height/income values."""
    ages = [random.randint(0, 100) for _ in range(num_rows)]
    heights = [random.uniform(150, 200) for _ in range(num_rows)]
    incomes = [random.randint(20000, 100000) for _ in range(num_rows)]
    return {"age": ages, "height": heights, "income": incomes}


# The mock table is generated first, then the private one.
mock_data = _random_people(num_mock_rows)
private_data = _random_people(num_private_rows)

mock_df = pd.DataFrame(mock_data)
private_df = pd.DataFrame(private_data)

# Persist both tables without the index column.
data_dir = Path("./data")
data_dir.mkdir(parents=True, exist_ok=True)
mock_df.to_csv(data_dir.joinpath("mock_data.csv"), index=False)
private_df.to_csv(data_dir.joinpath("private_data.csv"), index=False)

# README that accompanies the dataset; write_text stays the last expression
# so the cell still displays the number of characters written.
readme_content = "\n# My example high-side dataset\n"
readme_path = data_dir.joinpath("README.md")
readme_path.write_text(readme_content)

In [None]:
# Point the config-path environment variable at the high-side client's config
# before calling syd.create.
# NOTE(review): presumably this is how syft_datasets picks the high-side
# client — confirm.
os.environ[CONFIG_PATH_ENV] = str(highside_client.config_path)

DATASET_NAME = "my_cool_dataset"

# Create the dataset from the mock CSV, private CSV and README written to
# data_dir earlier.
highside_dataset = syd.create(
    name=DATASET_NAME,
    mock_path=data_dir / "mock_data.csv",
    private_path=data_dir / "private_data.csv",
    readme_path=readme_path,
)

In [None]:
# Display the mock directory of the dataset just created.
highside_dataset.mock_dir

In [None]:
# Display the private directory of the dataset just created.
highside_dataset.private_dir

In [None]:
# List the datasets known to the high-side client
from syft_datasets import SyftDatasetManager

highside_dataset_manager = SyftDatasetManager(syftbox_client=highside_client)
highside_datasets = highside_dataset_manager.get_all()

highside_datasets

## DO (High): Connect to low-side

Connecting via local connection

In [None]:
# Connect the high-side client to the low-side data directory.
# Note: this rebinds `lowside_client`, which earlier held do_stack_1.client.
lowside_client = highside_client.lowside_connect(
    highlow_identifier=highlow_identifier,
    lowside_data_dir=lowside_syftbox_dir,
)

## DO (High): Rsync the dataset (mock part only) to the low side

In [None]:
# Sync the dataset (mock part) to the low side
# DATASET_NAME is the name given to syd.create for the first dataset.
highside_client.sync_dataset(
    dataset_name=DATASET_NAME,
    verbose=True,
)

## DO (High): Rsync another dataset (mock part only) to the low side

In [None]:
# Re-point the config-path environment variable at the high-side client's
# config before calling syd.create again.
os.environ[CONFIG_PATH_ENV] = str(highside_client.config_path)

DATASET_2_NAME = "my_cool_dataset_2"

# Second dataset built from the same CSV and README files as the first.
# Note: this rebinds `highside_dataset` to the new dataset.
highside_dataset = syd.create(
    name=DATASET_2_NAME,
    mock_path=data_dir / "mock_data.csv",
    private_path=data_dir / "private_data.csv",
    readme_path=readme_path,
)

In [None]:
# Sync the second dataset (mock part) to the low side
highside_client.sync_dataset(
    dataset_name=DATASET_2_NAME,
    verbose=True,
)

## DO (Low): Checks if the dataset exists (only the mock part)

In [None]:
# We can now see the dataset on the low side!
# (SyftDatasetManager is imported again here; an earlier cell imports it too.)
from syft_datasets import SyftDatasetManager

# Use the client of the DO stack, not the object returned by lowside_connect.
lowside_syftbox_client = do_stack_1.client

lowside_dataset_manager = SyftDatasetManager(syftbox_client=lowside_syftbox_client)
lowside_datasets = lowside_dataset_manager.get_all()

lowside_datasets

In [None]:
# Print the mock directory of every dataset returned for the low side.
for dataset in lowside_datasets:
    print(dataset.mock_dir)

In [None]:
# Try to read the private directory of each low-side dataset. Each dataset is
# probed in its own try block so that a failure on one does not stop the
# remaining datasets from being checked and reported.
for dataset in lowside_datasets:
    try:
        print(dataset.private_dir)
    except Exception as e:  # exact exception type is not visible from this notebook
        logger.error(
            "Can't access private dataset from low side because it's a high-low dataset"
        )
        logger.error(e)

## DS: Sees that the mock dataset is uploaded to the SyftBox network, plays with it, and submits a job to the DO's low datasite

In [None]:
from syft_rds import init_session

# Open a session against the DO's datasite (host is the DO's email).
ds_client = init_session(host=DO)

# NOTE(review): `...` is a placeholder argument — fill in the real job
# submission parameters before running this cell.
ds_client.job.submit(...)

## DO (Low): Review and approve jobs

In [None]:
# Fetch the jobs from the low-side client.
# NOTE(review): confirm that this client object exposes `jobs()`.
jobs = lowside_syftbox_client.jobs()

In [None]:
# Approve the first job in the list; jobs[0] raises IndexError if the list is empty.
lowside_syftbox_client.approve(
    jobs[0]
)  # move the job to private/<email>/syft_runtimes/<runtime_name>/jobs

## DO (High): Rsync pending jobs from the low side client

In [None]:
# Ask the high-side client to sync pending jobs.
# NOTE(review): presumably this pulls approved jobs from the low side — confirm.
highside_client.sync_pending_jobs()

## DO (High): Runs the job on private data


In [None]:
# NOTE(review): get_all() is called without a filter — confirm that it returns
# only pending jobs, as the variable name and comment assume.
pending_jobs = highside_client.job.get_all()  # get all pending jobs
for job in pending_jobs:
    highside_client.run_private(job)  # run the job using syft_runtimes

## DO (High): Review and rsync job results in the `done` folder to the lowside client

In [None]:
# Sync finished jobs via the high-side client.
# NOTE(review): ignore_existing=False presumably means results already present
# at the destination are overwritten rather than skipped — confirm.
highside_client.sync_done_jobs(ignore_existing=False)