In [None]:
import syft_runtimes.high_low as syhl
from syft_runtimes.high_low.rsync import RsyncConfig
from syft_core import Client

In [None]:
# Load the low-side SyftBox client (no arguments are passed to Client.load() here).
lowside_client = Client.load()
lowside_email = lowside_client.email
# Trailing expression: show the low-side email as the cell output.
lowside_email

In [None]:
# Identifier for the high side; reused below when initializing the high datasite,
# when building the sync config, and as the dataset `location`.
highside_identifier = "highside-1234"

## Initialize the high datasite and connect to it

In [None]:
# Low side settings

# NOTE: This needs to point to an existing syftbox directory. If it does not exist, create one first: https://www.syftbox.net/
# NOTE(review): "This" presumably refers to the low-side SyftBox setup loaded via Client.load() — confirm.

# Only required if you want to connect to the low side via SSH (for syncing from high to low)
# ssh_config: dict = {
#     "low_ssh_host": "example.com",
#     "low_ssh_user": "username",
#     "low_ssh_port": 22,
#     "low_ssh_key_path": "/path/to/private/key",
# }

# For local testing, without SSH connection
# (the dict is unpacked as keyword arguments into create_default_sync_config below,
# so an empty dict passes no SSH settings).
ssh_config = {}

In [None]:
# First, initialize the high datasite
# The return value is kept as the data directory passed to `high_side_connect` below.
# NOTE(review): `force_overwrite=True` presumably replaces an existing high datasite —
# confirm before running against a datasite that holds real data.
highside_data_dir = syhl.initialize_high_datasite(
    highside_identifier=highside_identifier,
    force_overwrite=True,
)

In [None]:
# Show the data directory returned by initialize_high_datasite.
highside_data_dir

In [None]:
# The high side connects with the same email as the low-side client.
highside_email = lowside_client.email
highside_client = syhl.high_side_connect(
    email=highside_email, data_dir=highside_data_dir
)

# Initial sync

In [None]:
# Build the sync configuration from the high- and low-side clients.
# `ssh_config` is unpacked as extra keyword arguments; it is empty in the local-testing setup above.
sync_config: RsyncConfig = syhl.create_default_sync_config(
    highside_client=highside_client,
    lowside_client=lowside_client,
    highside_identifier=highside_identifier,
    force_overwrite=True,
    **ssh_config,
)

In [None]:
# Show the dumped sync configuration for inspection.
sync_config.model_dump()

In [None]:
# for debugging
# NOTE: `_get_sync_commands` is a private helper (leading underscore) of
# syft_runtimes.high_low.setup, so it may change without notice.

from syft_runtimes.high_low.setup import _get_sync_commands

# Fetch the sync commands for this config (presumably without running them — confirm)
# and show them as the cell output.
commands = _get_sync_commands(rsync_config=sync_config, verbose=True)
commands

In [None]:
# Run the initial sync for the high-side client with the config built above.
syhl.sync(syftbox_client=highside_client, rsync_config=sync_config)

# Create a high-side dataset

In [None]:
import syft_datasets as syd
from syft_core.config import CONFIG_PATH_ENV
import os

# Set the high client as default syftbox client
# (stores the high-side client's config path in the CONFIG_PATH_ENV environment variable).
os.environ[CONFIG_PATH_ENV] = str(highside_client.config_path)

In [None]:
# Show the config path now stored in the environment.
os.environ[CONFIG_PATH_ENV]

In [None]:
# Create some random mock and private data

from pathlib import Path
import random
import pandas as pd

# Fixed seed: a dedicated generator makes "Restart & Run All" reproduce the same
# CSV files without touching the global `random` state.
RANDOM_SEED = 42
rng = random.Random(RANDOM_SEED)

num_mock_rows = 100
num_private_rows = 100


def make_random_columns(num_rows: int, generator: random.Random) -> dict:
    """Return ``num_rows`` synthetic values for each dataset column.

    Args:
        num_rows: Number of values to draw per column.
        generator: Random source; pass a seeded instance for reproducible data.

    Returns:
        Mapping of column name ("age", "height", "income") to a list of values.
    """
    return {
        "age": [generator.randint(0, 100) for _ in range(num_rows)],
        "height": [generator.uniform(150, 200) for _ in range(num_rows)],
        "income": [generator.randint(20000, 100000) for _ in range(num_rows)],
    }


# Mock and private data share the same schema but are drawn independently.
mock_data = make_random_columns(num_mock_rows, rng)
private_data = make_random_columns(num_private_rows, rng)

mock_df = pd.DataFrame(mock_data)
private_df = pd.DataFrame(private_data)

# Write both CSVs where the dataset-creation cell below reads them from.
data_dir = Path("./data")
data_dir.mkdir(parents=True, exist_ok=True)
mock_df.to_csv(data_dir / "mock_data.csv", index=False)
private_df.to_csv(data_dir / "private_data.csv", index=False)

In [None]:
readme_content = """
# My example high-side dataset
"""
readme_path = data_dir / "README.md"
# Explicit UTF-8 so the README bytes do not depend on the platform's default encoding;
# the trailing `;` keeps the returned character count out of the cell output.
readme_path.write_text(readme_content, encoding="utf-8");

In [None]:
# Name shared by `syd.create` here and by the later `sync_dataset` / `get` calls.
DATASET_NAME = "my_cool_dataset"

# Create the dataset from the CSV files and README written in the cells above;
# `location` is set to the high-side identifier.
highside_dataset = syd.create(
    name=DATASET_NAME,
    mock_path=data_dir / "mock_data.csv",
    private_path=data_dir / "private_data.csv",
    readme_path=readme_path,
    summary="This is a mock dataset for demonstration purposes.",
    tags=["example", "testing", "highside"],
    location=highside_identifier,
)

In [None]:
# Show the mock directory of the newly created high-side dataset.
highside_dataset.mock_dir

In [None]:
# Show the private directory of the newly created high-side dataset.
highside_dataset.private_dir

In [None]:
from syft_rds import init_session


# Open an RDS session with the high-side client's email as host.
high_rds_client = init_session(host=highside_client.email)

In [None]:
# Print the admin flag of this RDS client, then show all datasets it returns.
print(f"{high_rds_client.is_admin = }")
high_rds_client.dataset.get_all()

In [None]:
# Show the high-side workspace data directory.
highside_client.workspace.data_dir

In [None]:
# Show the low-side workspace data directory, for comparison with the high side.
lowside_client.workspace.data_dir

In [None]:
# Sync the dataset created above between the high- and low-side clients.
# NOTE(review): presumably only the non-private parts are copied — the sanity check at the
# end of this notebook expects the private data to be absent on the low side; confirm.
syhl.sync_dataset(
    dataset_name=DATASET_NAME,
    highside_client=highside_client,
    lowside_client=lowside_client,
    verbose=True,
)

# Switch to low-side

In [None]:
# Show the low-side workspace data directory that the following cells read from.
lowside_client.workspace.data_dir

In [None]:
# We can now see the dataset on the low side!
from syft_datasets import SyftDatasetManager

# Dataset manager bound to the low-side client; show every dataset it returns.
lowside_dataset_manager = SyftDatasetManager(syftbox_client=lowside_client)
lowside_dataset_manager.get_all()

In [None]:
# Fetch the synced dataset by name through the low-side manager.
dataset = lowside_dataset_manager.get(name=DATASET_NAME)

# Show the dataset's description.
dataset.describe()

In [None]:
# Show the mock directory of the dataset as seen from the low side.
dataset.mock_dir

In [None]:
# Sanity check: the private data is not available on the low side, so accessing
# `private_dir` is expected to raise FileNotFoundError. The expected error is caught
# and reported so that "Run All" continues to the cells below; if no error is raised,
# the check fails loudly instead of silently passing.
# TODO improve error reporting for this case
try:
    dataset.private_dir
except FileNotFoundError as err:
    print(f"As expected, the private data is not available on the low side: {err}")
else:
    raise AssertionError("Private data is unexpectedly accessible from the low side")

In [None]:
# Final sync for the high-side client.
# NOTE(review): unlike the earlier `syhl.sync` call, no `rsync_config` is passed here —
# presumably a stored/default config is used; confirm this is intended.
syhl.sync(syftbox_client=highside_client)