In [None]:
from syft_runtimes.high_low.setup import (
    initialize_high_datasite,
    initialize_sync_config,
    high_side_connect,
    sync,
)

# Initializing the high datasite

In [None]:
# Low side settings
# NOTE(review): the leading "~" is stored verbatim; nothing in this cell
# expands it, so confirm that the consumer of this value does.
lowside_syftbox_dir: str = "~/.syftbox/clients/alice@openmined.org"

# Template for an SSH connection to the low side. Uncomment and fill in;
# the keys are forwarded as keyword arguments via `**ssh_config`.
# ssh_config: dict = {
#     "low_ssh_host": "example.com",
#     "low_ssh_user": "username",
#     "low_ssh_port": 22,
#     "low_ssh_key_path": "/path/to/private/key",
# }

# Local testing: no SSH connection, so no SSH options are supplied.
ssh_config: dict = {}

In [None]:
# High side settings: the email the high datasite is initialized with, and
# the identifier handed to the sync configuration.
highside_email: str = "data_owner@private.com"
highside_identifier: str = "highside-1234"

In [None]:
# Step 1: initialize the high datasite for the configured email.
# force_overwrite=True is passed on every run of this cell.
initialize_high_datasite(
    email=highside_email,
    force_overwrite=True,
)

# Step 2: connect with the same email; the returned client is reused by the
# cells below.
high_syftbox_client = high_side_connect(
    email=highside_email,
)

# Initial sync

In [None]:
# Build the sync configuration from the high-side identifier, the low-side
# SyftBox directory and the connected high-side client. Whatever is in
# `ssh_config` is forwarded as extra keyword arguments (nothing when it is
# empty, as in the local-testing setup).
sync_config = initialize_sync_config(
    highside_identifier=highside_identifier,
    lowside_syftbox_dir=lowside_syftbox_dir,
    syftbox_client=high_syftbox_client,
    force_overwrite=True,
    **ssh_config,
)

In [None]:
# Run the initial sync using the connected high-side client.
sync(
    syftbox_client=high_syftbox_client,
)

# Create a high-side dataset

In [None]:
import syft_datasets as syd
from syft_core.config import CONFIG_PATH_ENV
import os

# Make the high client the default syftbox client by pointing the
# config-path environment variable at its config.json.
high_config_path = high_syftbox_client.workspace.data_dir / "config.json"
os.environ[CONFIG_PATH_ENV] = str(high_config_path)

In [None]:
# Create some random mock and private data

from pathlib import Path
import random
import pandas as pd

# Fixed seed so that "Restart Kernel -> Run All" regenerates identical CSVs.
RANDOM_SEED = 42

num_mock_rows = 100
num_private_rows = 100


def make_random_records(num_rows, rng):
    """Return a column -> values mapping of synthetic person records.

    Args:
        num_rows: Number of values to generate for every column.
        rng: ``random.Random`` instance to draw from. Passing a seeded
            instance makes the result reproducible.

    Returns:
        A dict with the keys "age" (ints from 0 to 100), "height" (floats
        between 150 and 200) and "income" (ints from 20000 to 100000); each
        value is a list of length ``num_rows``.
    """
    return {
        "age": [rng.randint(0, 100) for _ in range(num_rows)],
        "height": [rng.uniform(150, 200) for _ in range(num_rows)],
        "income": [rng.randint(20000, 100000) for _ in range(num_rows)],
    }


# A single generator is shared by both calls on purpose: the private rows are
# drawn from the part of the stream that follows the mock rows, so the two
# datasets differ. Seeding two separate generators with the same seed would
# make the mock data an exact copy of the private data.
data_rng = random.Random(RANDOM_SEED)
mock_data = make_random_records(num_mock_rows, data_rng)
private_data = make_random_records(num_private_rows, data_rng)

mock_df = pd.DataFrame(mock_data)
private_df = pd.DataFrame(private_data)

# Persist both frames as CSV (without the index column) under ./data, which
# is created on first run and reused afterwards.
data_dir = Path("./data")
data_dir.mkdir(parents=True, exist_ok=True)
mock_df.to_csv(data_dir / "mock_data.csv", index=False)
private_df.to_csv(data_dir / "private_data.csv", index=False)

In [None]:
# Write a minimal README into the data directory. The text starts with a
# newline, followed by a single markdown heading line.
readme_path = data_dir / "README.md"
readme_content = "\n# My example high-side dataset\n"
readme_path.write_text(readme_content)

In [None]:
# Create the dataset from the files in `data_dir`: descriptive metadata
# first, then the README, mock CSV and private CSV paths.
highside_dataset = syd.create(
    name="highside_example_dataset",
    summary="This is a mock dataset for demonstration purposes.",
    tags=["example", "testing", "highside"],
    readme_path=readme_path,
    mock_path=data_dir / "mock_data.csv",
    private_path=data_dir / "private_data.csv",
)