In [1]:
from syft_rds.orchestra import setup_rds_stack

In [2]:
# Start the RDS stack with INFO-level logging; the log line recorded below
# reports that a mock stack is launched inside a temporary directory.
stack = setup_rds_stack(log_level="INFO")

[32m2025-03-05 12:39:50.072[0m | [1mINFO    [0m | [36msyft_rds.orchestra[0m:[36msetup_rds_stack[0m:[36m112[0m - [1mLaunching mock RDS stack in /var/folders/6x/3xzhslqj2tx0sgx88x022qyw0000gp/T/rds_s72qvkj4...[0m


In [3]:
# Grab both client handles exposed by the stack in one step.
# Naming follows the headings in this notebook: DO for the first, DS for the second.
do_client, ds_client = stack.do_rds_client, stack.ds_rds_client

# Create a dataset as the data owner (DO)

In [4]:
import os

# Lay out a tiny sample dataset on disk: one directory for the private copy
# and one for the mock copy, each holding a single one-line data.csv.
for directory, contents in (
    ("data/dataset-1/private", "-1,-2,-3"),
    ("data/dataset-1/mock", "1,2,3"),
):
    os.makedirs(directory, exist_ok=True)
    with open(f"{directory}/data.csv", "w") as fh:
        fh.write(contents)


In [5]:
# Register the dataset through the DO client. The directories are the ones
# created relative to the working directory in the previous cell; they are
# turned into absolute paths here instead of hard-coding one user's home
# directory, so the notebook can be re-run on any machine.
data = do_client.dataset.create(
    name="dataset-1",  # must be unique; an exception is raised if it already exists
    path=os.path.abspath("data/dataset-1/private"),  # must exist
    mock_path=os.path.abspath("data/dataset-1/mock"),
)

In [6]:
# Display the datasets known to the DO client; the recorded output is a list
# containing the dataset-1 entry created above.
do_client.datasets

[Dataset
   uid: 27ac4bcb-e13e-47b9-8e5d-d511f3a9e050
   created_at: 2025-03-05T11:39:50.126733Z
   updated_at: 2025-03-05T11:39:50.126737Z
   name: dataset-1
   private: syft://data_owner@test.openmined.org/private/datasets/dataset-1
   mock: syft://data_owner@test.openmined.org/public/datasets/dataset-1
   summary: None
   readme: None
   tags: []]

# Use the dataset as the data scientist (DS)

In [7]:
# Display the datasets as seen from the DS client; the recorded output shows
# the same dataset-1 entry (same uid) that the DO client listed.
ds_client.datasets

[Dataset
   uid: 27ac4bcb-e13e-47b9-8e5d-d511f3a9e050
   created_at: 2025-03-05T11:39:50.126733Z
   updated_at: 2025-03-05T11:39:50.126737Z
   name: dataset-1
   private: syft://data_owner@test.openmined.org/private/datasets/dataset-1
   mock: syft://data_owner@test.openmined.org/public/datasets/dataset-1
   summary: None
   readme: None
   tags: []]

In [8]:
# Look the dataset up by name and show where its mock data lives on the local
# filesystem; the recorded output is a path inside the stack's temp directory.
ds_client.dataset.get("dataset-1").get_mock_path()

PosixPath('/private/var/folders/6x/3xzhslqj2tx0sgx88x022qyw0000gp/T/rds_s72qvkj4/shared_client_dir/datasites/data_owner@test.openmined.org/public/datasets/dataset-1')

In [9]:
# Keep a handle to the dataset for the cells below, then describe it.
# The recorded output is a file tree (a single data.csv) followed by True.
dataset = ds_client.dataset.get("dataset-1")
dataset.describe()

└── data.csv


True

In [10]:
import pathlib
from syft_rds.models.models import Dataset
from syft_rds.utils.render_code_template import render_data_access_template


def init_code(dataset: Dataset, path: str, overwrite: bool = False) -> None:
    """Generate a starter ``.py`` script for working with ``dataset``'s mock data.

    The script text is produced by ``render_data_access_template``, which is
    given the path of the single mock file relative to
    ``dataset.get_mock_path()``. For a file that sits directly in the mock
    directory this is just its file name.

    Args:
        dataset: Dataset whose mock directory is inspected.
        path: Destination of the generated script; must end in ``.py``.
        overwrite: When False (the default), refuse to replace an existing file.

    Raises:
        ValueError: If ``path`` does not have a ``.py`` suffix, or if the mock
            directory does not contain exactly one file.
        FileExistsError: If ``path`` is an existing file and ``overwrite`` is
            False.
    """
    path_ = pathlib.Path(path)
    if path_.suffix != ".py":
        raise ValueError(f"Invalid file extension: {path_.suffix}, must be .py")
    if path_.is_file() and not overwrite:
        raise FileExistsError(f"File {path} already exists")

    # A recursive glob yields directories as well as files. Count regular
    # files only, so that a stray or nested directory neither trips the
    # "exactly one file" check nor gets mistaken for the data file itself.
    mock_root = dataset.get_mock_path()
    mock_files = [entry for entry in mock_root.glob("**/*") if entry.is_file()]
    if len(mock_files) != 1:
        raise ValueError(f"Only one file is allowed in the dataset mock path, got {len(mock_files)}")

    code = render_data_access_template(mock_files[0].relative_to(mock_root).as_posix())

    # Touch the filesystem only after every check has passed, so a rejected
    # call does not leave an empty output directory behind.
    path_.parent.mkdir(parents=True, exist_ok=True)
    path_.write_text(code)


In [11]:
# Generate the starter script next to the sample data. The location is
# resolved from the working directory rather than a hard-coded user-specific
# absolute path, so the cell works on any machine.
init_code(dataset, os.path.abspath("data/code1.py"), overwrite=True)

In [12]:
# Submit the generated script as a job against the dataset. The script path is
# resolved from the working directory instead of a hard-coded user-specific
# absolute path.
job = ds_client.jobs.submit(
    user_code_path=os.path.abspath("data/code1.py"),
    dataset_name=dataset.name,
)
job


Job
  uid: 3d34b39c-964a-479f-b33b-b1381ceb9c3a
  created_at: 2025-03-05T11:39:50.503681Z
  updated_at: 2025-03-05T11:39:50.503682Z
  name: robust_matrix_n34u
  description: None
  runtime: python
  user_code_id: 51fcf974-7add-4239-951e-26a1d5fd2aa9
  tags: []
  user_metadata: {}
  status: pending_code_review
  error: no_error
  output_url: syft://data_owner@test.openmined.org/api_data/RDS/output/3d34b39c-964a-479f-b33b-b1381ceb9c3a
  dataset_name: dataset-1

In [13]:
# Run the submitted job through the DS client. The recorded output is 0 —
# presumably a process-style return code; confirm against the client API.
ds_client.run(job)

0