# Test: Data Owner (Client1) - SyftBox Mode

Automated test notebook for the Data Owner workflow with SyftBox encryption.
Run with: `jupyter execute sc_test_do.ipynb`

In [1]:
import sys
from pathlib import Path

# Configuration
# Seconds to wait before giving up: added to time.monotonic() for the
# session-request deadline and passed as `timeout` to bv.wait_for_message().
TIMEOUT = 120.0
# Identity this notebook connects as (passed to beaver.connect and used as the Twin owner).
USER_EMAIL = "client1@sandbox.local"
# NOTE(review): PEER_EMAIL is not referenced by any cell visible in this notebook;
# presumably the expected requester of the session - confirm whether it should be checked.
PEER_EMAIL = "client2@sandbox.local"
# Directory handed to beaver.connect(); resolved from the working directory at run time.
DATA_DIR = Path.cwd()

In [2]:
# Install scanpy through uv's pip interface via a shell escape.
# NOTE(review): no version is pinned here, so the installed release may differ between runs.
!uv pip install scanpy

Audited 1 package in 51ms


In [3]:
import scanpy as sc
import anndata as ad
from beaver import Twin
from beaver.runtime import TrustedLoader
import beaver

# Open the beaver connection for this user, rooted at DATA_DIR.
# NOTE(review): the original comment says this uses the SyftBox backend; the
# flag printed below reports whether that is actually the case.
connect_kwargs = {"user": USER_EMAIL, "data_dir": DATA_DIR}
bv = beaver.connect(**connect_kwargs)

# Report who we are connected as and whether SyftBox is active.
print(f"[DO] Connected as {bv.user}")
print(f"[DO] SyftBox enabled: {bv.syftbox_enabled}")

🔄 Auto-load replies enabled for client1@sandbox.local (polling every 2.0s)
[DO] Connected as client1@sandbox.local
[DO] SyftBox enabled: True


In [4]:
# Register AnnData serialization
# Two functions are registered with TrustedLoader for the same type (ad.AnnData):
# one that writes an object to a path and one that reads an object back from a path.
# NOTE(review): presumably TrustedLoader tells the writer from the reader by
# signature or function name - confirm against the beaver docs before renaming
# or reshaping either function.
@TrustedLoader.register(ad.AnnData)
def anndata_serialize_file(obj, path):
    # Write `obj` to `path` in h5ad format.
    obj.write_h5ad(path)

@TrustedLoader.register(ad.AnnData)
def anndata_deserialize_file(path):
    # Read the h5ad file at `path` and return the loaded object.
    return ad.read_h5ad(path)

print("[DO] Registered AnnData loader")

[DO] Registered AnnData loader


In [5]:
# Poll for an incoming session request and accept the first one that shows up.
import time

print("[DO] Waiting for session request from DS...")
session = None
deadline = time.monotonic() + TIMEOUT

while time.monotonic() < deadline:
    pending = bv.session_requests()
    if len(pending) == 0:
        # Nothing yet: pause for a second before polling again.
        time.sleep(1.0)
        continue

    # Take the first pending request, accept it, and stop polling.
    first_request = pending[0]
    session = first_request.accept()
    print(f"[DO] Accepted session from {first_request.requester}")
    print(f"[DO] Session ID: {session.session_id}")
    break

# Still None here means the deadline passed without an accepted request.
assert session is not None, "Timeout waiting for session request"

[DO] Waiting for session request from DS...
✅ Session accepted: 83f95762d0bd
   Peer: client2@sandbox.local
   Session folder: /Users/madhavajay/dev/biovault-beaver/workspace2/sandbox/client1@sandbox.local/datasites/client1@sandbox.local/shared/biovault/sessions/83f95762d0bd
[DO] Accepted session from client2@sandbox.local
[DO] Session ID: 83f95762d0bd


In [6]:
# Load single-cell data
# All paths are relative to the directory the notebook is executed from.
data_dir = Path("single_cell/data")
private_path = data_dir / "sc_RNAseq_adata_downsampled_to5percent.private.h5ad"
mock_path = data_dir / "sc_RNAseq_adata_downsampled_to5percent.mock.h5ad"
sim_path = data_dir / "adata_simulated.h5ad"

# Fail early, naming the missing file, instead of surfacing a reader error later.
assert private_path.exists(), f"Private data not found: {private_path}"

# Create mock from simulated if needed
if not mock_path.exists() and sim_path.exists():
    print("[DO] Creating mock data from simulated...")
    adata_sim = sc.read(sim_path)
    # Rename the simulated obs columns to the names the mock file is written with.
    # One call covers both columns; the two mappings do not interact.
    adata_sim.obs.rename(
        columns={
            "pct_counts_in_top_50_genes": "pct_counts_mt",
            "group": "cell_type",
        },
        inplace=True,
    )
    adata_sim.write_h5ad(mock_path)

# The mock file must exist by now: either it was already there or it was just built.
assert mock_path.exists(), (
    f"Mock data not found: {mock_path} (and no simulated data at {sim_path} to build it from)"
)

adata_private = sc.read(private_path)
adata_mock = sc.read(mock_path)

print(f"[DO] Loaded private: {adata_private.n_obs} cells")
print(f"[DO] Loaded mock: {adata_mock.n_obs} cells")

# Both datasets must contain at least one observation for the rest of the test to be meaningful.
assert adata_private.n_obs > 0, "Private data is empty"
assert adata_mock.n_obs > 0, "Mock data is empty"

[DO] Loaded private: 7876 cells
[DO] Loaded mock: 30000 cells


In [7]:
# Names of request envelopes already handled; the request cells add to this set.
processed_requests = set()

# Pair the private dataset with its mock counterpart in a Twin owned by this user.
patient_sc = Twin(private=adata_private, public=adata_mock, owner=USER_EMAIL, name="patient_sc")

# Publish the Twin through the session's remote variables under the same name.
# NOTE(review): the original comment states this is encrypted for the peer only -
# that behavior lives in beaver and is not visible from this cell.
session.remote_vars["patient_sc"] = patient_sc
print("[DO] Published patient_sc Twin to session")

🌍 Using PUBLIC data from Twin 'patient_sc...'
📢 Published Twin 'patient_sc' (public side available at: /Users/madhavajay/dev/biovault-beaver/workspace2/sandbox/client1@sandbox.local/datasites/client1@sandbox.local/shared/biovault/sessions/83f95762d0bd/data/8430c58770404d6a83761ae9764afe6a.beaver)
[DO] Published patient_sc Twin to session


In [11]:
# Evaluate the inbox as the cell's last expression so its value is shown as output.
# NOTE(review): this looks like a leftover interactive check (its execution count
# is out of order and nothing downstream uses the result) - confirm it is needed.
bv.inbox()

In [10]:
# Request 1: Violin plot
# Announce the wait, then hand control to bv.wait_for_message with the shared
# TIMEOUT and a 1.0 poll interval.
# NOTE(review): presumably this blocks until a message arrives or the timeout
# elapses - confirm; the return value is not inspected here.
print("[DO] Waiting for request 1/4 (violin)...")
bv.wait_for_message(timeout=TIMEOUT, poll_interval=1.0)

[DO] Waiting for request 1/4 (violin)...


KeyboardInterrupt: 

In [9]:
# Find the first unprocessed computation request in inbox.
# A computation request is an envelope whose name starts with "request_" and
# that has not already been recorded in processed_requests.
inbox = bv.inbox()
request_envs = [
    env
    for env in inbox
    if env.name
    and env.name.startswith("request_")
    and env.name not in processed_requests
]
assert len(request_envs) > 0, "No new request found in inbox"
request_env = request_envs[0]
print(f"[DO] Received: {request_env.name}")

# Load it - this injects the request into globals
request_env.load()
processed_requests.add(request_env.name)

# The loaded variable name is the envelope name; fail with a readable message
# rather than a bare KeyError if load() did not define it.
assert request_env.name in globals(), f"load() did not define {request_env.name!r}"
request_obj = globals()[request_env.name]
print(f"[DO] Running: {request_env.name}")

# Run the request and require a result before approving it.
result = request_obj.run_both()
assert result is not None, "run_both() returned None"

result.approve()
# Status glyph restored: the check mark had been mis-decoded into "‚úì".
print("[DO] ✓ Request 1 complete")

[DO] Waiting for request 1/4 (violin)...


KeyboardInterrupt: 

In [None]:
# Request 2: Embedding
print("[DO] Waiting for request 2/4 (embedding)...")
bv.wait_for_message(timeout=TIMEOUT, poll_interval=1.0)

# Pick the first inbox envelope that is a computation request ("request_"
# prefix) and has not already been recorded in processed_requests.
inbox = bv.inbox()
request_envs = [
    env
    for env in inbox
    if env.name
    and env.name.startswith("request_")
    and env.name not in processed_requests
]
assert len(request_envs) > 0, "No new request found in inbox"
request_env = request_envs[0]
print(f"[DO] Received: {request_env.name}")

# load() is relied on to define a global named after the envelope; fail with a
# readable message rather than a bare KeyError if it did not.
request_env.load()
processed_requests.add(request_env.name)
assert request_env.name in globals(), f"load() did not define {request_env.name!r}"
request_obj = globals()[request_env.name]
print(f"[DO] Running: {request_env.name}")

# Run the request and require a result before approving it.
result = request_obj.run_both()
assert result is not None, "run_both() returned None"

result.approve()
# Status glyph restored: the check mark had been mis-decoded into "‚úì".
print("[DO] ✓ Request 2 complete")

In [None]:
# Request 3: PCA variance
print("[DO] Waiting for request 3/4 (pca)...")
bv.wait_for_message(timeout=TIMEOUT, poll_interval=1.0)

# Pick the first inbox envelope that is a computation request ("request_"
# prefix) and has not already been recorded in processed_requests.
inbox = bv.inbox()
request_envs = [
    env
    for env in inbox
    if env.name
    and env.name.startswith("request_")
    and env.name not in processed_requests
]
assert len(request_envs) > 0, "No new request found in inbox"
request_env = request_envs[0]
print(f"[DO] Received: {request_env.name}")

# load() is relied on to define a global named after the envelope; fail with a
# readable message rather than a bare KeyError if it did not.
request_env.load()
processed_requests.add(request_env.name)
assert request_env.name in globals(), f"load() did not define {request_env.name!r}"
request_obj = globals()[request_env.name]
print(f"[DO] Running: {request_env.name}")

# Run the request and require a result before approving it.
result = request_obj.run_both()
assert result is not None, "run_both() returned None"

result.approve()
# Status glyph restored: the check mark had been mis-decoded into "‚úì".
print("[DO] ✓ Request 3 complete")

In [None]:
# Request 4: UMAP
print("[DO] Waiting for request 4/4 (umap)...")
bv.wait_for_message(timeout=TIMEOUT, poll_interval=1.0)

# Pick the first inbox envelope that is a computation request ("request_"
# prefix) and has not already been recorded in processed_requests.
inbox = bv.inbox()
request_envs = [
    env
    for env in inbox
    if env.name
    and env.name.startswith("request_")
    and env.name not in processed_requests
]
assert len(request_envs) > 0, "No new request found in inbox"
request_env = request_envs[0]
print(f"[DO] Received: {request_env.name}")

# load() is relied on to define a global named after the envelope; fail with a
# readable message rather than a bare KeyError if it did not.
request_env.load()
processed_requests.add(request_env.name)
assert request_env.name in globals(), f"load() did not define {request_env.name!r}"
request_obj = globals()[request_env.name]
print(f"[DO] Running: {request_env.name}")

# Run the request and require a result before approving it.
result = request_obj.run_both()
assert result is not None, "run_both() returned None"

result.approve()
# Status glyph restored: the check mark had been mis-decoded into "‚úì".
print("[DO] ✓ Request 4 complete")

In [None]:
# Final assertions
# Each request cell adds exactly one envelope name to processed_requests and
# only picks names not already in the set, so a complete run records four.
assert len(processed_requests) == 4, (
    f"Expected 4 processed requests, got {len(processed_requests)}: {sorted(processed_requests)}"
)

# Summary banner printed only when the check above holds.
banner = "=" * 50
print("\n" + banner)
print("[DO] TEST PASSED")
print("[DO] Successfully handled 4 computation requests")
print(banner)