# Test: Data Scientist (Client2) - SyftBox Mode

Automated test notebook for the Data Scientist workflow with SyftBox encryption.
Run with: `jupyter execute sc_test_ds.ipynb`

In [1]:
import sys
from pathlib import Path

# Configuration
# Timeout shared by every blocking wait in this notebook (session acceptance,
# the Twin polling loop, and each wait_for_message call). The polling loop adds
# it to time.monotonic(), so it is treated as seconds there.
TIMEOUT = 120.0
# Identity this notebook connects as (passed as `user` to beaver.connect).
USER_EMAIL = "client2@sandbox.local"
# Peer the session is requested from (passed as `peer_email` to request_session).
PEER_EMAIL = "client1@sandbox.local"
# Passed as `data_dir` to beaver.connect; resolves to wherever the notebook is run from.
DATA_DIR = Path.cwd()

In [2]:
# Install scanpy via uv before it is imported in the next cell.
# NOTE(review): the version is not pinned, so reruns may resolve different releases.
# NOTE(review): the PCA step calls highly_variable_genes(flavor='seurat_v3'); scanpy
# documents that flavor as requiring scikit-misc — confirm it is present in this env.
!uv pip install scanpy

[2K[2mResolved [1m40 packages[0m [2min 26ms[0m[0m                                         [0m
[2K[2mInstalled [1m31 packages[0m [2min 297ms[0m[0m2.0                           [0m
 [32m+[39m [1manndata[0m[2m==0.12.6[0m
 [32m+[39m [1marray-api-compat[0m[2m==1.12.0[0m
 [32m+[39m [1mcontourpy[0m[2m==1.3.3[0m
 [32m+[39m [1mcycler[0m[2m==0.12.1[0m
 [32m+[39m [1mdonfig[0m[2m==0.8.1.post1[0m
 [32m+[39m [1mfonttools[0m[2m==4.60.1[0m
 [32m+[39m [1mgoogle-crc32c[0m[2m==1.7.1[0m
 [32m+[39m [1mh5py[0m[2m==3.15.1[0m
 [32m+[39m [1mjoblib[0m[2m==1.5.2[0m
 [32m+[39m [1mkiwisolver[0m[2m==1.4.9[0m
 [32m+[39m [1mlegacy-api-wrap[0m[2m==1.5[0m
 [32m+[39m [1mllvmlite[0m[2m==0.45.1[0m
 [32m+[39m [1mmatplotlib[0m[2m==3.10.7[0m
 [32m+[39m [1mnatsort[0m[2m==8.4.0[0m
 [32m+[39m [1mnetworkx[0m[2m==3.6[0m
 [32m+[39m [1mnumba[0m[2m==0.62.1[0m
 [32m+[39m [1mnumcodecs[0m[2m==0.16.5[0m
 [32m+[39m [1m

In [3]:
import scanpy as sc
import matplotlib.pyplot as plt
from beaver import Twin
import beaver

# Open the beaver connection for this user; `bv` is reused by every later cell.
bv = beaver.connect(user=USER_EMAIL, data_dir=DATA_DIR)
print("[DS] Connected as {}".format(bv.user))
print("[DS] SyftBox enabled: {}".format(bv.syftbox_enabled))

🔄 Auto-load replies enabled for client2@sandbox.local (polling every 2.0s)
[DS] Connected as client2@sandbox.local
[DS] SyftBox enabled: True


In [4]:
# Request a session with the data owner
print(f"[DS] Requesting session with {PEER_EMAIL}...")
session = bv.request_session(
    peer_email=PEER_EMAIL,
    message="Requesting access for single-cell analysis test",
)

print(f"[DS] Session requested: {session.session_id}")
print("[DS] Waiting for session acceptance...")

# Wait for session to be accepted (TIMEOUT is passed through as the timeout).
# NOTE(review): the return value is not checked here — confirm that
# wait_for_acceptance raises on timeout rather than returning a falsy value.
session.wait_for_acceptance(timeout=TIMEOUT)
print("[DS] ✓ Session accepted!")

[DS] Requesting session with client1@sandbox.local...
📤 Session request sent to client1@sandbox.local
   Session ID: 83f95762d0bd
   Use session.wait_for_acceptance() to wait for approval
[DS] Session requested: 83f95762d0bd
[DS] Waiting for session acceptance...
⏳ Waiting for client1@sandbox.local to accept session 83f95762d0bd...
📁 Created session folder: /Users/madhavajay/dev/biovault-beaver/workspace2/sandbox/client2@sandbox.local/datasites/client2@sandbox.local/shared/biovault/sessions/83f95762d0bd
✅ Session 83f95762d0bd accepted!
[DS] ✓ Session accepted!


In [6]:
# Load Twin from session - use peer_remote_vars to read DO's data
import time

print("[DS] Waiting for Twin to be published...")
patient_sc = None
last_error = None  # most recent load failure, reported if the loop times out
deadline = time.monotonic() + TIMEOUT

# Poll once per second until the Twin can be loaded or the deadline passes.
while time.monotonic() < deadline:
    try:
        # peer_remote_vars reads from peer's session folder
        patient_sc = session.peer_remote_vars["patient_sc"].load(auto_accept=True)
    except Exception as exc:
        # Best-effort polling: the variable may simply not be published yet, so
        # keep retrying, but remember the failure instead of discarding it.
        last_error = exc
    else:
        if patient_sc is not None:
            break
    time.sleep(1.0)

assert patient_sc is not None, (
    f"Failed to load Twin within {TIMEOUT}s (last error: {last_error!r})"
)
assert hasattr(patient_sc, 'public'), "Twin has no public attribute"
print(f"[DS] Loaded Twin: {patient_sc}")
print(f"[DS] Public data: {patient_sc.public.n_obs} cells")

[DS] Waiting for Twin to be published...
✓ Loaded Twin 'patient_sc' from published location
[DS] Loaded Twin: 🌍 Twin: patient_sc (MOCK DATA - SAFE)
  🔒 Private    (not available) 💡 .request_private()
  🌍 Public    <AnnData (preview): AnnData n_obs=30000, n_vars=36601 — r...    ← .value uses this
  Owner: client1@sandbox.local
  Live: ⚫ Disabled
  IDs: twin=26149626... private=c965d230... public=37d37542...
[DS] Public data: 30000 cells


## Step 1: Violin Plot

In [7]:
print("[DS] Running violin plot analysis...")

@bv
def make_violin(adata):
    """Print the cell count, draw a multi-panel violin plot, and return the current figure.

    The plotted keys are n_genes_by_counts, total_counts and pct_counts_mt.
    """
    metric_keys = ["n_genes_by_counts", "total_counts", "pct_counts_mt"]
    print("n_cells:", adata.n_obs)
    sc.pl.violin(adata, metric_keys, size=0, multi_panel=True)
    figure = plt.gcf()
    return figure

# Calling the decorated function on the Twin yields the result object checked below.
violin_result = make_violin(patient_sc)

assert violin_result is not None, "Violin computation returned None"
assert hasattr(violin_result, 'public'), "Result has no public attribute"
stdout_preview = violin_result.public_stdout[:50]
print(f"[DS] Violin public stdout: {stdout_preview}...")

[DS] Running violin plot analysis...
[DS] Violin public stdout: n_cells: 30000
...


In [9]:
# Ask for make_violin to be run on the private side of the Twin; the following
# cell waits for the reply.
print("[DS] Requesting private execution...")
violin_result.request_private()

[DS] Requesting private execution...
📨 Sending computation request to client1@sandbox.local
   Function: make_violin
   Result: violin_result
✓ Sent to /Users/madhavajay/dev/biovault-beaver/workspace2/sandbox/client2@sandbox.local/datasites/client2@sandbox.local/shared/client1@sandbox.local/ea4bcada01cf41d68daa9aacc3385b70.beaver
💡 Result will auto-update when client1@sandbox.local approves


In [8]:
print("[DS] Waiting for violin result...")
# A None first element is treated as "nothing arrived before the timeout".
env, _ = bv.wait_for_message(timeout=TIMEOUT, poll_interval=1.0)
assert env is not None, "Timeout waiting for violin result"

# The private side of the result must be populated once the reply has arrived.
assert violin_result.private is not None, "Private result not received"
print("[DS] ✓ Violin private result received")
if violin_result.private_stdout:
    print(f"[DS] Private stdout: {violin_result.private_stdout[:50]}...")

[DS] Requesting private execution...
📨 Sending computation request to client1@sandbox.local
   Function: make_violin
   Result: violin_result
✓ Sent to /Users/madhavajay/dev/biovault-beaver/workspace2/sandbox/client2@sandbox.local/datasites/client2@sandbox.local/shared/client1@sandbox.local/2a21996bb1254654a6214cfa3802bf88.beaver
💡 Result will auto-update when client1@sandbox.local approves
[DS] Waiting for violin result...


KeyboardInterrupt: 

## Step 2: Embedding Plot

In [None]:
print("[DS] Running embedding analysis...")

@bv
def show_embedding(adata):
    """Print the cell count and return the X_umap embedding plot without showing it.

    The embedding is coloured by pct_counts_mt and cell_type, one panel per row.
    """
    print("n_cells:", adata.n_obs)
    plotted = sc.pl.embedding(
        adata,
        basis='X_umap',
        color=['pct_counts_mt', 'cell_type'],
        ncols=1,
        size=15,
        frameon=False,
        show=False,
    )
    return plotted

embedding_result = show_embedding(patient_sc)

assert embedding_result is not None, "Embedding computation returned None"
n_public_figures = len(embedding_result.public_figures)
print(f"[DS] Embedding public figures: {n_public_figures}")

In [None]:
# Ask for show_embedding to be run on the private data, then block for the reply.
print("[DS] Requesting private execution...")
embedding_result.request_private()

print("[DS] Waiting for embedding result...")
# A None first element is treated as "nothing arrived before the timeout".
env, _ = bv.wait_for_message(timeout=TIMEOUT, poll_interval=1.0)
assert env is not None, "Timeout waiting for embedding result"

print("[DS] ✓ Embedding private result received")

## Step 3: PCA Variance

In [None]:
print("[DS] Running PCA variance analysis...")

@bv
def plot_pca_variance(adata):
    """Preprocess ``adata``, run PCA, and plot the variance ratio for 50 PCs.

    The function has no return statement, so it returns None; its visible
    effects are the plot and the changes made to ``adata`` (a new
    ``raw_counts`` layer plus whatever the scanpy calls write back).
    """
    # Snapshot X before normalisation so the HVG step can read the raw values.
    adata.layers['raw_counts'] = adata.X.copy()
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    # HVG selection is pointed at the raw_counts layer rather than the transformed X.
    # NOTE(review): scanpy documents flavor='seurat_v3' as requiring scikit-misc — confirm installed.
    sc.pp.highly_variable_genes(adata, flavor='seurat_v3', layer='raw_counts', n_top_genes=5000)
    # NOTE(review): recent scanpy documents use_highly_variable as deprecated in
    # favour of mask_var — confirm against the installed version before changing.
    sc.pp.pca(adata, use_highly_variable=True)
    # Apply the small figure size only while this plot is drawn.
    with plt.rc_context({'figure.figsize': (4, 3)}):
        sc.pl.pca_variance_ratio(adata, n_pcs=50)

pca_result = plot_pca_variance(patient_sc)

assert pca_result is not None, "PCA computation returned None"
print(f"[DS] PCA public figures: {len(pca_result.public_figures)}")

In [None]:
# Ask for plot_pca_variance to be run on the private data, then block for the reply.
print("[DS] Requesting private execution...")
pca_result.request_private()

print("[DS] Waiting for PCA result...")
# A None first element is treated as "nothing arrived before the timeout".
env, _ = bv.wait_for_message(timeout=TIMEOUT, poll_interval=1.0)
assert env is not None, "Timeout waiting for PCA result"

print("[DS] ✓ PCA private result received")

## Step 4: UMAP Embedding

In [None]:
print("[DS] Running UMAP embedding analysis...")

@bv
def umap_embedding(adata):
    """Build the neighbour graph and UMAP on ``adata``, then plot X_umap coloured by cell_type.

    Returns whatever ``sc.pl.embedding`` returns when called with ``show=True``.
    """
    sc.pp.neighbors(adata, n_pcs=50)
    sc.tl.umap(adata)
    plotted = sc.pl.embedding(
        adata,
        basis='X_umap',
        color=['cell_type'],
        ncols=1,
        size=15,
        frameon=False,
        show=True,
    )
    return plotted

umap_result = umap_embedding(patient_sc)

assert umap_result is not None, "UMAP computation returned None"
n_public_figures = len(umap_result.public_figures)
print(f"[DS] UMAP public figures: {n_public_figures}")

In [None]:
# Ask for umap_embedding to be run on the private data, then block for the reply.
print("[DS] Requesting private execution...")
umap_result.request_private()

print("[DS] Waiting for UMAP result...")
# A None first element is treated as "nothing arrived before the timeout".
env, _ = bv.wait_for_message(timeout=TIMEOUT, poll_interval=1.0)
assert env is not None, "Timeout waiting for UMAP result"

print("[DS] ✓ UMAP private result received")

In [None]:
# Final verification
# Check the inbox BEFORE announcing success, so a failing run never logs
# "TEST PASSED" ahead of the assertion error.
expected_results = 4  # one private result per analysis step above
inbox = bv.inbox()
print(f"\n[DS] Inbox contains {len(inbox)} messages")
assert len(inbox) >= expected_results, (
    f"Expected at least {expected_results} results in inbox, got {len(inbox)}"
)

print("\n" + "="*50)
print("[DS] TEST PASSED")
print("[DS] Successfully completed all 4 analysis steps:")
print("  1. Violin plot")
print("  2. Embedding plot")
print("  3. PCA variance")
print("  4. UMAP embedding")
print("="*50)