# Test: Data Scientist (Client2)

Automated test notebook for the Data Scientist workflow.
Run with: `jupyter execute sc_test_ds.ipynb`

In [None]:
import sys
from pathlib import Path

# Configuration
# Upper bound handed as `timeout=` to every bv.wait_for_message() call in this
# notebook (presumably seconds — confirm against the beaver API).
TIMEOUT = 120.0

In [None]:
import scanpy as sc
import matplotlib.pyplot as plt
from beaver import Twin
import beaver

# Connect to the "shared" beaver session as user "client2". The resulting
# `bv` handle is what the later cells use for messaging (send /
# wait_for_message / inbox), peer access, and as the @bv function decorator.
bv = beaver.connect("shared", user="client2")
print("[DS] Connected as client2")

In [None]:
# Tell the DO side (addressed as user "client1") that this client is up.
print("[DS] Sending ready signal to DO...")
ready_payload = {"status": "ready"}
bv.send(
    ready_payload,
    name="ds_ready",
    user="client1",
)
print("[DS] Sent ds_ready signal")

In [None]:
# Block until the "do_twin_ready" message shows up (polling once per 1.0),
# giving up after TIMEOUT. A None envelope is treated as a timeout.
print("[DS] Waiting for Twin ready signal...")
env, obj = bv.wait_for_message(filter_name="do_twin_ready", timeout=TIMEOUT, poll_interval=1.0)
assert env is not None, "Timeout waiting for Twin ready signal"
# Only the envelope's sender is reported; the message body (`obj`) is unused.
print(f"[DS] Twin is ready (signal from {env.sender})")

In [None]:
# Fetch the Twin published by client1 under the name "patient_sc".
print("[DS] Loading Twin from client1...")
peer = bv.peer("client1")
patient_sc = peer.remote_vars["patient_sc"].load(auto_accept=True)

# The remaining cells rely on the Twin exposing a `.public` view.
assert patient_sc is not None, "Failed to load Twin"
assert hasattr(patient_sc, "public"), "Twin has no public attribute"
print(f"[DS] Loaded Twin: {patient_sc}")
print(f"[DS] Public data: {patient_sc.public.n_obs} cells")

## Step 1: Violin Plot

In [None]:
print("[DS] Running violin plot analysis...")

@bv
def make_violin(adata):
    """Plot QC violins for ``adata`` and return the current matplotlib figure.

    Prints the number of cells, draws a multi-panel violin plot of the three
    QC columns, then returns whatever ``plt.gcf()`` reports afterwards.
    """
    qc_columns = ["n_genes_by_counts", "total_counts", "pct_counts_mt"]
    print("n_cells:", adata.n_obs)
    sc.pl.violin(adata, qc_columns, size=0, multi_panel=True)
    return plt.gcf()

# Calling the decorated function on the Twin yields a result object; the
# checks below expect it to expose `.public` and `.public_stdout`.
violin_result = make_violin(patient_sc)

assert violin_result is not None, "Violin computation returned None"
assert hasattr(violin_result, "public"), "Result has no public attribute"
stdout_preview = violin_result.public_stdout[:50]
print(f"[DS] Violin public stdout: {stdout_preview}...")

In [None]:
# Ask for the violin computation to be run on the private side, then wait for
# a reply and check that the private result has been filled in.
print("[DS] Requesting private execution...")
violin_result.request_private()

# NOTE(review): unlike the "do_twin_ready" wait, this call passes no
# filter_name, so it presumably returns on the next incoming message of any
# name — confirm that only the violin reply can arrive at this point.
print("[DS] Waiting for violin result...")
env, _ = bv.wait_for_message(timeout=TIMEOUT, poll_interval=1.0)
assert env is not None, "Timeout waiting for violin result"

assert violin_result.private is not None, "Private result not received"
# Plain string: the original used an f-string with no placeholders.
print("[DS] ✓ Violin private result received")
if violin_result.private_stdout:
    print(f"[DS] Private stdout: {violin_result.private_stdout[:50]}...")

## Step 2: Embedding Plot

In [None]:
print("[DS] Running embedding analysis...")

@bv
def show_embedding(adata):
    """Plot the stored ``X_umap`` embedding of ``adata`` and return the plot.

    Prints the number of cells, then colours the embedding by
    ``pct_counts_mt`` and ``cell_type`` in a single column. ``show=False`` is
    passed so the value produced by ``sc.pl.embedding`` is returned to the
    caller.
    """
    print("n_cells:", adata.n_obs)
    plot_options = {
        "basis": "X_umap",
        "color": ["pct_counts_mt", "cell_type"],
        "ncols": 1,
        "size": 15,
        "frameon": False,
        "show": False,
    }
    return sc.pl.embedding(adata, **plot_options)

# Run against the Twin and report how many figures the public run captured.
embedding_result = show_embedding(patient_sc)

assert embedding_result is not None, "Embedding computation returned None"
public_figure_count = len(embedding_result.public_figures)
print(f"[DS] Embedding public figures: {public_figure_count}")

In [None]:
# Ask for the embedding computation to be run on the private side and wait
# for a reply.
print("[DS] Requesting private execution...")
embedding_result.request_private()

# NOTE(review): no filter_name is passed, so this presumably returns on the
# next incoming message of any name — confirm that is the embedding reply.
print("[DS] Waiting for embedding result...")
env, _ = bv.wait_for_message(timeout=TIMEOUT, poll_interval=1.0)
assert env is not None, "Timeout waiting for embedding result"

# Plain string: the original used an f-string with no placeholders.
print("[DS] ✓ Embedding private result received")

## Step 3: PCA Variance

In [None]:
print("[DS] Running PCA variance analysis...")

@bv
def plot_pca_variance(adata):
    """Preprocess ``adata`` in place and plot the PCA variance ratio.

    Steps, in order: copy ``X`` into the ``raw_counts`` layer, normalise
    totals to 1e4, log1p-transform, select 5000 highly variable genes from
    the ``raw_counts`` layer, run PCA, and plot the variance ratio of the
    first 50 components in a 4x3 figure. Nothing is returned explicitly.
    """
    # Keep a copy of X from before normalisation; the HVG call below reads it
    # (presumably because flavor="seurat_v3" expects count data — confirm).
    adata.layers["raw_counts"] = adata.X.copy()
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    sc.pp.highly_variable_genes(
        adata,
        flavor="seurat_v3",
        layer="raw_counts",
        n_top_genes=5000,
    )
    # NOTE(review): newer scanpy releases reportedly deprecate
    # `use_highly_variable` in favour of `mask_var` — confirm for the pinned version.
    sc.pp.pca(adata, use_highly_variable=True)
    compact_figure = {"figure.figsize": (4, 3)}
    with plt.rc_context(compact_figure):
        sc.pl.pca_variance_ratio(adata, n_pcs=50)

# Run against the Twin and report how many figures the public run captured.
pca_result = plot_pca_variance(patient_sc)

assert pca_result is not None, "PCA computation returned None"
public_figure_count = len(pca_result.public_figures)
print(f"[DS] PCA public figures: {public_figure_count}")

In [None]:
# Ask for the PCA computation to be run on the private side and wait for a
# reply.
print("[DS] Requesting private execution...")
pca_result.request_private()

# NOTE(review): no filter_name is passed, so this presumably returns on the
# next incoming message of any name — confirm that is the PCA reply.
print("[DS] Waiting for PCA result...")
env, _ = bv.wait_for_message(timeout=TIMEOUT, poll_interval=1.0)
assert env is not None, "Timeout waiting for PCA result"

# Plain string: the original used an f-string with no placeholders.
print("[DS] ✓ PCA private result received")

## Step 4: UMAP Embedding

In [None]:
print("[DS] Running UMAP embedding analysis...")

@bv
def umap_embedding(adata):
    """Compute neighbours and UMAP on ``adata`` in place, then plot by cell type.

    Builds the neighbour graph with ``n_pcs=50``, runs UMAP, and plots the
    ``X_umap`` basis coloured by ``cell_type``. The value produced by
    ``sc.pl.embedding`` is returned as-is.
    """
    sc.pp.neighbors(adata, n_pcs=50)
    sc.tl.umap(adata)
    # NOTE(review): show=True here, whereas show_embedding passes show=False;
    # with show=True scanpy likely returns None rather than axes — confirm intent.
    plot_handle = sc.pl.embedding(
        adata,
        basis="X_umap",
        color=["cell_type"],
        ncols=1,
        size=15,
        frameon=False,
        show=True,
    )
    return plot_handle

# Run against the Twin and report how many figures the public run captured.
umap_result = umap_embedding(patient_sc)

assert umap_result is not None, "UMAP computation returned None"
public_figure_count = len(umap_result.public_figures)
print(f"[DS] UMAP public figures: {public_figure_count}")

In [None]:
# Ask for the UMAP computation to be run on the private side and wait for a
# reply.
print("[DS] Requesting private execution...")
umap_result.request_private()

# NOTE(review): no filter_name is passed, so this presumably returns on the
# next incoming message of any name — confirm that is the UMAP reply.
print("[DS] Waiting for UMAP result...")
env, _ = bv.wait_for_message(timeout=TIMEOUT, poll_interval=1.0)
assert env is not None, "Timeout waiting for UMAP result"

# Plain string: the original used an f-string with no placeholders.
print("[DS] ✓ UMAP private result received")

In [None]:
# Final verification
# Check the inbox BEFORE printing the success banner: previously the banner
# was printed first, so a run whose inbox assertion failed still logged
# "[DS] TEST PASSED".
inbox = bv.inbox()
print(f"\n[DS] Inbox contains {len(inbox)} messages")
# One result message is expected per analysis step (4 steps in this notebook).
assert len(inbox) >= 4, f"Expected at least 4 results in inbox, got {len(inbox)}"

banner = "=" * 50
print("\n" + banner)
print("[DS] TEST PASSED")
print("[DS] Successfully completed all 4 analysis steps:")
print("  1. Violin plot")
print("  2. Embedding plot")
print("  3. PCA variance")
print("  4. UMAP embedding")
print(banner)