In [None]:
# Environment setup (Colab or local)
import sys
from pathlib import Path

IN_COLAB = "google.colab" in sys.modules
print(f"Environment: {'Colab' if IN_COLAB else 'Local'}")

if IN_COLAB:
    from google.colab import drive
    drive.mount("/content/drive")
    CREDS_DIR = Path("/content/drive/MyDrive/syft-creds")
    
    # Install packages
    !pip install -q syft-client syft-bg syft-approve syft-notify syft-job syft-datasets
else:
    CREDS_DIR = Path.home() / ".syft-creds"

print(f"Credentials dir: {CREDS_DIR}")
print(f"Config exists: {(CREDS_DIR / 'config.yaml').exists()}")

In [None]:
# Setup
import json
import os
import shutil
import subprocess
import tempfile
import time
import uuid

import syft_client as sc

# Identities of the two parties used throughout the notebook:
# DO = data owner, DS = data scientist.
email_do = "test1@openmined.org"
email_ds = "test2@openmined.org"

# Flat token files expected inside the credentials directory.
token_do = CREDS_DIR / "token_do.json"
token_ds = CREDS_DIR / "token_ds.json"

# Convert DS token if needed (handles nested format)
ds_nested_paths = [
    Path.home() / ".syft/test2_at_openmined_org/tokens/google_org.json",
    CREDS_DIR / "test2_token_nested.json",
]
if not token_ds.exists():
    for ds_nested in ds_nested_paths:
        if not ds_nested.exists():
            continue
        with open(ds_nested) as f:
            data = json.load(f)
        # The nested token may live outside CREDS_DIR, so CREDS_DIR itself
        # might not exist yet; create it before writing the flat token.
        token_ds.parent.mkdir(parents=True, exist_ok=True)
        with open(token_ds, "w") as f:
            # Nested files keep the payload under "token_data"; a file
            # without that key is written out unchanged.
            json.dump(data.get("token_data", data), f)
        break  # the first candidate found wins

print(f"DO token: {token_do.exists()}")
print(f"DS token: {token_ds.exists()}")

In [None]:
# Start daemons
subprocess.run(["syft-bg", "stop"], capture_output=True)  # Clean start
time.sleep(1)
subprocess.run(["syft-bg", "start"], capture_output=True)
time.sleep(3)
!syft-bg status

In [None]:
# Login
# Create one client per party: data owner (DO) first, then data scientist (DS),
# each with its own email and token file.
client_do = sc.login_do(email=email_do, token_path=token_do)
client_ds = sc.login_ds(email=email_ds, token_path=token_ds)

# Echo the email each client reports.
for role_label, logged_in_client in (("DO", client_do), ("DS", client_ds)):
    print(f"{role_label}: {logged_in_client.email}")

In [None]:
# DS adds DO as peer
# Refresh the DS peer list, then send a peer request only when the DO is not
# already among the DS's approved peers.
client_ds.load_peers()
already_approved = any(
    peer.email == email_do
    for peer in client_ds.version_manager.approved_peers
)
if already_approved:
    print("Already peers")
else:
    client_ds.add_peer(email_do)
    print(f"Peer request sent to {email_do}")

In [None]:
# Wait for peer auto-approval
# Poll both clients until each lists the other as an approved peer, giving up
# after PEER_POLL_ATTEMPTS tries spaced PEER_POLL_INTERVAL_S seconds apart.
PEER_POLL_ATTEMPTS = 12
PEER_POLL_INTERVAL_S = 5

for attempt in range(PEER_POLL_ATTEMPTS):
    try:
        client_do.load_peers()
        client_ds.load_peers()
        do_peers = [p.email for p in client_do.version_manager.approved_peers]
        ds_peers = [p.email for p in client_ds.version_manager.approved_peers]
        if email_ds in do_peers and email_do in ds_peers:
            print(f"Peers connected: {do_peers}")
            break
        print(f"Waiting... pending: {[p.email for p in client_do.version_manager.pending_peers]}")
    except Exception as e:
        # Errors while polling are retried rather than raised, but the message
        # is shown alongside the type so a persistent failure can be diagnosed.
        print(f"Waiting... {type(e).__name__}: {e}")
    time.sleep(PEER_POLL_INTERVAL_S)
else:
    # Reached only when the loop ran out of attempts without a break.
    print("Timeout - check: syft-bg logs approve")

In [None]:
# Create dataset
import pandas as pd

# Four-row toy dataset; the first two rows double as the public mock.
df = pd.DataFrame([
    {"id": "c1", "role": "user", "text": "how do I protect my identity online?"},
    {"id": "c2", "role": "user", "text": "what are the best privacy tools?"},
    {"id": "c3", "role": "user", "text": "how to secure personal information?"},
    {"id": "c4", "role": "user", "text": "what is the weather today?"},
])

# Scratch directory for the CSVs: /content on Colab, otherwise the platform's
# temp directory (a literal "/tmp" does not exist on every local system).
tmp = Path("/content") if IN_COLAB else Path(tempfile.gettempdir())
mock_path, private_path = tmp / "mock.csv", tmp / "private.csv"
df.head(2).to_csv(mock_path, index=False)
df.to_csv(private_path, index=False)

# Remove any earlier copy of the dataset from both the public and the private
# dataset folders so the cell can be re-run.
dataset_name = "TestData"
stale_dataset_dirs = [
    client_do.syftbox_folder / client_do.email / "public" / "syft_datasets" / dataset_name,
    client_do.syftbox_folder / "private" / "syft_datasets" / dataset_name,
]
for p in stale_dataset_dirs:
    if p.exists():
        shutil.rmtree(p)

# Register the dataset with the DO client, then sync.
client_do.create_dataset(name=dataset_name, mock_path=str(mock_path), private_path=str(private_path),
                         summary="Test", tags=["test"])
client_do.sync()
print(f"Dataset: {dataset_name}")

In [None]:
# Create job
# Build the job folder from scratch: /content on Colab, otherwise the
# platform's temp directory (a literal "/tmp" does not exist on every local
# system). Any previous copy is removed so the cell can be re-run.
tmp = Path("/content") if IN_COLAB else Path(tempfile.gettempdir())
job_dir = tmp / "test_job"
if job_dir.exists():
    shutil.rmtree(job_dir)
job_dir.mkdir()

# Parameters that main.py reads from params.json in its working directory.
(job_dir / "params.json").write_text(json.dumps({
    "SIMILARITY_PROMPT": "protect identity",
    "SIMILARITY_THRESHOLD": 0.1
}))

# Job entry point: scores each row's text against the prompt using word-set
# overlap (intersection over union), keeps rows at or above the threshold and
# writes them to outputs/results.json.
(job_dir / "main.py").write_text('''import json, os, pandas as pd, syft_client as sc
with open("params.json") as f: params = json.load(f)
df = pd.read_csv(sc.resolve_dataset_file_path("TestData"))
def sim(a, b): w1, w2 = set(a.lower().split()), set(b.lower().split()); return len(w1&w2)/len(w1|w2) if w1|w2 else 0
df["score"] = df["text"].apply(lambda x: sim(x, params["SIMILARITY_PROMPT"]))
results = df[df["score"] >= params["SIMILARITY_THRESHOLD"]][["id","text","score"]].to_dict("records")
os.makedirs("outputs", exist_ok=True)
with open("outputs/results.json", "w") as f: json.dump(results, f)
print(f"Matches: {len(results)}")
''')

print(f"Job: {list(job_dir.iterdir())}")

In [None]:
# Submit job
# Use a short random suffix so each run gets its own job name, then have the
# DS submit the job folder to the DO with pandas as its only dependency.
job_name = "job_" + uuid.uuid4().hex[:6]
client_ds.submit_python_job(
    user=email_do,
    code_path=str(job_dir),
    job_name=job_name,
    dependencies=["pandas"],
)
print(f"Submitted: {job_name}")

In [None]:
# Wait for auto-approval
# Up to six polls, five seconds apart: sync the DO client and look for the
# submitted job by name until its status reads "approved".
for attempt in range(6):
    client_do.sync()
    matching_jobs = [job for job in client_do.job_client.jobs if job.name == job_name]
    first_match = matching_jobs[0] if matching_jobs else None
    if first_match is not None and first_match.status == "approved":
        print("Approved")
        break
    # No job by that name yet is reported as "pending".
    status_label = first_match.status if first_match is not None else "pending"
    print(f"Waiting... {status_label}")
    time.sleep(5)
else:
    # Reached only when the loop ran out of attempts without a break.
    print("Timeout")

In [None]:
# Execute
# Have the DO's job runner process approved jobs, sync, then look the job up
# by name and show its status and any captured stdout.
client_do.job_runner.process_approved_jobs()
client_do.sync()

do_side_matches = [candidate for candidate in client_do.job_client.jobs if candidate.name == job_name]
job = do_side_matches[0]  # IndexError here means no job with this name was found

print(f"Status: {job.status}")
if job.stdout:
    print(job.stdout)

In [None]:
# DS results
# Sync the DS client, look the job up by name, and pretty-print the first
# output file (parsed as JSON) when the job lists any output paths.
client_ds.sync()

ds_side_matches = [candidate for candidate in client_ds.job_client.jobs if candidate.name == job_name]
ds_job = ds_side_matches[0]  # IndexError here means the DS does not see the job

print(f"Status: {ds_job.status}")
if ds_job.output_paths:
    with open(ds_job.output_paths[0]) as f:
        result_payload = json.load(f)
    print(json.dumps(result_payload, indent=2))

In [None]:
# Logs
# Run `syft-bg logs` for "approve" and then for "notify", printing a "---"
# separator line between the two outputs.
!syft-bg logs approve
print("---")
!syft-bg logs notify

In [None]:
# Cleanup
# Run `syft-bg stop`, then `syft-bg status` to show the state afterwards.
!syft-bg stop
!syft-bg status