# syft-bg E2E Test

Tests the full flow: peer auto-approval → dataset creation → job submission → job auto-approval → execution → results

In [None]:
# Setup
import sys
from pathlib import Path

IN_COLAB = "google.colab" in sys.modules

if IN_COLAB:
    from google.colab import drive
    drive.mount("/content/drive")
    CREDS_DIR = Path("/content/drive/MyDrive/syft-creds")
    !pip install -q syft-client
else:
    CREDS_DIR = Path.home() / ".syft-creds"

print(f"Creds: {CREDS_DIR}, Config: {(CREDS_DIR / 'config.yaml').exists()}")

In [None]:
# Init syft-bg (run once to set up config and OAuth)
# This is interactive - prompts for email, SyftBox path, and runs OAuth for Gmail/Drive

import subprocess

config_path = CREDS_DIR / "config.yaml"
if not config_path.exists():
    print("Running syft-bg init...")
    # Interactive init - will prompt for email and run OAuth
    !syft-bg init -f main.py,params.json
else:
    print(f"Config exists: {config_path}")
    # Show current config
    !cat {config_path}

In [None]:
# Config: email address and token file for each of the two test accounts
EMAIL_DO = "test1@openmined.org"
TOKEN_DO = CREDS_DIR.joinpath("token_do.json")

EMAIL_DS = "test2@openmined.org"
TOKEN_DS = CREDS_DIR.joinpath("token_ds.json")

# Show whether each token file is present in the credentials directory.
print(f"DO token: {TOKEN_DO.exists()}, DS token: {TOKEN_DS.exists()}")

In [None]:
# Start daemons
# Runs `stop` before `start` (presumably so already-running daemons are
# restarted rather than duplicated - confirm against syft-bg docs), then
# prints the daemon status.
!syft-bg stop
!syft-bg start
!syft-bg status

In [None]:
# Login: create one client per account (DO and DS)
import syft_client as sc

client_do = sc.login_do(
    email=EMAIL_DO,
    token_path=TOKEN_DO,
)
client_ds = sc.login_ds(
    email=EMAIL_DS,
    token_path=TOKEN_DS,
)

# Echo the email each client reports.
print(f"DO: {client_do.email}, DS: {client_ds.email}")

In [None]:
# Add peer (DS → DO)
import time

client_ds.add_peer(EMAIL_DO)

# Wait for auto-approval (~10-30s): poll every 3s, at most 10 times.
for _ in range(10):
    time.sleep(3)
    client_do.load_peers()
    if any(p.email == EMAIL_DS for p in client_do.version_manager.approved_peers):
        print("Peer approved")
        break
    print("Waiting...")
else:
    # No break happened, so the peer never showed up as approved. Fail here
    # instead of letting later cells fail with a less obvious error.
    raise TimeoutError(f"Peer {EMAIL_DS} was not approved by {EMAIL_DO} within 30s")

In [None]:
# Create dataset
import pandas as pd
import shutil

# Write a one-row CSV; the same file is passed as both mock and private data.
df = pd.DataFrame([{"id": "1", "text": "test data"}])
df.to_csv("/tmp/data.csv", index=False)

DATASET = "TestData"

# Delete any existing public/private folders for this dataset name.
existing_dirs = (
    client_do.syftbox_folder / client_do.email / "public" / "syft_datasets" / DATASET,
    client_do.syftbox_folder / "private" / "syft_datasets" / DATASET,
)
for p in existing_dirs:
    if p.exists():
        shutil.rmtree(p)

client_do.create_dataset(
    name=DATASET,
    mock_path="/tmp/data.csv",
    private_path="/tmp/data.csv",
    summary="Test",
    tags=["test"],
)
client_do.sync()
print(f"Dataset: {DATASET}")

In [None]:
# Create and submit job
import json
import uuid

# Start from an empty job directory.
job_dir = Path("/tmp/test_job")
if job_dir.exists():
    shutil.rmtree(job_dir)
job_dir.mkdir()

# Empty parameter object; json.dumps({}) writes the same '{}' text.
(job_dir / "params.json").write_text(json.dumps({}))

# The job script resolves the dataset by name. Interpolate DATASET (as a
# Python literal via !r) so the script cannot drift from the dataset name
# defined in the dataset-creation cell. Only the middle literal is an
# f-string; the last line is written verbatim, braces included.
(job_dir / "main.py").write_text(
    'import pandas as pd, syft_client as sc\n'
    f'df = pd.read_csv(sc.resolve_dataset_file_path({DATASET!r}))\n'
    'print(f"Rows: {len(df)}")'
)

# Random 6-hex-digit suffix gives each run its own job name.
JOB_NAME = f"job_{uuid.uuid4().hex[:6]}"
client_ds.submit_python_job(
    user=EMAIL_DO,
    code_path=str(job_dir),
    job_name=JOB_NAME,
    dependencies=["pandas"],
)
print(f"Submitted: {JOB_NAME}")

In [None]:
# Wait for job completion (~20-30s): poll every 3s, at most 12 times.
start = time.time()
for _ in range(12):
    time.sleep(3)
    client_do.sync()
    # First job on the DO side whose name matches, or None if not synced yet.
    do_job = next((j for j in client_do.job_client.jobs if j.name == JOB_NAME), None)
    if do_job is not None and do_job.status == "done":
        print(f"Done in {time.time()-start:.0f}s")
        break
    print(f"Status: {do_job.status if do_job is not None else 'syncing'}")
else:
    # No break happened, so the job never reported "done". Fail here instead
    # of letting the results cell run against an unfinished job.
    raise TimeoutError(
        f"Job {JOB_NAME} did not reach status 'done' within {time.time() - start:.0f}s"
    )

In [None]:
# Check results (DS side)
client_ds.sync()

# Look the job up by name; report a clear error instead of a bare IndexError
# when it is missing from the DS client's job list.
job = next((j for j in client_ds.job_client.jobs if j.name == JOB_NAME), None)
if job is None:
    raise LookupError(f"Job {JOB_NAME} not found on the DS side after sync")

print(f"Status: {job.status}")
if job.output_paths:
    print(f"Outputs: {job.output_paths}")

In [None]:
# Check logs
# Prints the `approve` log, a "---" separator, then the `notify` log.
# `-n 20` presumably limits each to the last 20 lines - confirm against
# `syft-bg logs --help`.
!syft-bg logs approve -n 20
print("---")
!syft-bg logs notify -n 20

In [None]:
# Cleanup
# Stop the syft-bg daemons that were started in the "Start daemons" cell.
!syft-bg stop