+ Load people.csv → list of dicts
+ Build unweighted NetworkX graph: connect if same school/company or ≥2 shared skills
+ BFS from “You” to a target node → show path


# MVP — Batch test extraction on 10–20 profiles

This notebook:
1. Loads `sample_profiles.jsonl`
2. Calls your API `POST /extract-profiles` for each text
3. Upserts all results via `POST /ingest-people`
4. Shows a preview of the extracted profiles, then inspects the saved `data/people.json`

> Make sure your server is running first:
```bash
uvicorn main:app --reload --port 8000
```


In [None]:

%pip install -q requests pandas networkx
import json, requests, time, pandas as pd
from pathlib import Path

# Server address and input/output locations. The relative paths are resolved
# against the notebook's current working directory.
BASE_URL = "http://127.0.0.1:8000"
SAMPLES = Path("../sample_profiles.jsonl")
OUT = Path("../data/people.json")  # not used in this cell

# The samples file is JSON Lines: one JSON object per non-blank line.
rows = [
    json.loads(line)
    for line in SAMPLES.read_text(encoding="utf-8").splitlines()
    if line.strip()
]
print("Loaded samples:", len(rows))

# Extract one profile per sample. A failure on a single sample (transport
# error, non-200 status, or a body that is not valid JSON) is reported and
# skipped, so the profiles already collected still reach the ingest step
# instead of being lost when the cell aborts mid-loop.
people = []
for r in rows:
    payload = {"text": r["text"]}
    try:
        resp = requests.post(f"{BASE_URL}/extract-profiles", json=payload, timeout=60)
    except requests.RequestException as exc:
        print("ERR", type(exc).__name__, str(exc)[:200])
        continue
    if resp.status_code != 200:
        print("ERR", resp.status_code, resp.text[:200])
        continue
    try:
        # NOTE(review): the code below assumes the endpoint answers with a
        # single JSON object (dict) per request — confirm against the API.
        prof = resp.json()
    except ValueError as exc:
        # Response.json() raises a ValueError subclass on an undecodable body.
        print("ERR", "invalid JSON:", str(exc)[:200])
        continue
    prof["_id"] = r["id"]  # carry the sample's id along with the profile
    people.append(prof)
    time.sleep(0.2)  # gentle

print("Extracted", len(people), "profiles")

# Ingest
# All extracted profiles are sent in one request. A transport failure is
# reported rather than raised so the preview below is still displayed.
try:
    resp = requests.post(f"{BASE_URL}/ingest-people", json={"people": people}, timeout=60)
except requests.RequestException as exc:
    print("Ingest failed:", type(exc).__name__, str(exc)[:200])
else:
    print("Ingest:", resp.status_code, resp.text)

# Preview
# Must stay the last expression of the cell so the notebook renders it.
df = pd.DataFrame(people)
df.head(10)


In [None]:

# Inspect saved JSON (optional)
# Reports whether ../data/people.json exists and its size in bytes, then
# prints the first 1000 characters of its contents.
from pathlib import Path

p = Path("../data/people.json")
found = p.exists()
print("people.json exists:", found, "size:", p.stat().st_size if found else 0)
# Only read the file when it is present: reading a missing file would raise
# FileNotFoundError right after the line above reported "exists: False".
if found:
    print(p.read_text(encoding="utf-8")[:1000])
