In [25]:
import json

def _parse_json_object(raw):
    """Best-effort decoding of a JSON object from a model reply.

    The payload is first decoded as-is. If that fails (or does not yield an
    object) and the payload is a string, the span from the first ``{`` to the
    last ``}`` is tried as well, so a Markdown code fence or stray prose
    around the JSON does not defeat parsing.

    Returns the decoded ``dict``, or ``None`` when no JSON object was found.
    """
    candidates = [raw]
    if isinstance(raw, str):
        start, end = raw.find("{"), raw.rfind("}")
        if 0 <= start < end:
            candidates.append(raw[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except (TypeError, ValueError, RecursionError):
            # TypeError: payload is not str/bytes; ValueError covers
            # json.JSONDecodeError; RecursionError: pathologically nested input.
            continue
        # Only a JSON object satisfies the documented return schema.
        if isinstance(parsed, dict):
            return parsed
    return None


def research_and_summarize_json(topic: str, n_results: int = 5, n_open: int = 3) -> dict:
    """
    Orchestrator pipeline: search, fetch the top pages, ask the agent for JSON.

    Args:
        topic: Query passed to ``web_search`` and quoted in the prompt.
        n_results: Forwarded to ``web_search`` as ``n``.
        n_open: How many of the leading search results are fetched.

    Returns:
        On success, the JSON object produced by the agent, requested as: {
            "topic": str,
            "sources": [...],
            "summary": {
                "demand": [...],
                "prices_costs": [...],
                "projects_policy": [...],
                "risks": [...],
                "outlook": [...]
            }
        }
        If no result was opened: {"topic": topic, "sources": [], "summary": {}}.
        If the agent reply holds no decodable JSON object: a dict with
        "topic", "sources" (title/url of every opened source) and
        "summary_raw" (the unparsed reply content).

    Note:
        ``web_search``, ``fetch_page`` and ``market_research_agent`` are not
        defined in this block. This code indexes each search result by
        "url"/"title"/"snippet", each page by "title"/"text", and reads
        ``.content`` from the agent response.
    """
    # Step 1: Search
    results = web_search(topic, n=n_results)

    # Step 2: Fetch pages. A failed fetch keeps the source in the list with a
    # placeholder text, so source numbering still follows the search order.
    opened = []
    for r in results[:n_open]:
        try:
            page = fetch_page(r["url"])
            opened.append({
                "title": page["title"] or r["title"],
                "url": r["url"],
                "snippet": r["snippet"],
                "text": page["text"]
            })
        except Exception as e:
            opened.append({
                "title": r["title"],
                "url": r["url"],
                "snippet": r["snippet"],
                "text": f"[Fetch failed: {e}]"
            })

    if not opened:
        return {"topic": topic, "sources": [], "summary": {}}

    # Step 3: Build structured prompt asking for JSON
    sources_block = "\n\n".join(
        f"### Source {i+1}: {s['title']}\nURL: {s['url']}\n\n{s['text']}"
        for i, s in enumerate(opened)
    )

    prompt = f"""
You are a Market Researcher. Summarize the latest on "{topic}".

Use ONLY the facts from the sources provided below.
Respond STRICTLY in JSON with this schema:

{{
  "topic": "...",
  "sources": [
    {{"title": "...", "url": "..."}}
  ],
  "summary": {{
    "demand": ["...","..."],
    "prices_costs": ["...","..."],
    "projects_policy": ["...","..."],
    "risks": ["...","..."],
    "outlook": ["...","..."]
  }}
}}

Rules:
- Fill each list with concise bullet points (strings).
- Include citations like [S1], [S2] at the end of each bullet, using the source number.
- Do NOT add any extra text outside the JSON.

Here are the sources:
{sources_block}
"""

    resp = market_research_agent.run(prompt)

    # Step 4: Decode the reply, tolerating code fences / surrounding prose.
    result_json = _parse_json_object(resp.content)
    if result_json is None:
        # No usable JSON object: hand back the raw reply with the sources we used.
        result_json = {
            "topic": topic,
            "sources": [{"title": s["title"], "url": s["url"]} for s in opened],
            "summary_raw": resp.content
        }

    return result_json


# -------------------------
# Example usage
# -------------------------
if __name__ == "__main__":
    # Demo run: ask for six search hits and open the first three of them.
    topic = "Sri Lanka construction industry: current demand, material prices, government projects, and risks"
    result = research_and_summarize_json(topic=topic, n_results=6, n_open=3)

    # Show the result as a Python literal first ...
    import pprint
    pprint.pp(result)

    # ... then as indented JSON text.
    print(json.dumps(result, indent=2))


{'topic': 'Sri Lanka Construction Industry',
 'sources': [{'title': 'International Trade Administration',
              'url': 'https://www.trade.gov/country-commercial-guides/sri-lanka/construction'},
             {'title': 'Research and Markets',
              'url': 'https://www.researchandmarkets.com/reports/5610594/sri-lanka-construction-market-size-trends-and'},
             {'title': 'Crystal Construction Lanka',
              'url': 'https://www.crystalconstruction.lk/economic-forecast-for-sri-lankas-construction-industry-in-2025/'}],
 'summary': {'demand': ['- Strong demand for infrastructure development in Sri '
                        'Lanka, driven by government initiatives to modernize '
                        "the country's transportation network.",
                        '- Increasing construction activity in the residential '
                        'and commercial sectors, fueled by urbanization and '
                        'economic growth.'],
             'prices_