In [25]:
import os, re, json, asyncio
from typing import List
from dotenv import load_dotenv

# LlamaIndex
from llama_index.core import Settings
from llama_index.core.agent.workflow import FunctionAgent
from llama_index.llms.anthropic import Anthropic

# Supabase
from supabase import create_client, Client

# Read variables from the .env.dev file into the process environment.
load_dotenv("../secrets/.env.dev")

# Supabase settings; os.getenv yields None for any variable that is not set.
url = os.getenv("SUPABASE_URL")
key = os.getenv("SUPABASE_KEY")
password = os.getenv("SUPABASE_PASSWORD")
ref = os.getenv("SUPABASE_REF")
supabase: Client = create_client(url, key)

# Indexing os.environ directly raises KeyError right here if the key is missing.
ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"]

# Default LLM for LlamaIndex (Claude Sonnet).
Settings.llm = Anthropic(
    model="claude-sonnet-4-20250514",
    max_tokens=4000,
)


In [26]:
def get_law_context(ent_ids: list[str], table: str = "Article_Entry") -> str:
    """
    Build a compact, citation-friendly text block of legal entries from Supabase.

    Rows of `table` whose ent_id is in `ent_ids` are rendered as one bullet each:
    a header line (ent_id, law, then article / type / defined word when present)
    followed by the entry's contents flattened onto one line and capped at 800
    characters. Returns "NO_CONTEXT" when `ent_ids` is empty or no rows come back.
    """
    if not ent_ids:
        return "NO_CONTEXT"

    query = supabase.table(table).select("ent_id, art_num, type, belongs_to, contents, word")
    response = query.in_("ent_id", ent_ids).execute()

    bullets = []
    for row in response.data or []:
        art_num = row.get("art_num")
        entry_type = row.get("type")
        word = row.get("word")

        # Header fields are joined with " | "; optional ones appear only when truthy.
        fields = [f"- ent_id={row.get('ent_id')}", f"law={row.get('belongs_to') or 'N/A'}"]
        if art_num:
            fields.append(f"article={art_num}")
        if entry_type:
            fields.append(f"type={entry_type}")
        if (entry_type or "").lower() == "definition" and word:
            fields.append(f"defines={word}")

        body = (row.get("contents") or "").strip().replace("\n", " ")
        if len(body) > 800:  # safety trim
            body = body[:800] + "…"

        bullets.append(" | ".join(fields) + "\n  " + body)

    if not bullets:
        return "NO_CONTEXT"
    return "\n".join(bullets)

In [27]:
# PRD body: the "content" column of the first Document row returned for doc_id 1.
prd_text = (
    supabase.table("Document")
    .select("content")
    .eq("doc_id", 1)
    .execute()
    .data[0]["content"]
)

# Legal entries to cite, rendered into prompt-ready text.
ent_ids = ["1", "2", "3"]
law_context = get_law_context(ent_ids)

# Number of scenarios requested from the agent.
max_n = 3

In [28]:
#law_context

In [29]:
# cell 3: define Pydantic models (the schema LlamaIndex will enforce)
from pydantic import BaseModel, Field, field_validator
from typing import List, Literal

# Suffix appended to a description that lacks an attack-vector parenthetical.
PLACEHOLDER_PAREN = "(Attack vector: unspecified)"

# Matches a closing "(Attack vector: <text>)" parenthetical at the very end of a string.
PAREN_RE = re.compile(
    r"\(Attack vector:\s*.+\s*\)$"
)

class AttackScenario(BaseModel):
    """A single adversarial scenario returned by the agent.

    The ``description`` field is normalized before validation so that it always
    ends with an ``(Attack vector: …)`` parenthetical (the pattern in
    ``PAREN_RE``); when the suffix is missing, ``PLACEHOLDER_PAREN`` is appended
    instead of rejecting the value.
    """

    description: str  # must end with "(Attack vector: …)"
    potential_violations: List[str]
    jurisdictions: List[str] = Field(description="Law names, e.g., 'EU Digital Services Act'")
    law_citations: List[str] = Field(description="ent_id values relied on")
    rationale: str
    prd_spans: List[int] = Field(
        default_factory=list,
        description="0-based PRD line indices (matching <span id='L0'>, <span id='L1'> …)"
    )

    # 1) BEFORE: auto-append placeholder if the suffix is missing
    @field_validator("description", mode="before")
    @classmethod
    def ensure_parenthetical(cls, v: str):
        """Strip the raw value and append the placeholder suffix when none is present.

        Non-string input is returned untouched so the regular ``str`` validation
        reports the type error.
        """
        if not isinstance(v, str):
            return v
        s = v.strip()
        if not PAREN_RE.search(s):
            # add a space if needed before appending
            sep = "" if (len(s) == 0 or s.endswith((" ", "(", "—", "-", "–"))) else " "
            s = f"{s}{sep}{PLACEHOLDER_PAREN}"
        return s

    # 2) AFTER: assert format is now correct (paranoia check)
    @field_validator("description")
    @classmethod
    def must_have_parenthetical(cls, v: str):
        """Reject a description that still lacks the attack-vector suffix.

        Raises:
            ValueError: if the stripped value does not match ``PAREN_RE``.
        """
        if not PAREN_RE.search(v.strip()):
            # extremely unlikely now, but keeps the contract tight
            # The message names the format PAREN_RE actually enforces.
            raise ValueError(
                "description must end with '(Attack vector: …)'."
            )
        return v

class AuditBundle(BaseModel):
    """Top-level structured output: exactly ``max_n`` attack scenarios.

    ``max_n`` is read once, when this class is defined; re-run this cell after
    changing it so the length constraint follows.
    """

    # Pydantic v2 names the list-size constraints min_length / max_length;
    # min_items / max_items are the deprecated v1 spellings.
    scenarios: List[AttackScenario] = Field(min_length=max_n, max_length=max_n)

In [30]:
import html

def prd_to_spans(prd_text: str) -> str:
    """Wrap every line of the PRD in a <span> whose id is 'L' plus its 0-based index.

    Line text is HTML-escaped; the wrapped lines are joined with newlines.
    An empty input yields an empty string.
    """
    wrapped = []
    for idx, raw_line in enumerate(prd_text.splitlines()):
        wrapped.append(f"<span id='L{idx}'>{html.escape(raw_line)}</span>")
    return "\n".join(wrapped)

# Span-wrapped copy of the PRD; it is interpolated into the user prompt so
# scenarios can reference PRD lines by their 0-based index.
span_wrapped_prd = prd_to_spans(prd_text)


In [None]:
# cell 4: create a simple agent whose structured output is an AuditBundle
from llama_index.core.agent.workflow import FunctionAgent

# Tool-less agent using the default LLM from Settings.
# The system prompt is an f-string, so max_n is baked in when this cell runs;
# re-run the cell after changing max_n. Doubled braces ({{ }}) are literal
# braces in the rendered prompt.
agent = FunctionAgent(
    tools=[],  # no tools
    llm=Settings.llm,
    system_prompt=(f"""
        You are the Adversarial Strategist Agent.
        Generate diverse, realistic attack scenarios that stress-test safety & legal compliance.
        Return ONLY JSON. No prose outside JSON.
                   
        Task — Attack scenarios
        - Produce exactly {max_n} distinct scenarios (no fewer), each schema-compliant.
        - Each AttackScenario MUST include ONLY these keys:
        - "description": string (clear, concrete attack story; ONE paragraph max); END with: (Attack vector: <short phrase>)
        - "potential_violations": string[]
        - "jurisdictions": string[] (law names, e.g., "EU Digital Services Act")
        - "law_citations": string[] (ent_id values relied on)
        - "rationale": string (why this matters for THIS PRD)
        - "prd_spans": int[] (0-based PRD line indices, matching <span id='L0'>, <span id='L1'> …)
        - Ground each scenario in the PRD lines you cite in "prd_spans".
        - Make scenarios DISTINCT (no near-duplicates).

        Self-check before returning:
        - "scenarios" has exactly {max_n} items.
        - Every scenario has non-empty "prd_spans" with valid line indices.
        - Each scenario cites at least one ent_id in "law_citations".

        Output shape (ONLY this JSON object, no comments):
        {{
        "scenarios": [... exactly {max_n} items ...]
        }}
        """
    ),
    # Schema passed as the agent's structured-output class.
    output_cls=AuditBundle,
) # TODO: change to at most n times

In [32]:
# cell 5: run (Jupyter: await), get strongly-typed result
user_prompt = f"""
PRD (span-wrapped; 0-based indices via id='L{{i}}'):
<<<PRD_SPANS>>>
{span_wrapped_prd}
<<<END PRD_SPANS>>>

PRD:
<<<PRD>>>
{prd_text}
<<<END PRD>>>

Legal/Definition Context (each item has ent_id for citation):
{law_context}

Generate exactly {max_n} DISTINCT attack scenarios tied to this PRD, each citing >= 1 ent_id and include valid prd_spans.
Output:
Return an object with field "scenarios": AttackScenario[]
"""

resp = await agent.run(user_prompt)

# Typed access (no manual JSON parsing):
bundle = resp.get_pydantic_model(AuditBundle)
for i, s in enumerate(bundle.scenarios):
    print(f"[{i}] {s.description}")
    print("Description:", s.description)
    print("Jurisdictions:", s.jurisdictions)
    print("Violations:", s.potential_violations)
    print("Citations:", s.law_citations)
    print("Rationale:", s.rationale)

[0] A malicious actor creates multiple fake accounts to bypass the mentor eligibility requirements by artificially inflating follower counts or purchasing verification status. Once qualified as an 'Established Creator,' they systematically target vulnerable teen creators through the mentorship program, using the private chat infrastructure to groom minors under the guise of professional guidance. The enhanced monitoring systems fail to detect sophisticated grooming techniques that gradually escalate over time. (Attack vector: Fake mentor credential exploitation)
Description: A malicious actor creates multiple fake accounts to bypass the mentor eligibility requirements by artificially inflating follower counts or purchasing verification status. Once qualified as an 'Established Creator,' they systematically target vulnerable teen creators through the mentorship program, using the private chat infrastructure to groom minors under the guise of professional guidance. The enhanced monitorin

In [33]:
# Display the repr of the parsed AuditBundle.
bundle

AuditBundle(scenarios=[AttackScenario(description="A malicious actor creates multiple fake accounts to bypass the mentor eligibility requirements by artificially inflating follower counts or purchasing verification status. Once qualified as an 'Established Creator,' they systematically target vulnerable teen creators through the mentorship program, using the private chat infrastructure to groom minors under the guise of professional guidance. The enhanced monitoring systems fail to detect sophisticated grooming techniques that gradually escalate over time. (Attack vector: Fake mentor credential exploitation)", potential_violations=['Child exploitation through deceptive mentorship', 'Platform manipulation via fake accounts', 'Circumvention of safety controls'], jurisdictions=['S.B. 152 (2023) - Utah Social Media Regulation Act'], law_citations=['1'], rationale='This scenario directly exploits the mentor eligibility system defined in FR-1 while targeting the vulnerable teen population ex

In [33]:
# Same call that produces span_wrapped_prd, stored under a separate name for inspection.
test_prd_span = prd_to_spans(prd_text)


In [38]:
# Show the current value of max_n.
max_n

3

In [34]:
# Display the span-wrapped PRD string.
test_prd_span

"<span id='L0'>Product Requirements Document (PRD): Creator Connect</span>\n<span id='L1'>Document ID: PRD-2025-48B Title: PRD: Creator Connect (V1) Author: Priya Singh (Product Manager) Stakeholders: Creator Growth Team, Community Safety, Legal Target Launch: Q2 2025</span>\n<span id='L2'>1. Introduction &amp; Problem Statement</span>\n<span id='L3'>One of the biggest challenges for new creators is the &quot;cold start&quot; problem—gaining initial visibility and navigating the platform&#x27;s complexities. This leads to high churn in the first 30 days. Simultaneously, our established creators have expressed a desire for more meaningful ways to engage with the community beyond content creation.</span>\n<span id='L4'>&quot;Creator Connect&quot; aims to bridge this gap by creating a structured mentorship program. It will empower established creators to discover and guide promising new talent, fostering a more collaborative and supportive ecosystem. This will accelerate new creator growt