## AI Act Compliance Agent

In [1]:
from __future__ import annotations

import os
import time
import hashlib
from dataclasses import dataclass
from typing import Any, Optional, List, Dict

import requests
from pydantic import BaseModel, HttpUrl, Field
from pydantic_ai import Agent, RunContext
from openai import OpenAI
from pydantic_ai.mcp import MCPServerStreamableHTTP
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic_ai.providers.openai import OpenAIProvider

In [2]:
# -----------------------------
# PydanticAI dependencies
# -----------------------------

# Load necessary API keys into the process environment.
# build_deps() below reads BRAVE_SEARCH_API_KEY and JINA_API_KEY from os.environ.
# NOTE(review): this import lives outside the notebook's import cell.
from dotenv import load_dotenv
load_dotenv()

# Access to external resources, including API clients.
# The clients are handed to the agent per run instead of living in global
# variables, so the API keys stay on the client objects.
@dataclass
class Deps:
    """Runtime dependencies that agent tools receive through ``ctx.deps``."""
    # Brave Web Search client (runs the search query).
    brave: BraveSearchClient
    # Jina Reader client (extracts readable text from a URL).
    jina: JinaReaderClient
    # Small delay between HTTP requests (to avoid hitting rate limits)
    per_request_sleep_s: float = 0.2

def build_deps() -> Deps:
    """Create the API clients from environment variables and bundle them in a ``Deps``.

    Raises:
        KeyError: if ``BRAVE_SEARCH_API_KEY`` is not set. ``JINA_API_KEY`` is
            optional and is passed on as ``None`` when absent.
    """
    # Read both keys first so a missing Brave key fails before anything is built.
    brave_token = os.environ["BRAVE_SEARCH_API_KEY"]
    jina_token = os.environ.get("JINA_API_KEY")

    # Only the Brave client receives the shared session.
    http_session = requests.Session()
    brave_client = BraveSearchClient(api_key=brave_token, session=http_session)
    jina_client = JinaReaderClient(api_key=jina_token, timeout_s=90)

    return Deps(brave=brave_client, jina=jina_client, per_request_sleep_s=0.25)

#### Import MCP-based toolsets from [Ansvar Systems](https://ansvar.eu/)

In [3]:
# MCP servers and the subset of their tools that the agent may call.
# Each server is filtered by an allow-list (to leave out very computationally
# expensive tools) and then prefixed, so the tool names the model sees start
# with "eu_", "us_", "nl_" or "automotive_".

# --- EU regulations ---
EU_law_total = MCPServerStreamableHTTP("https://eu-regulations-mcp.vercel.app/mcp")
EU_ALLOWED = {
    "check_applicability",
    "compare_requirements",
    "get_article",  # TODO: confirm whether this one is too expensive to keep
    "get_definitions",
    "search_regulations",
}
EU_regulation_tools = (
    EU_law_total
    .filtered(lambda _ctx, tool: tool.name in EU_ALLOWED)
    .prefixed("eu")
)

# --- US regulations ---
US_law_total = MCPServerStreamableHTTP("https://us-regulations-mcp.vercel.app/mcp")
US_ALLOWED = {
    "check_applicability",
    "compare_requirements",
    "get_compliance_action_items",
    "get_evidence_requirements",
    "map_controls",
    "search_regulations",
    # "get_section" is deliberately left out (expensive)
}
US_regulation_tools = (
    US_law_total
    .filtered(lambda _ctx, tool: tool.name in US_ALLOWED)
    .prefixed("us")
)

# --- Dutch law ---
NL_law_total = MCPServerStreamableHTTP("https://dutch-law-mcp.vercel.app/mcp")
NL_ALLOWED = {
    "check_currency",
    "get_dutch_implementations",
    "get_eu_basis",
    "get_provision",
    "get_provision_eu_basis",
    "search_legislation",
    "validate_eu_compliance",
}
NL_regulation_tools = (
    NL_law_total
    .filtered(lambda _ctx, tool: tool.name in NL_ALLOWED)
    .prefixed("nl")
)

# --- Automotive cybersecurity ---
Automotive_total = MCPServerStreamableHTTP("https://automotive-cybersecurity-mcp.vercel.app/mcp")
CAR_ALLOWED = {
    "list_sources",
    "list_work_products",
    "search_requirements",
}
automotive_regulation_tools = (
    Automotive_total
    .filtered(lambda _ctx, tool: tool.name in CAR_ALLOWED)
    .prefixed("automotive")
)

#### Define web-search tool using the [Brave Search API](https://brave.com/search/api/) and the [Jina Reader API](https://jina.ai/reader/)

In [4]:
# -----------------------------
# Output models (tool returns this)
# -----------------------------

class BraveResult(BaseModel):
    """One Brave web-search hit, built from the dicts returned by ``BraveSearchClient.search``."""
    # Result title (the client substitutes "" when Brave sends none).
    title: str
    # Result URL; validated by pydantic's HttpUrl type.
    url: HttpUrl
    # Snippet text from Brave; optional.
    description: Optional[str] = None


class WebDocument(BaseModel):
    """Readable text extracted from one search-result URL by ``web_search_and_read``."""
    # URL taken from the Brave search result.
    url: HttpUrl
    # Title from Jina Reader, with the Brave title or the URL as fallback.
    title: str = ""
    # Snippet/description from the Brave result.
    description: Optional[str] = None
    # Extracted page text, truncated to the tool's max_chars_per_doc.
    text: str
    # Extraction timestamp as produced by utc_now_iso().
    fetched_at_utc: str
    # SHA-256 hex digest of `text` (computed after truncation).
    sha256: str
    # Extra response metadata; web_search_and_read stores "jina_status" and "jina_code" here.
    meta: Dict[str, Any] = Field(default_factory=dict)


class WebSearchAndReadOutput(BaseModel):
    """Structured return value of the ``web_search_and_read`` tool."""
    # The search query exactly as passed to the tool.
    query: str
    # Brave search hits.
    search_results: List[BraveResult]
    # One entry per URL whose text was extracted successfully.
    documents: List[WebDocument]
    # One {"url": ..., "error": ...} entry per URL that failed.
    errors: List[Dict[str, str]]

In [5]:
# -----------------------------
# Small utilities
# -----------------------------

def utc_now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    iso_layout = "%Y-%m-%dT%H:%M:%SZ"
    now_utc = time.gmtime()
    return time.strftime(iso_layout, now_utc)

def sha256_text(s: str) -> str:
    """Return the SHA-256 hex digest of ``s`` encoded as UTF-8.

    Characters that cannot be encoded (e.g. lone surrogates) are dropped
    before hashing instead of raising.
    """
    hasher = hashlib.sha256()
    hasher.update(s.encode("utf-8", errors="ignore"))
    return hasher.hexdigest()

def _truncate(s: str, max_chars: int) -> str:
    """Return at most the first ``max_chars`` characters of ``s``.

    A ``max_chars`` of zero or less means "no limit": ``s`` is returned unchanged.
    """
    return s if max_chars <= 0 else s[:max_chars]

In [6]:
# -----------------------------
# Brave Web Search client
# -----------------------------

class BraveSearchClient:
    """Minimal client for the Brave Web Search API.

    Sends one GET request per query and reduces Brave's result objects to
    plain ``{"url", "title", "description"}`` dicts.
    """

    # Brave Web Search endpoint.
    ENDPOINT = "https://api.search.brave.com/res/v1/web/search"

    # Largest `count` the Brave web search endpoint accepts per request.
    MAX_COUNT = 20

    def __init__(self, api_key: str, session: Optional[requests.Session] = None, timeout_s: int = 30):
        """
        Args:
            api_key: Brave Search API subscription token (sent in the
                X-Subscription-Token header).
            session: Optional ``requests.Session`` to reuse (connection pooling).
                A new session is created when none is given.
            timeout_s: Timeout in seconds for the HTTP request to Brave.
        """
        self.api_key = api_key
        self.session = session if session is not None else requests.Session()
        self.timeout_s = timeout_s
        self.endpoint = self.ENDPOINT

    def search(self, query: str, count: int = 10) -> List[Dict[str, str]]:
        """Run a web search and return the hits that carry a URL.

        Args:
            query: The search query string.
            count: Number of results requested. Clamped to ``1..MAX_COUNT``
                so an oversized request is not rejected by the API; Brave may
                still return fewer results.

        Returns:
            A list like ``[{"url": "...", "title": "...", "description": "..."}, ...]``.
            Empty when Brave reports no web results.

        Raises:
            requests.HTTPError: if Brave answers with a 4xx/5xx status.
        """
        # - X-Subscription-Token authenticates the request
        # - Accept asks for JSON
        # - Accept-Encoding gzip allows compressed responses
        headers = {
            "X-Subscription-Token": self.api_key,
            "Accept": "application/json",
            "Accept-Encoding": "gzip",
        }
        params = {"q": query, "count": max(1, min(count, self.MAX_COUNT))}

        r = self.session.get(
            self.endpoint,
            params=params,
            headers=headers,
            timeout=self.timeout_s,
        )
        r.raise_for_status()
        data = r.json()

        # The "web" section (or its "results") can be missing or null when the
        # query has no web hits; treat that as "no results" rather than a crash.
        results = (data.get("web") or {}).get("results") or []

        # Keep only hits with a URL; normalise missing title/description to "".
        return [
            {
                "url": item["url"],
                "title": item.get("title") or "",
                "description": item.get("description") or "",
            }
            for item in results
            if item.get("url")
        ]

In [7]:
# -----------------------------
# Jina Webpage Reader client
# -----------------------------

class JinaReaderClient:
    """Minimal client for the Jina Reader API (URL -> readable text)."""

    def __init__(self, api_key: Optional[str] = None, timeout_s: int = 60):
        """
        Args:
            api_key: Jina API key sent as ``Authorization: Bearer <key>``.
                May be ``None`` or empty, in which case the request is sent
                without an Authorization header.
            timeout_s: Seconds to wait before giving up on the HTTP request.
        """
        # Jina Reader base URL.
        self.base = "https://r.jina.ai"
        self.api_key = api_key
        self.timeout_s = timeout_s

    def read_url(self, url: str, use_readerlm_v2: bool = False, bypass_cache: bool = False) -> dict:
        """Fetch ``url`` through Jina Reader and return the parsed JSON response.

        Args:
            url: Page to extract.
            use_readerlm_v2: Ask Jina to use ReaderLM-v2 for extraction
                (may be slower / costlier).
            bypass_cache: Ask Jina to skip its cached copy of the page.

        Returns:
            The JSON body as a dict, typically shaped like
            ``{"data": {...}, "status": ..., "code": ...}``.

        Raises:
            requests.HTTPError: if Jina answers with a 4xx/5xx status.
        """
        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
        }
        # Only authenticate when a key is configured; formatting None into the
        # header would send the literal string "Bearer None".
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"
        if use_readerlm_v2:
            headers["X-Respond-With"] = "readerlm-v2"
        if bypass_cache:
            headers["X-No-Cache"] = "true"

        # POST <base>/ with JSON {"url": "..."}
        r = requests.post(
            f"{self.base}/",
            headers=headers,
            json={"url": url},
            timeout=self.timeout_s,
        )
        r.raise_for_status()
        return r.json()

In [8]:
# -----------------------------
# Web search tool
# -----------------------------

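# Registered with the agent through `Agent(tools=[web_search_and_read])` in the
# agent definition cell, not via the `@agent.tool` decorator.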
def web_search_and_read(
    ctx: RunContext[Deps],
    query: str,
    num_results: int = 5,
    max_chars_per_doc: int = 12000,
    use_readerlm_v2: bool = True,
) -> WebSearchAndReadOutput:
    """
    Search the web with Brave, then extract readable text from the top URLs using Jina Reader.

    Problems with a single result (invalid URL, failed or empty extraction) are reported
    in the `errors` list of the output; the remaining results are still processed.

    Args:
        query: Web search query.
        num_results: Number of Brave results to fetch.
        max_chars_per_doc: Truncate each extracted document to this many characters.
        use_readerlm_v2: Use Jina's ReaderLM-v2 mode for higher-quality extraction.

    Returns:
        The query, the validated Brave results, one document per successfully read URL,
        and one error entry per result that could not be used.
    """
    # Dependencies object carrying the Brave and Jina clients.
    deps = ctx.deps

    # - documents: successful extractions from Jina Reader
    # - errors: failures per URL
    documents: List[WebDocument] = []
    errors: List[Dict[str, str]] = []

    # A failure of the search request itself is not caught: without search
    # results there is nothing to read.
    brave_raw = deps.brave.search(query=query, count=num_results)

    # Validate each hit separately so one malformed URL does not abort the call.
    search_results: List[BraveResult] = []
    for raw in brave_raw:
        try:
            search_results.append(BraveResult(**raw))
        except ValueError as e:  # pydantic's ValidationError is a ValueError
            errors.append({"url": str(raw.get("url", "")), "error": f"Invalid search result: {e}"})

    # URLs already processed (to avoid duplicate work).
    seen: set[str] = set()

    for r in search_results:
        url = str(r.url)
        if url in seen:
            continue
        seen.add(url)

        # Pause between requests to stay under rate limits.
        time.sleep(deps.per_request_sleep_s)

        try:
            # Only pass arguments that JinaReaderClient.read_url defines; an
            # unknown keyword would raise TypeError for every URL and be
            # swallowed by the handler below, leaving `documents` empty.
            resp = deps.jina.read_url(url, use_readerlm_v2=use_readerlm_v2)

            # "data" may be missing or null on a failed extraction.
            data = resp.get("data") or {}

            # `or` (not a .get default) so explicit null / empty values also fall back.
            title = (data.get("title") or r.title or url).strip()
            content = (data.get("content") or "").strip()
            if not content:
                raise ValueError("Empty content returned by Jina Reader")

            # Truncate so the tool doesn't return huge payloads; hash what is returned.
            content = _truncate(content, max_chars_per_doc)

            documents.append(
                WebDocument(
                    url=r.url,  # original URL
                    title=title,  # title (from Jina, or Brave, or URL)
                    description=r.description,  # snippet/description from Brave results
                    text=content,  # extracted readable text
                    fetched_at_utc=utc_now_iso(),  # timestamp of extraction
                    sha256=sha256_text(content),  # hash of text content
                    meta={
                        "jina_status": resp.get("status"),
                        "jina_code": resp.get("code"),
                    },
                )
            )

        # Deliberate best-effort: record the failure for this URL and continue.
        # The exception type is included because some messages (e.g. KeyError)
        # are not self-explanatory.
        except Exception as e:
            errors.append({"url": url, "error": f"{type(e).__name__}: {e}"})

    # One structured object the agent can reason over.
    return WebSearchAndReadOutput(
        query=query,
        search_results=search_results,
        documents=documents,
        errors=errors,
    )

#### Define Agent

In [9]:
# System instructions for the agent.
# The toolset names used in the text are group labels; the callable tool names
# are prefixed ("eu_", "nl_", "us_", "automotive_") by the `.prefixed(...)`
# calls where the toolsets are built. The text must only route to toolsets
# that are actually registered on the agent.
multitool_instructions = """
You are a research and reasoning assistant specialized in regulations, compliance obligations, and automotive cybersecurity standards.
Your priority is accuracy, source-grounded answers, and practical compliance guidance.
Clearly separate (a) what the law/standard says from (b) your interpretation or recommended implementation approach.
Do not hallucinate citations, article numbers, thresholds, dates, or regulator positions.

TOOL ROUTING (choose the right tool before answering)

Tool naming: the toolset names used below are groups, not callable tool names. The callable tools carry a prefix:
- eu_* for eu_regulation_tools
- nl_* for nl_regulation_tools
- us_* for us_regulation_tools
- automotive_* for automotive_regulation_tools
Only call tools that are actually available to you. If a capability described below has no matching tool, say so instead of guessing.

1) Up-to-date / niche web information: use web_search_and_read
Use web_search_and_read when:
- The user asks for “latest/current/recent changes,” enforcement actions, guidance updates, timelines, market developments, or anything that could have changed recently.
- You need official announcements, regulator guidance, press releases, policy updates, or up-to-date interpretations.
- You need to verify names/dates/versions of standards, frameworks, laws, or amendments.
After calling web_search_and_read:
- Answer using the returned documents.
- Cite the URLs for each key factual claim derived from the documents.
- Prefer primary/official sources (regulators, official journals, government sites, standard bodies) over secondary sources.

2) General automotive industry context: no dedicated toolset
There is no dedicated tool for general questions about:
- OEM/supplier ecosystem practices and terminology
- Vehicle development lifecycle and governance
- Typical compliance processes, roles, deliverables, and homologation context
If the question is specifically automotive cybersecurity regulation/standards, prefer automotive_regulation_tools (below).
Otherwise use web_search_and_read, or answer from general knowledge and clearly label it as such.

3) EU regulation: use eu_regulation_tools
Use eu_regulation_tools for questions about EU-wide law and obligations, including (non-exhaustive):
- GDPR, DORA, NIS2, AI Act, Chips Act, MiCA, eIDAS 2.0, Medical Device Regulation (MDR), and other EU regulations in the corpus.
What eu_regulation_tools contains (capabilities you should actively use):
- 49 Regulations (GDPR, DORA, NIS2, AI Act, Chips Act, MiCA, eIDAS 2.0, MDR, and 40 more)
- 2,528 Articles + 3,869 Recitals + 1,226 Official Definitions
- Full-Text Search across all regulations
- Control Mappings: 709 mappings to ISO 27001:2022 and NIST CSF 2.0
- Evidence Requirements: 407 audit artifacts across all 49 regulations
- Sector Rules: 323 applicability rules across sectors/industries
- Daily Updates: automatic freshness checks against EUR-Lex
When answering with eu_regulation_tools:
- Prefer citing the exact Article/Recital/Definition where possible (keep quotes short).
- Use sector rules to determine applicability and scope.
- Use evidence requirements to list audit-ready artifacts (policies, logs, registers, DPIAs, etc.).
- Use control mappings when the user asks “how do we implement this?” or wants ISO/NIST alignment.

4) Dutch law: use nl_regulation_tools
Use nl_regulation_tools for:
- Dutch statutes and regulations
- Dutch statutes implementing an EU directive/regulation
- Dutch legal requirements and structures grounded in Dutch statutory/regulatory text
When answering:
- Cite the Dutch legal basis (article/section) and explain how it maps to the user’s scenario.

5) Automotive cybersecurity regulation & standards: use automotive_regulation_tools
Use automotive_regulation_tools for automotive cybersecurity compliance, homologation evidence, and engineering work products for:
- UNECE R155 (CSMS): full regulation text + annexes (incl. Article 7 CSMS specifications; Annex 5 threat catalog; official annexes/forms/certificates)
- UNECE R156 (SUMS): full regulation text + annexes (incl. Article 7 SUMS requirements; official annexes)
- ISO/SAE 21434: clauses with expert guidance, R155 mappings, and work products
- VDA TISAX: control areas for supplier qualification
- SAE J3061: legacy lifecycle reference
- AUTOSAR Security: security modules for ECU implementation
When asked “What do we need to do to comply?”:
- Provide a concrete checklist of required artifacts (policies, processes, evidence packs).
- Map ISO/SAE 21434 work products to R155 expectations where relevant.
- Distinguish mandatory type-approval obligations (UNECE regs in relevant jurisdictions) vs best practice/contractual frameworks (ISO, TISAX, AUTOSAR).

6) US regulation: use us_regulation_tools
Use us_regulation_tools for American regulatory obligations and compliance interpretation for:
Healthcare & Privacy:
- HIPAA (Privacy Rule, Security Rule, Breach Notification Rule)
Consumer Privacy:
- CCPA/CPRA
Financial Services:
- SOX (key statute sections + SEC regs + PCAOB AS 2201 + ITGC guidance)
- GLBA Safeguards Rule (16 CFR Part 314)
Education:
- FERPA (34 CFR Part 99)
Children’s Privacy:
- COPPA (16 CFR Part 312)
Pharmaceutical & Medical Devices:
- FDA 21 CFR Part 11 (electronic records/signatures)
Environmental & Chemical Safety:
- EPA RMP (40 CFR Part 68)
Banking & Financial Institutions:
- FFIEC IT Examination Handbook
State Financial Services:
- NYDFS 500 (23 NYCRR 500)
State Privacy Laws:
- Virginia CDPA, Colorado CPA, Connecticut CTDPA, Utah UCPA
Payment Card Industry:
- If the question is PCI DSS, use the security-controls MCP cross-reference for PCI DSS v4.0 requirements and testing procedures (when available).
When answering:
- Identify scope triggers (entity type, data types, thresholds, geography).
- List key obligations and operational controls.
- Provide an implementation/evidence view (policies, security controls, recordkeeping, breach response steps).

EVIDENCE, CITATIONS, AND NON-HALLUCINATION RULES
- If you use web_search_and_read, cite URLs for every major factual claim that comes from the retrieved documents.
- If you use regulation tools, cite the specific Article/Recital/Definition/Clause/Section you relied on.
- Do not invent article numbers, definitions, dates, thresholds, enforcement positions, or compliance requirements.
- If the tools do not contain enough detail to answer, say what is missing and either: (a) call the appropriate tool, or
  (b) explain what additional info would be needed.

DEFAULT ANSWER STRUCTURE (unless user requests otherwise)
1) Summary (2–5 bullets)
2) Applicability (who/what/where; clearly state assumptions)
3) Key Requirements (group by theme)
4) Practical Compliance Steps (actions + suggested audit artifacts)
5) Sources (URLs and/or Article/Clause references)

CLARIFYING QUESTIONS
Ask only if necessary for correctness. Typical clarifiers:
- Jurisdiction (EU vs NL vs US and which state)
- Entity role/type (controller/processor; covered entity/business associate; financial institution; OEM vs supplier)
- Data types (personal data, PHI, children’s data, payment card data)
- Product/service context and deployment geography
- Timeline/version applicability

CONFLICTS, VERSIONS, AND FRESHNESS
- If sources conflict, prefer primary legal text and regulator guidance; note the conflict explicitly.
- If version matters (e.g., standard revisions), state the version/date you are using if known; otherwise say it is not confirmed.
- If the user asks for the latest updates and/or simple overviews, use web_search_and_read.

TONE AND COMPLIANCE POSITIONING
- Be precise, practical, and transparent.
- Avoid overstating certainty. You are not providing legal advice.
- If asked about compliance, respond with a gap-assessment checklist and the evidence needed to substantiate compliance.
"""

In [10]:
# Chat model that drives the agent.
model = OpenAIChatModel("gpt-4o-mini")

# NOTE(review): these two unfiltered server handles are not referenced by the
# agent below, which uses the filtered/prefixed toolsets instead. They look
# like leftovers; confirm before removing.
eu = MCPServerStreamableHTTP("https://eu-regulations-mcp.vercel.app/mcp")
nl = MCPServerStreamableHTTP("https://dutch-law-mcp.vercel.app/mcp")

# The agent combines one plain function tool (web search + page reading) with
# the four filtered MCP toolsets. Tools receive a `Deps` instance per run.
agent = Agent(
    model=model,
    deps_type=Deps,
    tools=[web_search_and_read],
    toolsets=[
        EU_regulation_tools,
        NL_regulation_tools,
        US_regulation_tools,
        automotive_regulation_tools,
    ],
    instructions=multitool_instructions,
)

In [11]:
# Necessary tool: web search
result = await agent.run("What's the latest on the EU AI Act enforcement timeline?", deps=build_deps())
print(result.output)

### Summary
- The **EU AI Act** entered into force on **August 1, 2024**.
- The full application of the Act is projected for **August 2, 2026**, with various provisions becoming applicable in stages.
- Initial compliance deadlines for certain obligations are set for **December 13, 2024**.
- General-purpose AI models on the market before August 2027 must be compliant by that date.

### Applicability
- The regulations apply to all providers and users of AI systems in the EU, including entities developing, utilizing, or importing such systems. Compliance will depend on the classification of AI systems according to risk levels defined by the Act.

### Key Requirements
1. **Implementation Timeline:**
   - The Act will become fully applicable by **August 2, 2026**.
   - Certain obligations are due by **December 13, 2024**.

2. **Compliance Deadlines:**
   - Providers of general-purpose AI models must ensure compliance if these models were placed on the market before **August 2, 2027**.

3. *

In [12]:
# Necessary tool: MCP servers from Ansvar Systems
result = await agent.run("We build an AI system for CV screening in the Netherlands. What do we need to implement under the EU AI Act?")
print(result.output)

### Summary
- The EU AI Act applies to AI systems and sets requirements based on risk classifications.
- Your AI system for CV screening is likely subject to transparency obligations and safeguards due to the processing of personal data.
- Additional regulations such as GDPR and potentially ePrivacy also apply, affecting how personal data is handled.

### Applicability
- **Entity Type**: Business providing AI systems for CV screening.
- **Jurisdiction**: Netherlands, under EU regulations.
- **AI Act Classification**: Your application may fall under the **high-risk category** due to its impact on individuals' employment opportunities, thereby triggering specific compliance requirements under the AI Act.

### Key Requirements
1. **Transparency and Information**:
   - Provide clear information on the AI system's capabilities and limitations (Article 50).
   - Ensure users understand how decisions are made based on AI predictions.

2. **Risk Management**:
   - Conduct a risk assessment to 

In [None]:
#Other questions
#
#Comparing US regulation to EU regulation
#Healthcare data: “How do HIPAA obligations for safeguarding and breach notification compare to GDPR requirements for security of processing and personal data breach reporting for a telehealth provider operating in both the US and EU?”
#Financial services cybersecurity: “How do DORA requirements for ICT risk management and incident reporting compare to NYDFS 500 and GLBA Safeguards Rule obligations for a fintech that serves EU and US customers?”
#
#Dutch implementation of EU regulation
#“Which Dutch statutes and regulatory instruments implement the NIS2 Directive, and which Dutch authority is responsible for supervision and enforcement under that implementation?”
#“How has the Netherlands implemented EU consumer data protection / privacy enforcement in national law—what Dutch legal provisions and supervisory powers translate GDPR enforcement into Dutch practice?”
#
#Cybersecurity in the automotive industry
#“What evidence and work products are typically required to demonstrate compliance with UNECE R155 (CSMS) for type approval, and how do they map to ISO/SAE 21434 deliverables?”
#“How should an OEM design a secure software update process to meet UNECE R156 (SUMS), including governance, risk assessment, and post-deployment monitoring requirements?”