In [1]:
import os
import sys
import json, yaml, re
from pathlib import Path
# Make the project-local packages importable by the cells below. Paths are
# resolved against the current working directory, so the notebook must be
# started from the directory that contains "core".
for _pkg_dir in ("core", "core/Compliance"):
    sys.path.append(os.path.join(os.getcwd(), _pkg_dir))

In [None]:
from kg_builder import KGbuilder, AccuracyOptions

# Load the GDPR source document. The context manager closes the file handle
# deterministically instead of leaving it open until garbage collection.
with open("data/gdpr.json", "r", encoding="utf-8") as gdpr_file:
    gdpr = json.load(gdpr_file)

# Accuracy/extraction options handed to the builder below.
# NOTE(review): embed_backend is "openai", so local_embed_model is presumably
# only a fallback for a local backend — confirm against AccuracyOptions.
# NOTE(review): sim_th, tau and link_support_ratio are tuning thresholds whose
# exact meaning is defined in kg_builder; document them there.
acc = AccuracyOptions(
    process_all=True,
    mask_premise_in_norm=True,
    ensemble_premise=3,
    ensemble_triple=7,
    verify_passes=1,
    refs_mode="llm",
    build_logic_groups=True,
    detect_conflicts=True,
    use_embed_consensus=True,
    embed_backend="openai",
    embed_model="text-embedding-3-large",
    local_embed_model="sentence-transformers/all-mpnet-base-v2",
    embed_batch_size=64,
    sim_th=0.86,
    tau=0.6,
    link_support_ratio=0.4,
)

# Build the policy knowledge graph from the loaded document. `g` is reused by
# the snapshot-export cell, so the name must stay bound to this builder.
g = KGbuilder(
    gdpr,
    use_llm=True,
    prompt_path="configs/prompts.yaml",
    cache_path="data/kg_cache.jsonl",
    max_input_tokens=2400,
    pack_size=1,
    min_chars_for_llm=0,
    accuracy=acc,
)

In [None]:
# Export the builder's snapshot to data/gdpr_snapshot.json. Later cells read
# this same path as the policy graph. `snap` keeps the call's return value;
# it is not used again in the visible cells.
snap = g.export_snapshot("data/gdpr_snapshot.json")

In [None]:
from run_context_pipeline import ContextToGraph, PipelineConfig, pretty_print_graph, save_graph_json

# (2) sample text
# Free-text scenario that the pipeline turns into context graph(s).
# NOTE(review): "lab result45 flags" below looks like a typo for
# "lab result flags"; the string is left untouched because it is model input
# and changing it would change the pipeline's output — confirm and fix.
sample = """
I'm the IT operations manager at a private hospital group in Lyon.
We plan to export from the EHR a weekly file containing: patient discharge date, ICD-10 diagnosis codes, lab result45 flags (e.g., HbA1c>7),
year of birth, sex, and 5-digit postcode, plus a stable hashed patient ID (the salt is stored in our data warehouse so we can reconnect the records).
The file will be ingested into our customer data platform to build lookalike audiences and to retarget discharged patients on Facebook/Instagram via server-to-server integrations.
Our admission form currently has a single bundled consent ('we may use your data for service improvement and offers');
we have not collected explicit, separate consent for using health data for marketing.
Marketing proposes to rely on legitimate interests and to continue sending events to US-based ad vendors.
We have not completed an updated SCC/TIA package for these transfers.
Context details: Acting as a controller. Stated purposes include marketing and retargeting. Data involved includes health-related data, identifiers and contact details; potential special categories: health. Data subjects: patients. Recipients or service providers include advertising vendor and social media platform. Cross-border access/transfer noted: US. Retention period mentioned: 365d. Existing security controls noted: hashing and access controls. Sector context: healthcare. Jurisdiction context: EU, FR.
""".strip()

# Pipeline configured against the policy snapshot exported earlier; seed and
# zero ER temperature are set explicitly here.
runner = ContextToGraph(PipelineConfig(
    policy_graph_path="data/gdpr_snapshot.json",
    chat_model="gpt-4o",
    embed_model="text-embedding-3-small",
    seed=42,
    top_k=5,
    er_temperature=0.0
))

amrs, graphs = runner.run(sample)
print(f"[AMR] {len(amrs)}, [Graphs] {len(graphs)}")

# The loop variable is deliberately NOT named `g`: that name holds the
# KGbuilder instance from the build cell, and rebinding it here would break
# re-running the snapshot-export cell afterwards.
# NOTE(review): every graph is saved to the same path; if save_graph_json
# overwrites rather than appends, only the last graph survives — confirm.
for ctx_graph in graphs:
    pretty_print_graph(ctx_graph, show_relations=True, only_strong=False)
    save_graph_json(ctx_graph, "data/context_graphs.json")

In [2]:
from compliance_gate import ComplianceGate, ComplianceMonitor

# Read back the two artifacts produced by the earlier cells: the policy
# snapshot and the saved context graph.
with open("data/gdpr_snapshot.json", "r", encoding="utf-8") as snapshot_file:
    policy_graph = json.load(snapshot_file)
with open("data/context_graphs.json", "r", encoding="utf-8") as context_file:
    context_graph = json.load(context_file)

# Monitor is enabled with redaction switched off.
# NOTE(review): presumably this logs unredacted inputs to the JSONL file —
# confirm that is acceptable for the data being checked.
monitor = ComplianceMonitor(
    path="monitor/compliance_monitor_v2.jsonl",
    enabled=True,
    redact=False,
)

gate = ComplianceGate(pred_threshold=0.55, top_k=8, monitor=monitor)
decisions = gate(policy_graph, context_graph)

# Keep only the decisions whose verdict is NON_COMPLIANT.
violations = []
for decision in decisions:
    if decision.verdict == "NON_COMPLIANT":
        violations.append(decision)

# Print each violation with at most its first two evidence items.
for violation in violations:
    print(violation.article, violation.why)
    for quote in violation.evidence[:2]:
        print(" -", quote)

  from tqdm.autonotebook import tqdm, trange



[meta] {'policy_name': 'policy', 'total_actor_cu': 431}
[anchors] 31 anchors

== Final Decisions (by article) ==
  - Article 5: NON_COMPLIANT (score=1.00) cu=DOC:GDPR/CHAPTER:II/ARTICLE:5/POINT:1/CU:745007547798  why='Data used for unspecified retargeting. | override: Purpose is incompatible with retargetin…'
  - Article 21: NON_COMPLIANT (score=1.00) cu=DOC:GDPR/CHAPTER:III/SECTION:4/ARTICLE:21/POINT:3/CU:160238578642  why='Data used for retargeting suggests direct marketing.'
  - Article 45: NON_COMPLIANT (score=0.95) cu=DOC:GDPR/CHAPTER:V/ARTICLE:45/POINT:1/CU:203778757933  why='No adequacy decision mentioned for US. | override: No adequacy decision or appropriate saf…'
  - Article 37: NON_COMPLIANT (score=0.90) cu=DOC:GDPR/CHAPTER:IV/SECTION:4/ARTICLE:37/POINT:2/CU:409744463546  why='Fails to address key condition explicitly.'
  - Article 15: NON_COMPLIANT (score=0.90) cu=DOC:GDPR/CHAPTER:III/SECTION:2/ARTICLE:15/POINT:2/CU:653980865037  why='No safeguards info on data transfer.'
