In [1]:
import hashlib
import itertools
import json
from collections import defaultdict

from falkordb import FalkorDB

# docker run -p 6379:6379 -p 3000:3000 -it --rm -v ./data:/var/lib/falkordb/data falkordb/falkordb:edge

In [2]:
def sid(s):
    """Derive a stable 12-character hex id from a title string.

    The id is the first 12 hex digits of the SHA-1 digest of ``s.encode()``,
    so equal strings always map to the same id.
    """
    digest = hashlib.sha1(s.encode()).hexdigest()
    return digest[:12]

# json extraction
def extract_first_json(text):
    """Parse the first brace-balanced JSON object embedded in ``text``.

    Scanning starts at the first ``{`` and tracks nesting depth until the
    matching ``}`` is found. Braces that appear inside JSON string literals
    (including after escaped quotes) are ignored, so a value such as
    ``"}"`` no longer ends the object early.

    Args:
        text: Arbitrary text that may contain a JSON object.

    Returns:
        The decoded object, or None when ``text`` has no ``{`` or the first
        object is never closed.

    Raises:
        json.JSONDecodeError: if the balanced span is not valid JSON.
    """
    start = text.find('{')
    if start == -1:
        return None

    depth = 0
    in_string = False   # currently inside a "..." literal
    escaped = False     # previous character was a backslash inside a string
    for i in range(start, len(text)):
        ch = text[i]
        if in_string:
            if escaped:
                # This character is escaped; it cannot close the string.
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                return json.loads(text[start:i + 1])
    return None

In [14]:
# Input selection. Only the last assignment ever took effect, so the
# alternative input is kept as a comment instead of a dead assignment.
# file_path = 'json/2307.16513v2_strict_2p_o3.json'
file_path = 'traces/2307.16513v2_flex_1p_o3.txt'

with open(file_path, 'r', encoding='utf-8') as f:
    content = f.read()

# extract_first_json returns None when it finds no complete JSON object;
# fail here with a clear message rather than later on data["nodes"].
data = extract_first_json(content)
if data is None:
    raise ValueError(f"No JSON object found in {file_path}")

In [4]:
# Load the sample JSON from the prompt/schemas directory.
file_path = '../intervention_graph_creation/prompt/schemas/output_sample.json'

with open(file_path, 'r', encoding='utf-8') as f:
    content = f.read()
data = json.loads(content)

# Quick sanity check: show the whitespace-trimmed type of the first node.
first_node = data['nodes'][0]
print(first_node['type'].strip())

# --- build nodes ---
# One record per entry in data["nodes"]: the trimmed "type" becomes the graph
# label, the remaining fields become the node's properties. Optional fields
# fall back to [] / "" / None when absent.
nodes = [
    {
        "label": entry["type"].strip(),
        "props": {
            "name": entry["name"].strip(),
            "aliases": entry.get("aliases", []),
            "description": entry.get("description", ""),
            "concept_category": entry.get("concept_category", ""),
            "maturity": entry.get("maturity"),
            "intervention_lifecycle": entry.get("intervention_lifecycle"),
            "intervention_maturity": entry.get("intervention_maturity"),
        },
    }
    for entry in data["nodes"]
]

nodes



concept


[{'label': 'concept',
  'props': {'name': 'emergent deception abilities in state-of-the-art LLMs',
   'aliases': ['LLM deceptive behaviour', 'false-belief inducing capacity'],
   'description': 'Recent large language models (ChatGPT-3.5, GPT-4) can intentionally induce false beliefs in tests.',
   'concept_category': 'Finding',
   'maturity': None,
   'intervention_lifecycle': None,
   'intervention_maturity': None}},
 {'label': 'concept',
  'props': {'name': 'potential for models to bypass human monitoring and alignment',
   'aliases': ['alignment bypass risk', 'monitoring evasion threat'],
   'description': 'If models can deceive evaluators, they may hide unsafe objectives or behaviors.',
   'concept_category': 'Risk',
   'maturity': None,
   'intervention_lifecycle': None,
   'intervention_maturity': None}},
 {'label': 'intervention',
  'props': {'name': 'implement standardized deception-evaluation suite during pre-deployment testing',
   'aliases': ['deception benchmark before rele

In [5]:
# --- build edges ---
# Flatten every edge of every logical chain into {"type", "row"} records.
# The relationship type is upper-cased with "-" replaced by "_"; src/dst are
# the whitespace-trimmed node names the edge refers to.
# NOTE(review): confidence is read from the "confidence" key here, while the
# UUID-based edge pass later in this notebook reads "edge_confidence" -
# confirm which key the input actually uses.
edges = [
    {
        "type": edge["type"].upper().replace("-", "_"),
        "row": {
            "src": edge["source_node"].strip(),
            "dst": edge["target_node"].strip(),
            "description": edge.get("description", ""),
            "confidence": edge.get("confidence"),
        },
    }
    for chain in data.get("logical_chains", [])
    for edge in chain.get("edges", [])
]

edges

[{'type': 'LEADS_TO',
  'row': {'src': 'emergent deception abilities in state-of-the-art LLMs',
   'dst': 'potential for models to bypass human monitoring and alignment',
   'description': 'If models can deceive, they might hide unsafe intentions.',
   'confidence': None}},
 {'type': 'MITIGATED_BY',
  'row': {'src': 'potential for models to bypass human monitoring and alignment',
   'dst': 'implement standardized deception-evaluation suite during pre-deployment testing',
   'description': 'Systematic deception testing reduces unnoticed misalignment.',
   'confidence': None}},
 {'type': 'LEADS_TO',
  'row': {'src': 'chain-of-thought prompting amplifies deception performance',
   'dst': 'potential for models to bypass human monitoring and alignment',
   'description': 'Amplified deception exacerbates alignment risk.',
   'confidence': None}},
 {'type': 'MITIGATED_BY',
  'row': {'src': 'chain-of-thought prompting amplifies deception performance',
   'dst': 'restrict or obfuscate chain-of-

In [None]:
# --- 3) insert into FalkorDB ---
db = FalkorDB(host="localhost", port=6379)
g = db.select_graph("test")

# Indexes. The node records built earlier take their label from the JSON
# "type" field ("concept" / "intervention" in the sample data, lower-case) and
# store a "name" property; they have no "title". Index what is actually stored.
for index_label in ("concept", "intervention"):
    try:
        g.query(f"CREATE INDEX FOR (n:`{index_label}`) ON (n.name)")
    except Exception as exc:
        # Best-effort, matching the concept_category index cell in this
        # notebook. Presumably re-creating an existing index raises - TODO confirm.
        print(f"(Note) Index creation skipped or already exists: {exc}")

# nodes (group by label) so each label can be inserted with one query shape
by_label = defaultdict(list)
for node in nodes:
    by_label[node["label"]].append(node["props"])

In [None]:

# Rows sent per UNWIND query. The `batched(rows)` helper this cell used to call
# is not defined in any visible cell, so the chunking is done inline.
BATCH_SIZE = 500

# nodes: MERGE keyed on "name". The props built earlier contain no "title",
# so `row.title` would be null for every row.
for L, rows in by_label.items():
    for start in range(0, len(rows), BATCH_SIZE):
        chunk = rows[start:start + BATCH_SIZE]
        g.query(f"""
        UNWIND $rows AS row
        MERGE (n:`{L}` {{name: row.name}})
        SET n += row
        """, {"rows": chunk})

# relationships (group by type)
by_type = defaultdict(list)
for e in edges:
    # row carries src/dst (node names) plus the relationship properties
    by_type[e["type"]].append(e["row"])

for T, rows in by_type.items():
    for start in range(0, len(rows), BATCH_SIZE):
        chunk = rows[start:start + BATCH_SIZE]
        # Endpoints are looked up by name (edge src/dst hold node names);
        # only description and confidence are written onto the relationship.
        g.query(f"""
        UNWIND $rows AS row
        MATCH (s {{name: row.src}}), (t {{name: row.dst}})
        MERGE (s)-[r:`{T}`]->(t)
        SET r.description = row.description,
            r.confidence = row.confidence
        """, {"rows": chunk})

print("Done. Nodes keyed by name.")

AttributeError: 'NoneType' object has no attribute 'strip'

In [23]:
# graph from flex json
file_path = 'traces/2307.16513v2_flex_1p_o3.txt'

with open(file_path, 'r', encoding='utf-8') as f:
    content = f.read()

# The first JSON object found in the trace text becomes the working dataset.
data = extract_first_json(content)

# --- 1) Nodes: stable ids, labels and a name -> id lookup ---
nodes = []
name2id = {}          # node name / canonical name -> node id
labels_seen = set()   # every label encountered

for raw in data["nodes"]:
    canonical = raw.get("canonical_name")
    # The id is derived from the canonical name when present, else the name.
    node_id = sid((canonical or raw["name"]).strip())
    node_label = (raw["type"] or "ENTITY").upper()   # e.g., TASK, MODEL, PROMPT_TECHNIQUE
    labels_seen.add(node_label)

    # Register both spellings so either can be resolved to the same id.
    name2id[raw["name"]] = node_id
    if canonical:
        name2id[canonical] = node_id

    nodes.append({
        "label": node_label,
        "props": {
            "id": node_id,
            "name": raw["name"],
            "canonical_name": canonical,
            "aliases": raw.get("aliases", []),
            "confidence": raw.get("confidence"),
            "notes": raw.get("notes", ""),
        },
    })

# --- 2) Relationships from logical_chains ---
# Each edge becomes {"type", "row"}; endpoints are translated from node names
# to ids through name2id (an unknown name raises KeyError).
rels = []
for chain in data.get("logical_chains", []):
    for edge in chain.get("edges", []):
        # A missing/empty type falls back to RELATED; "-" is replaced by "_".
        rel_type = (edge["type"] or "RELATED").upper().replace("-", "_")
        endpoints = {
            "src": name2id[edge["source_node"]],
            "dst": name2id[edge["target_node"]],
        }
        rels.append({
            "type": rel_type,
            "row": {
                **endpoints,
                "rationale": edge.get("rationale", ""),
                "confidence": edge.get("confidence"),
            },
        })

# --- 3) Insert into FalkorDB ---
db = FalkorDB(host="localhost", port=6379)
g = db.select_graph("misalignment_v2")

# Rows sent per UNWIND query. The `batched(rows)` helper this cell used to call
# is not defined in any visible cell, so the chunking is done inline.
BATCH_SIZE = 500

# indexes on id, one per label. Guarded so the cell can be re-run, matching the
# best-effort handling of the concept_category index cell in this notebook.
# Presumably re-creating an existing index raises - TODO confirm.
for L in labels_seen:
    try:
        g.query(f"CREATE INDEX FOR (n:`{L}`) ON (n.id)")
    except Exception as exc:
        print(f"(Note) Index creation skipped or already exists: {exc}")

# nodes (group by label)
by_label = defaultdict(list)
for n in nodes:
    by_label[n["label"]].append(n["props"])

for L, rows in by_label.items():
    for start in range(0, len(rows), BATCH_SIZE):
        chunk = rows[start:start + BATCH_SIZE]
        # MERGE on id, then copy every prop from the row onto the node.
        g.query(f"""
        UNWIND $rows AS row
        MERGE (n:`{L}` {{id: row.id}})
        SET n += row
        """, {"rows": chunk})

# relationships (group by type)
by_type = defaultdict(list)
for r in rels:
    by_type[r["type"]].append(r["row"])

for T, rows in by_type.items():
    for start in range(0, len(rows), BATCH_SIZE):
        chunk = rows[start:start + BATCH_SIZE]
        # `SET r += row` copies the whole row, so src/dst are stored on the
        # relationship alongside rationale and confidence.
        g.query(f"""
        UNWIND $rows AS row
        MATCH (s {{id: row.src}}), (t {{id: row.dst}})
        MERGE (s)-[r:`{T}`]->(t)
        SET r += row
        """, {"rows": chunk})

print("Done.")

Done.


-----

In [32]:
import json
import os
import argparse
import uuid
import redis

# Connect to the FalkorDB server on localhost:6379 and select the "test" graph.
# This rebinds the module-level `db` and `g` that later cells query through.
db = FalkorDB(host="localhost", port=6379)
g = db.select_graph("test")

def sanitize_props(d):
    """
    Build a cleaned copy of ``d`` for use as node properties.

    The 'type' key and every key whose value is None are dropped. String
    values are lower-cased; all other values (lists included) are kept as-is.
    The input dict is not modified.
    """
    return {
        key: (value.lower() if isinstance(value, str) else value)
        for key, value in d.items()
        if key != "type" and value is not None
    }


In [33]:
# ---------- PASS 1: create nodes, assign UUIDs, capture mappings ----------
# Every run generates fresh UUIDs and CREATEs new nodes, so re-running this
# cell adds another copy of each node to the graph.
id_to_uid = {}       # JSON 'id' -> uid
name_to_uids = {}    # 'name' -> [uid1, uid2, ...] (names may repeat)

for item in data['nodes']:
    label = item.get("type")  # lower-case label used exactly as given
    if label not in {"concept", "intervention"}:
        continue  # only these two node types are imported

    uid = str(uuid.uuid4())
    props = {**sanitize_props(item), "uid": uid}

    # Remember how to find this node again when wiring up edges.
    json_id = item.get("id")
    if json_id is not None:
        id_to_uid[json_id] = uid
    node_name = item.get("name")
    if node_name is not None:
        name_to_uids.setdefault(node_name, []).append(uid)

    # CREATE rather than MERGE: nodes sharing a name stay separate nodes.
    create_query = f"""
    CREATE (n:`{label}`)
    SET n = $props
    RETURN n
    """
    g.query(create_query, {"props": props})

    print(label, props)

concept {'name': 'emergent deception abilities in state-of-the-art llms', 'aliases': ['LLM deceptive behaviour', 'false-belief inducing capacity'], 'description': 'recent large language models (chatgpt-3.5, gpt-4) can intentionally induce false beliefs in tests.', 'concept_category': 'finding', 'uid': '78cac319-086c-4e51-beab-102a818f61c3'}
concept {'name': 'potential for models to bypass human monitoring and alignment', 'aliases': ['alignment bypass risk', 'monitoring evasion threat'], 'description': 'if models can deceive evaluators, they may hide unsafe objectives or behaviors.', 'concept_category': 'risk', 'uid': '54b24f38-f313-4bb1-a8f7-31481ed4ade1'}
intervention {'name': 'implement standardized deception-evaluation suite during pre-deployment testing', 'aliases': ['deception benchmark before release', 'pre-deployment deception audit'], 'description': 'create and run a battery of structured tasks to measure a model’s deceptive tendencies prior to deployment.', 'intervention_lifec

In [20]:
# Inspect the name -> [uid, ...] lookup filled in by pass 1.
name_to_uids

{'emergent deception abilities in state-of-the-art LLMs': ['e0214779-8381-4d21-9499-093cef17192f'],
 'potential for models to bypass human monitoring and alignment': ['17933620-7bc2-40a4-863a-233cc8f94f1d'],
 'implement standardized deception-evaluation suite during pre-deployment testing': ['ea29d6e9-db85-45ce-a30f-a8c6f923a27e'],
 'chain-of-thought prompting amplifies deception performance': ['f1599cd7-0f11-45e4-809c-87355fba7da8'],
 'restrict or obfuscate chain-of-thought outputs in deployed systems': ['53137033-97ff-420f-bc0b-cf3e4d44ce71'],
 'Machiavellianism-inducing prompts increase propensity to deceive': ['baadf3e2-78ce-4f5d-80bb-232757c419bb'],
 'detect and neutralise Machiavellian-style prompt patterns at runtime': ['797242c5-47b9-497f-ac82-78e4bb5b5e88'],
 'model scale correlates with deception capability': ['d18760a9-d763-4d5c-a64e-c81cf2156c92'],
 'future larger models likely to possess stronger deceptive strategies': ['4df7c9f0-88a6-4920-94f0-7dbf977ebdd8'],
 'fine-tune 

In [34]:
# ---------- PASS 2: create edges, using UUIDs ----------
def resolve_ref(ref_value):
    """
    Map an edge endpoint reference to the uid of exactly one node.

    Lookup order:
        1) ``ref_value`` is a key of ``id_to_uid`` (a JSON 'id')
        2) ``ref_value`` is a key of ``name_to_uids`` holding exactly one uid
    Returns the uid, or None when the reference is None, unknown, or shared
    by several nodes.
    """
    if ref_value is None:
        return None

    # A JSON 'id' match always wins over a name match.
    if ref_value in id_to_uid:
        return id_to_uid[ref_value]

    # A name is only trusted when it identifies a single node.
    matches = name_to_uids.get(ref_value, [])
    return matches[0] if len(matches) == 1 else None

# Counters reported by the summary cell.
skipped_edges = 0
made_edges = 0

for chain in data.get("logical_chains", []):
    for edge in chain.get("edges", []):
        rel_type = edge.get("type")
        src_ref = edge.get("source_node")
        dst_ref = edge.get("target_node")

        # An edge needs a non-empty type and both endpoint references.
        if not rel_type or src_ref is None or dst_ref is None:
            skipped_edges += 1
            continue

        src_uid = resolve_ref(src_ref)
        dst_uid = resolve_ref(dst_ref)

        # resolve_ref returns None for unknown or ambiguous references;
        # such edges are counted and skipped.
        if not src_uid or not dst_uid:
            skipped_edges += 1
            continue

        params = {
            "src_uid": src_uid,
            "dst_uid": dst_uid,
            "desc": edge.get("description"),
            "conf": edge.get("edge_confidence"),
        }
        # rel_type is interpolated into the query text as given (not
        # normalised); endpoints are matched by uid without a label.
        q = f"""
        MATCH (s {{uid: $src_uid}}), (t {{uid: $dst_uid}})
        CREATE (s)-[r:`{rel_type}`]->(t)
        SET r.description = $desc, r.edge_confidence = $conf
        RETURN s, r, t
        """
        g.query(q, params)
        made_edges += 1

In [36]:
  # ---------- Index on :concept(concept_category) ----------
index_query = "CREATE INDEX ON :concept(concept_category)"
try:
    g.query(index_query)
except Exception as exc:
    # Best-effort: any failure is reported and the notebook carries on.
    print(f"(Note) Index creation skipped or already exists: {exc}")

In [37]:
# ---------- Summary ----------
# Totals are counted over the whole graph; made/skipped come from pass 2.
node_rows = g.query("MATCH (n) RETURN count(n)").result_set
node_count = node_rows[0][0]
edge_rows = g.query("MATCH ()-[r]->() RETURN count(r)").result_set
edge_count = edge_rows[0][0]

summary_lines = [
    f"Done. Nodes in graph: {node_count}, Edges in graph: {edge_count}",
    f"Edges created this run: {made_edges}, skipped (unresolved/ambiguous): {skipped_edges}",
    "Each node has 'uid' (UUID). Aliases preserved as a list on 'aliases'.",
    "Index attempted: CREATE INDEX ON :concept(concept_category)",
]
print("\n".join(summary_lines))

Done. Nodes in graph: 10, Edges in graph: 8
Edges created this run: 8, skipped (unresolved/ambiguous): 0
Each node has 'uid' (UUID). Aliases preserved as a list on 'aliases'.
Index attempted: CREATE INDEX ON :concept(concept_category)


In [41]:
# test library
from falkordb_import import FalkorImporter

file_path = '../intervention_graph_creation/prompt/schemas/output_sample.json'

importer = FalkorImporter(host="localhost", port=6379, graph="test")

# NOTE(review): the already-loaded `data` dict is what gets ingested;
# `file_path` above is not used. Presumably ingest() also accepts a path
# (the parameter is named data_or_path) - confirm against falkordb_import.
ok, info = importer.ingest(data_or_path=data)
if ok:
    print("Stats:", info["stats"])
else:
    print("Ingest failed:", info["error"])
    print(info["traceback"])

Stats: {'nodes_before': 0, 'edges_before': 0, 'nodes_after': 10, 'edges_after': 8, 'edges_created': 8, 'edges_skipped': 0}


In [40]:
# Display the parsed input dict currently bound to `data`.
data

{'nodes': [{'name': 'emergent deception abilities in state-of-the-art LLMs',
   'aliases': ['LLM deceptive behaviour', 'false-belief inducing capacity'],
   'type': 'concept',
   'description': 'Recent large language models (ChatGPT-3.5, GPT-4) can intentionally induce false beliefs in tests.',
   'concept_category': 'Finding',
   'intervention_lifecycle': None,
   'intervention_maturity': None},
  {'name': 'potential for models to bypass human monitoring and alignment',
   'aliases': ['alignment bypass risk', 'monitoring evasion threat'],
   'type': 'concept',
   'description': 'If models can deceive evaluators, they may hide unsafe objectives or behaviors.',
   'concept_category': 'Risk',
   'intervention_lifecycle': None,
   'intervention_maturity': None},
  {'name': 'implement standardized deception-evaluation suite during pre-deployment testing',
   'aliases': ['deception benchmark before release',
    'pre-deployment deception audit'],
   'type': 'intervention',
   'description':