# 04 Query Graph (Ref-only schema)

현재 그래프 스키마(Document/Article/Paragraph + REF)에 맞춘 조회 노트북

In [None]:
import os
from dotenv import load_dotenv
from neo4j import GraphDatabase

# Load environment variables via python-dotenv before reading the settings.
load_dotenv()

# Connection settings: (environment variable, fallback) pairs, whitespace-trimmed.
URI, USER, PWD, DB = (
    os.getenv(name, fallback).strip()
    for name, fallback in (
        ('NEO4J_URI', ''),
        ('NEO4J_USER', 'neo4j'),
        ('NEO4J_PASSWORD', ''),
        ('NEO4J_DATABASE', 'neo4j'),
    )
)

# URI and password have no usable fallback, so fail early when either is blank.
if not (URI and PWD):
    raise ValueError('NEO4J_URI / NEO4J_PASSWORD 확인 필요')

# Shared driver used by every query cell; check the connection right away.
driver = GraphDatabase.driver(URI, auth=(USER, PWD))
driver.verify_connectivity()


def run_query(cypher: str, **params):
    """Run ``cypher`` on database ``DB`` through the module-level ``driver``.

    Args:
        cypher: Cypher query text.
        **params: Forwarded as keyword arguments to ``driver.execute_query``;
            the cells in this notebook use them as query parameters
            (e.g. ``$keys``, ``$target``).

    Returns:
        A ``(records, summary, keys)`` tuple taken from the driver's result.
    """
    result = driver.execute_query(cypher, database_=DB, **params)
    return result.records, result.summary, result.keys


In [None]:
# Natural-language question shown in the step-4 preview. The literal is split
# across lines purely for readability; the pieces concatenate to one string.
QUERY = (
    '서울특별시 종로구 송현동 48-24번지, 49-4번지, '
    '대지면적 9787m2, 연면적 25676m2, '
    '층수 지하 2층 지상3층, 건폐율 60%이하, 용적률 150%이하, '
    '면적표 25696m2, 높이 16m 이하, '
    '도시지역, 제1종일반주고, 고도지구(16m) 건축선을 알려줘'
)
# Keyword matched against Paragraph.content when searching for seeds.
TARGET = '건축선'
# Maximum number of seed paragraphs returned by the seed search.
TOP_K = 5
# Number of REF expansion rounds performed from the seed articles.
HOPS = 1


In [None]:
# Step 1: seed paragraph search.
# Select up to TOP_K paragraphs whose content contains TARGET; content is
# truncated to 500 characters in the query itself.
seed_cypher = (
    'MATCH (p:Paragraph) '
    'WHERE p.content CONTAINS $target '
    'RETURN p.paragraph_key AS paragraph_key, p.law_name AS law_name, p.article_num AS article_num, '
    'p.paragraph_num AS paragraph_num, substring(p.content,0,500) AS content '
    'LIMIT $k'
)
records, summary, _ = run_query(seed_cypher, k=TOP_K, target=TARGET)

# Keep the seeds as plain dicts so later cells can index them by column name.
seeds = [record.data() for record in records]
print('seed count:', len(seeds), 'time(ms)=', summary.result_available_after)

seed_fields = ('paragraph_key', 'law_name', 'article_num', 'paragraph_num')
for seed in seeds:
    print('-', *(seed[field] for field in seed_fields))


In [None]:
# Step 2: REF hop expansion (seed Paragraph -> parent Article -> REF).
seed_keys = [seed['paragraph_key'] for seed in seeds]

# Resolve each seed paragraph to the article that owns it.
parent_cypher = (
    'UNWIND $keys AS k '
    'MATCH (a:Article)-[:HAS_PARAGRAPH]->(p:Paragraph {paragraph_key:k}) '
    'RETURN DISTINCT a.article_key AS article_key'
)
records, _, _ = run_query(parent_cypher, keys=seed_keys)

frontier = {record['article_key'] for record in records}  # articles to expand next
seen_articles = set(frontier)  # every article reached so far (seed parents included)
ref_edges = []  # one dict per REF relationship returned by the expansion query
ref_targets = set()  # distinct (labels, key) pairs of REF targets

# Outgoing REF relationships of the frontier articles; the target node may
# carry an article, paragraph, or law key, hence the coalesce.
ref_cypher = (
    'UNWIND $keys AS k '
    'MATCH (a:Article {article_key:k})-[r:REF]->(b) '
    'RETURN a.article_key AS from_article, labels(b) AS to_labels, '
    'coalesce(b.article_key, b.paragraph_key, b.law_key) AS to_key, '
    'r.scope AS scope, r.raw AS raw, r.target_level AS target_level'
)

for _hop in range(HOPS):
    if not frontier:
        break
    records, _, _ = run_query(ref_cypher, keys=list(frontier))

    next_frontier = set()
    for record in records:
        edge = record.data()
        ref_edges.append(edge)

        target_key = edge['to_key']
        if not target_key:
            # Edge is still recorded above, but a keyless target cannot be tracked.
            continue

        ref_targets.add((tuple(edge['to_labels']), target_key))
        # Only Article targets not seen before are expanded in the next hop.
        if 'Article' in edge['to_labels'] and target_key not in seen_articles:
            seen_articles.add(target_key)
            next_frontier.add(target_key)
    frontier = next_frontier

print('ref_edges:', len(ref_edges))
print('seen_articles:', len(seen_articles))
print('ref_targets:', len(ref_targets))


In [None]:
# Step 3: context collection.
# Fetch every paragraph (full content) of every article reached in step 2.
context_cypher = (
    'UNWIND $keys AS k '
    'MATCH (a:Article {article_key:k})-[:HAS_PARAGRAPH]->(p:Paragraph) '
    'RETURN p.paragraph_key AS paragraph_key, p.law_name AS law_name, p.article_num AS article_num, '
    'p.paragraph_num AS paragraph_num, p.content AS content'
)
article_keys = list(seen_articles)
records, summary, _ = run_query(context_cypher, keys=article_keys)

contexts = [record.data() for record in records]
print('contexts:', len(contexts), 'time(ms)=', summary.result_available_after)


In [None]:
# Step 4: sanity-check output (before LLM answer generation).
# Prints the question, up to 5 seeds, up to 20 REF edges, and a 220-character
# preview of up to 5 context paragraphs.
print('[QUERY]', QUERY)
print('\n[SEEDS]')
for s in seeds[:5]:
    print(f"- {s['law_name']} 제{s['article_num']}조 {s['paragraph_num']}항 :: {s['paragraph_key']}")

print('\n[REF EDGES]')
for e in ref_edges[:20]:
    # The REF expansion query returns the source article as `from_article`;
    # no `from_key` column exists, so indexing by it raised KeyError.
    # `or ''` is used because the scope/raw columns are always present in the
    # row and hold None when the relationship lacks the property.
    print(f"- {e['from_article']} --REF({e.get('scope') or ''})--> {e['to_key']} | raw={e.get('raw') or ''}")

print('\n[CONTEXT PREVIEW]')
for c in contexts[:5]:
    print(f"[{c['law_name']} 제{c['article_num']}조 {c['paragraph_num']}항]")
    # Guard against paragraphs stored without content (None is not sliceable).
    print((c['content'] or '')[:220])
    print('---')


In [None]:
# Close the driver created in the setup cell and confirm on stdout.
driver.close()
print('driver closed')
