## Graph DB 연결하기
* Crime Investigation Dataset: https://github.com/neo4j-graph-examples/pole

In [1]:
from neo4j import GraphDatabase

# URI examples: "neo4j://localhost", "neo4j+s://xxx.databases.neo4j.io"
URI = "neo4j+s://ddadee7d.databases.neo4j.io"
AUTH = ("neo4j", "dBUuO0ettwv7MK-R1UsGAPLUs7hA22-YltunltI4d18")

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.verify_connectivity()

In [2]:
# 로컬에 있는 .env 를 통해 API key 불러옴 
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
from neo4j import GraphDatabase, basic_auth

driver = GraphDatabase.driver(
  "neo4j://18.212.35.48:7687",
  auth=basic_auth("neo4j", "swamp-surges-rag"))

In [5]:
from neo4j_graphrag.llm.openai_llm import OpenAILLM

llm = OpenAILLM(model_name="gpt-4o")

In [4]:
cypher_query = '''
MATCH (l:Location {address:$address})<-[r:OCCURRED_AT]-(c:Crime)
RETURN c.date as crimeDate
'''

with driver.session(database="neo4j") as session:
  results = session.read_transaction(
    lambda tx: tx.run(cypher_query,
                      address="Piccadilly").data())
  for record in results:
    print(record['crimeDate'])

driver.close()

  results = session.read_transaction(


23/08/2017
18/08/2017
18/08/2017
3/08/2017
27/08/2017
15/08/2017
12/08/2017
11/08/2017
5/08/2017
10/08/2017
6/08/2017
1/08/2017
31/08/2017
21/08/2017
1/08/2017
20/08/2017
30/08/2017
19/08/2017
26/08/2017
7/08/2017
30/08/2017
10/08/2017
23/08/2017
30/08/2017
1/08/2017
1/08/2017
16/08/2017
30/08/2017
19/08/2017
6/08/2017
11/08/2017
25/08/2017
1/08/2017
5/08/2017
27/08/2017
22/08/2017
3/08/2017
5/08/2017
11/08/2017
26/08/2017
2/08/2017
31/08/2017
1/08/2017
25/08/2017
9/08/2017
14/08/2017
13/08/2017
16/08/2017
25/08/2017
9/08/2017
21/08/2017
26/08/2017
28/08/2017
18/08/2017
19/08/2017
11/08/2017
26/08/2017
3/08/2017
16/08/2017
23/08/2017
29/08/2017
19/08/2017
26/08/2017
29/08/2017
13/08/2017
17/08/2017
19/08/2017
26/08/2017
7/08/2017
11/08/2017
17/08/2017
26/08/2017
22/08/2017
4/08/2017
6/08/2017
25/08/2017
25/08/2017
22/08/2017
23/08/2017
2/08/2017
25/08/2017
26/08/2017
30/08/2017
6/08/2017
7/08/2017
26/08/2017
4/08/2017
11/08/2017
3/08/2017
10/08/2017
12/08/2017
20/08/2017
21/08/2017
17/

## 1. Graph 기반 RAG 구현하기: Text2Cyper

### 1) POLE(범죄수사데이터셋) 데이터 그래프 이해하기

### 2) Text2Cypher Retriever 사용하기

#### 2-1) DB 스키마 작성하기
- 리트리버를 사용하기 위해서는 LLM을 쓸 것임 
- LLM이 DB 스키마를 알고있어야지, 쿼리문을 잘 생성해낼 수 있음 
- 즉, 리트리버가 잘 작동하게 하기 위해서는 DB 스키마를 명시해줘야함 

#### 2-1-1) 방법1: 노드, 관계 프로퍼티, 관계방향성을 나타내는 스키마 작성 

In [10]:
from collections import defaultdict

def get_schema():
    schema = ""
    with driver.session() as session:
        # 모든 노드 라벨과 속성 추출
        node_schema = session.run("""
        CALL db.schema.nodeTypeProperties() YIELD nodeType, propertyName, propertyTypes
        RETURN nodeType, propertyName, propertyTypes
        """)

        nodes = defaultdict(dict)
        for record in node_schema:
            label = record["nodeType"].replace(":", "")
            prop = record["propertyName"]
            types = record["propertyTypes"]
            nodes[label][prop] = types[0] if types else "UNKNOWN"

        # 모든 관계 타입과 속성 추출
        rel_schema = session.run("""
        CALL db.schema.relTypeProperties() YIELD relType, propertyName, propertyTypes
        RETURN relType, propertyName, propertyTypes
        """)

        relationships = defaultdict(dict)
        for record in rel_schema:
            rel = record["relType"]
            prop = record["propertyName"]
            types = record["propertyTypes"]
            relationships[rel][prop] = types[0] if types else "UNKNOWN"

        # 관계 방향 및 타입 추출
        rel_types = session.run("""
        MATCH (a)-[r]->(b)
        RETURN DISTINCT labels(a) AS from_labels, type(r) AS rel_type, labels(b) AS to_labels
        """)

        rel_directions = set()
        for record in rel_types:
            from_label = f":{record['from_labels'][0]}"
            to_label = f":{record['to_labels'][0]}"
            rel_type = record['rel_type']
            rel_directions.add(f"({from_label})-[:{rel_type}]->({to_label})")

    # 출력
    schema += "\nNode properties:\n"
    for label, props in nodes.items():
        prop_str = ", ".join(f"{k}: {v}" for k, v in props.items())
        schema += f"{label} {{{prop_str}}}\n"

    schema += "\nRelationship properties:\n"
    for rel, props in relationships.items():
        prop_str = ", ".join(f"{k}: {v}" for k, v in props.items())
        schema += f"{rel} {{{prop_str}}}\n"

    schema += "\nThe relationships:\n"
    for rel in sorted(rel_directions):
        schema += f"{rel}\n"
    return schema

schema = get_schema()

  with driver.session() as session:


In [11]:
print(schema)


Node properties:
`Location` {address: String, postcode: String, longitude: Double, latitude: Double}
`Phone` {phoneNo: String}
`Email` {email_address: String}
`Officer` {name: String, surname: String, badge_no: String, rank: String}
`PostCode` {code: String}
`Area` {areaCode: String}
`PhoneCall` {call_date: String, call_type: String, call_duration: String, call_time: String}
`Crime` {id: String, date: String, type: String, last_outcome: String, charge: String, note: String}
`Object` {id: String, type: String, description: String}
`Vehicle` {make: String, model: String, year: String, reg: String}
`Person` {nhs_no: String, name: String, surname: String, age: String}

Relationship properties:
:`CURRENT_ADDRESS` {None: UNKNOWN}
:`HAS_PHONE` {None: UNKNOWN}
:`HAS_EMAIL` {None: UNKNOWN}
:`HAS_POSTCODE` {None: UNKNOWN}
:`POSTCODE_IN_AREA` {None: UNKNOWN}
:`LOCATION_IN_AREA` {None: UNKNOWN}
:`KNOWS_SN` {None: UNKNOWN}
:`KNOWS` {None: UNKNOWN}
:`CALLER` {None: UNKNOWN}
:`CALLED` {None: UNKNOWN

#### 2-2) Text2Cypher Retriever로 검색하기 

In [None]:
from neo4j import GraphDatabase
from neo4j_graphrag.retrievers import Text2CypherRetriever
from neo4j_graphrag.llm import OpenAILLM

neo4j_schema = get_schema() # get_schema() str(extract_schema_json())

examples = [
    """
    USER INPUT: Piccadilly에서 발생한 범죄 건수는?
    QUERY: MATCH (c:Crime)-[:OCCURRED_AT]->(l:Location {address: 'Piccadilly'})
    RETURN count(c) AS crime_count

    USER INPUT: M22 8ER 우편번호 지역의 범죄 유형 분포는?
    QUERY: MATCH (pc:PostCode {code: "M22 8ER"})<-[:HAS_POSTCODE]-(l:Location)<-[:OCCURRED_AT]-(c:Crime)
    RETURN c.type, count(*) AS count
    ORDER BY count DESC

    USER INPUT: Larive 경관(badge no 26-5234182)이 수사 중인 사건은 무엇인가요?
    QUERY: MATCH (c:Crime {last_outcome: 'Under investigation'})-[i:INVESTIGATED_BY]->(o:Officer {badge_no: '26-5234182', surname: 'Larive'})
    return *
    """
]

retriever = Text2CypherRetriever(
    driver=driver,
    llm=llm,  # type: ignore
    neo4j_schema=neo4j_schema,
    examples=examples,
)

In [None]:
query_text = "현재 수사 중인 범죄 사건의 개수는?"
result = retriever.search(query_text=query_text)

In [None]:
result.items

In [None]:
print(result.metadata["cypher"])

### 3) GraphRAG 파이프라인 모듈 사용하기

In [None]:
from neo4j_graphrag.llm.openai_llm import OpenAILLM
from neo4j_graphrag.generation import GraphRAG

llm = OpenAILLM(model_name="gpt-4o")
graph_rag = GraphRAG(retriever, llm)

In [None]:
query = "범죄자는 아니지만, 범죄자를 많이 알고 있는 사람은?"
response = graph_rag.search(query_text=query, return_context = True)

In [None]:
print("## 생성 답변 ##\n" + response.answer)
print("\n## Cypher ##\n" + response.retriever_result.metadata["cypher"])
print("\n## 검색 결과 ##")
for i in response.retriever_result.items: print(i.content)