## 0. Graph DB연결하기
* 영화 데이터 셋 활용

In [1]:
from neo4j import GraphDatabase, basic_auth


driver = GraphDatabase.driver(
  "neo4j://54.209.48.102:7687",
  auth=basic_auth("neo4j", "baby-grain-challenge"))

In [2]:
# 로컬에 있는 .env 를 통해 API key 불러옴 
from dotenv import load_dotenv
load_dotenv()

True

In [12]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o")

### DB 스키마 생성하기
- Text2Cyper 활용할 것임
- DB 스키마를 LLM이 잘 알고 있어야함

In [6]:
from collections import defaultdict

def get_schema():
    schema = ""
    with driver.session() as session:
        # 모든 노드 라벨과 속성 추출
        node_schema = session.run("""
        CALL db.schema.nodeTypeProperties() YIELD nodeType, propertyName, propertyTypes
        RETURN nodeType, propertyName, propertyTypes
        """)

        nodes = defaultdict(dict)
        for record in node_schema:
            label = record["nodeType"].replace(":", "")
            prop = record["propertyName"]
            types = record["propertyTypes"]
            nodes[label][prop] = types[0] if types else "UNKNOWN"

        # 모든 관계 타입과 속성 추출
        rel_schema = session.run("""
        CALL db.schema.relTypeProperties() YIELD relType, propertyName, propertyTypes
        RETURN relType, propertyName, propertyTypes
        """)

        relationships = defaultdict(dict)
        for record in rel_schema:
            rel = record["relType"]
            prop = record["propertyName"]
            types = record["propertyTypes"]
            relationships[rel][prop] = types[0] if types else "UNKNOWN"

        # 관계 방향 및 타입 추출
        rel_types = session.run("""
        MATCH (a)-[r]->(b)
        RETURN DISTINCT labels(a) AS from_labels, type(r) AS rel_type, labels(b) AS to_labels
        """)

        rel_directions = set()
        for record in rel_types:
            from_label = f":{record['from_labels'][0]}"
            to_label = f":{record['to_labels'][0]}"
            rel_type = record['rel_type']
            rel_directions.add(f"({from_label})-[:{rel_type}]->({to_label})")

    # 출력
    schema += "\nNode properties:\n"
    for label, props in nodes.items():
        prop_str = ", ".join(f"{k}: {v}" for k, v in props.items())
        schema += f"{label} {{{prop_str}}}\n"

    schema += "\nRelationship properties:\n"
    for rel, props in relationships.items():
        prop_str = ", ".join(f"{k}: {v}" for k, v in props.items())
        schema += f"{rel} {{{prop_str}}}\n"

    schema += "\nThe relationships:\n"
    for rel in sorted(rel_directions):
        schema += f"{rel}\n"
    return schema

schema = get_schema()



In [7]:
print(schema)


Node properties:
`Genre` {name: String}
`User` {name: String, userId: String}
`Director``Person` {name: String, imdbId: String, tmdbId: String, poster: String, born: Date, died: Date, bornIn: String, bio: String, url: String}
`Actor``Person` {name: String, imdbId: String, tmdbId: String, poster: String, born: Date, died: Date, bornIn: String, bio: String, url: String}
`Actor``Director``Person` {name: String, imdbId: String, tmdbId: String, poster: String, born: Date, died: Date, bornIn: String, bio: String, url: String}
`Movie` {movieId: String, imdbId: String, title: String, tmdbId: String, year: Long, countries: StringArray, languages: StringArray, plot: String, imdbRating: Double, imdbVotes: Long, released: String, runtime: Long, poster: String, revenue: Long, budget: Long, url: String, embedding: DoubleArray}

Relationship properties:
:`IN_GENRE` {None: UNKNOWN}
:`RATED` {rating: Double, timestamp: Long}
:`ACTED_IN` {role: String}
:`DIRECTED` {role: String}

The relationships:
(:A

## 1. Text2Cyper기반 GraphRAG Agent 만들기

### 1) Graph State 설정하기

In [None]:
from operator import add
from typing import Annotated, List

from typing_extensions import TypedDict

class InputState(TypedDict):
    question: str

class OverallState(TypedDict):
    question: str # 사용자 질문을 받아옴
    next_action: str # 다음단계
    cypher_statement: str # Cypher 쿼리
    cypher_errors: List[str] # Cypher 쿼리 오류
    database_records: List[dict] # DB 실행 결과 
    steps: Annotated[List[str], add] # 그래프 실행 단계

class OutputState(TypedDict):
    answer: str
    steps: List[str]
    cypher_statement: str

### 2) 가드레일(일반질문 vs 영화질문) 노드 추가 

In [13]:
from typing import Literal

from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field

guardrails_system = """
As an intelligent assistant, your primary objective is to decide whether a given question is related to movies or not. 
If the question is related to movies, output "movie". Otherwise, output "end".
To make this decision, assess the content of the question and determine if it refers to any movie, actor, director, film industry, 
or related topics. Provide only the specified output: "movie" or "end".
"""
guardrails_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            guardrails_system,
        ),
        (
            "human",
            ("{question}"),
        ),
    ]
)


class GuardrailsOutput(BaseModel):
    decision: Literal["movie", "end"] = Field(
        description="Decision on whether the question is related to movies"
    )


guardrails_chain = guardrails_prompt | llm.with_structured_output(GuardrailsOutput)

In [None]:
def guardrails(state: InputState) -> OverallState: # 타입 힌트 
    """
    Decides if the question is related to movie or not.
    """
    print("-- GUARDRAILS --")
    question = state["question"]
    guardrails_output = guardrails_chain.invoke({"question": question}) # 랭체인 실행행
    print("Guardrails output:", guardrails_output)

    database_records = None
    if guardrails_output.decision =="end":
        database_records = "This questions is not about moveis of their cast. Therefore I cannot answer this question"
    return {
        "next_action": guardrails_output.decision, 
        "database_records": database_records,
        "steps": ["guardrail"]
    } # OverallState의 일부만 반환, LangGraph가 기존 State에 병합합