In [1]:
%pip install --upgrade --quiet  langchain langchain-community langchain-ollama langchain-experimental neo4j tiktoken yfiles_jupyter_graphs python-dotenv json-repair langchain-openai langchain_core

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: C:\Users\kyith\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [2]:
from langchain_core.runnables import  RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain_core.output_parsers import StrOutputParser
from langchain_community.graphs import Neo4jGraph
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOllama
from langchain_experimental.graph_transformers import LLMGraphTransformer
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget
from langchain_community.vectorstores import Neo4jVector
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_ollama import OllamaEmbeddings
import os
from langchain_experimental.llms.ollama_functions import OllamaFunctions
from neo4j import  Driver

from dotenv import load_dotenv

In [3]:
load_dotenv()

True

In [7]:
graph = Neo4jGraph()

In [10]:
loader = TextLoader(file_path="pdpa12.txt", autodetect_encoding=True)
docs = loader.load()


In [11]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=24)
documents = text_splitter.split_documents(documents=docs)

In [12]:

llm = OllamaFunctions(model="llama3.1", temperature=0, format="json")

llm_transformer = LLMGraphTransformer(llm=llm)

graph_documents = llm_transformer.convert_to_graph_documents(documents)


  llm = OllamaFunctions(model="llama3.1", temperature=0, format="json")


In [13]:
graph_documents[0]

GraphDocument(nodes=[Node(id='Official Emblem Of Royal Command', type='Object', properties={}), Node(id='Personal Data Protection Act', type='Act', properties={}), Node(id='King Phra Poramenthra Ramathibodi Sisin Maha Vajiralongkorn Phra Vajira Klao Chao Yu Hua', type='King', properties={})], relationships=[Relationship(source=Node(id='Official Emblem Of Royal Command', type='Object', properties={}), target=Node(id='King Phra Poramenthra Ramathibodi Sisin Maha Vajiralongkorn Phra Vajira Klao Chao Yu Hua', type='King', properties={}), type='ASSOCIATED_WITH', properties={}), Relationship(source=Node(id='Personal Data Protection Act', type='Act', properties={}), target=Node(id='King Phra Poramenthra Ramathibodi Sisin Maha Vajiralongkorn Phra Vajira Klao Chao Yu Hua', type='King', properties={}), type='GOVERNS', properties={})], source=Document(metadata={'source': 'pdpa12.txt'}, page_content='[Official Emblem of Royal Command]\nPersonal Data Protection Act,\nB.E. 2562 (2019)\n----------\nH

In [14]:
graph.add_graph_documents(
    graph_documents,
    baseEntityLabel=True,
    include_source=True
)

In [15]:
embeddings = OllamaEmbeddings(
    model="nomic-embed-text",
)

vector_index = Neo4jVector.from_existing_graph(
    embeddings,
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)
vector_retriever = vector_index.as_retriever()

In [16]:
driver = GraphDatabase.driver(
        uri = os.environ["NEO4J_URI"],
        auth = (os.environ["NEO4J_USERNAME"],
                os.environ["NEO4J_PASSWORD"]))

def create_fulltext_index(tx):
    query = '''
    CREATE FULLTEXT INDEX `fulltext_entity_id` 
    FOR (n:__Entity__) 
    ON EACH [n.id];
    '''
    tx.run(query)

# Function to execute the query
def create_index():
    with driver.session() as session:
        session.execute_write(create_fulltext_index)
        print("Fulltext index created successfully.")

# Call the function to create the index
try:
    create_index()
except:
    pass

# Close the driver connection
driver.close()

Fulltext index created successfully.


In [17]:

class Entities(BaseModel):
    """Identifying information about entities."""

    names: list[str] = Field(
        ...,
        description="All the person, organization, or business entities that "
        "appear in the text",
    )

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are extracting organization and person entities from the text.",
        ),
        (
            "human",
            "Use the given format to extract information from the following "
            "input: {question}",
        ),
    ]
)


entity_chain = llm.with_structured_output(Entities)

In [18]:
entity_chain.invoke("Who are Nonna Lucia and Giovanni Caruso?")

Entities(names=['Nonna Lucia', 'Giovanni Caruso'])

In [19]:
def generate_full_text_query(input: str) -> str:
    words = [el for el in remove_lucene_chars(input).split() if el]
    if not words:
        return ""
    full_text_query = " AND ".join([f"{word}~2" for word in words])
    print(f"Generated Query: {full_text_query}")
    return full_text_query.strip()


# Fulltext index query
def graph_retriever(question: str) -> str:
    """
    Collects the neighborhood of entities mentioned
    in the question
    """
    result = ""
    entities = entity_chain.invoke(question)
    for entity in entities.names:
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('fulltext_entity_id', $query, {limit:2})
            YIELD node,score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' +  node.id AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": entity},
        )
        result += "\n".join([el['output'] for el in response])
    return result

In [20]:
print(graph_retriever("Who is Nonna Lucia?"))






In [21]:
def full_retriever(question: str):
    graph_data = graph_retriever(question)
    vector_data = [el.page_content for el in vector_retriever.invoke(question)]
    final_data = f"""Graph data:
{graph_data}
vector data:
{"#Document ". join(vector_data)}
    """
    return final_data

In [22]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
Use natural language and be concise.
Answer:"""

prompt = ChatPromptTemplate.from_template(template)

chain = (
        {
            "context": full_retriever,
            "question": RunnablePassthrough(),
        }
    | prompt
    | llm
    | StrOutputParser()
)

In [23]:
chain.invoke(input="Who is Nonna Lucia? Did she teach anyone about restaurants or cooking?")



"There's no mention of Nonna Lucia in the provided context. It seems unrelated to the topic of Data Processor."

In [24]:
chain.invoke(input="What is PDPA")



'The PDPA stands for Personal Data Protection Act, a law regulating data collection and usage in Thailand.'

In [29]:
chain.invoke(input="What is personal data?")



'Personal data refers to any information relating to a person that enables their identification, directly or indirectly.'

In [26]:
chain.invoke(input=" What is a personal data protection policy?")



'A personal data protection policy outlines guidelines for safeguarding personal information, ensuring compliance with relevant laws and regulations.'

In [None]:
chain.invoke(input=" What is a privacy statement?")

ValueError: `tool_calls` missing from AIMessage: {message}

In [None]:
chain.invoke(input="What if there is no Data Protection Policy")

ValueError: `tool_calls` missing from AIMessage: {message}

In [36]:
chain.invoke(input="What are the rights of the data subject?")

ValueError: `tool_calls` missing from AIMessage: {message}

In [37]:
chain.invoke(input="Who are the individuals involved in personal data?")



'A Data Controller and a Data Processor, as well as the Person whose information is being collected.'

In [38]:
chain.invoke(input="Who are the personal data controllers and personal data processors?")



'The personal data controllers are the Data Controller, Person, Juristic Person, Master Plan, and Cabinet. The personal data processors are the Data Processor and Person.'

In [39]:
chain.invoke(input="Duties of the Personal Data Protection Officer")



'The Personal Data Protection Officer is responsible for determining measures or guidelines to comply with the Act, issuing notifications or rules, announcing and establishing guidance for data protection, and implementing security protection of personal data.'

In [40]:
chain.invoke(input="In what cases can personal data be collected, used or disclosed?")



'Personal data can be collected, used or disclosed in the following cases: for mass media, fine arts, literature, public interest, or when relevant to and useful for its protection.'

In [42]:
chain.invoke(input=" What are the conditions for sending or transferring personal data abroad?")



"According to the provided context, personal data can be sent or transferred abroad if it's done in accordance with professional ethics or for public interest."

In [44]:
chain.invoke(input=" What are the penalties for failure to comply with the PDPA?")

ValueError: `tool_calls` missing from AIMessage: {message}

In [45]:

chain.invoke(input=" What are the conditions of consent?")



'The conditions of consent include qualifications, no prohibited characteristics, and the consent of such persons to the Cabinet for appointment.'

In [46]:

chain.invoke(input=" What must entrepreneurs prepare to support PDPA?")



'Entrepreneurs should prepare policies and procedures for data protection, as well as train their staff on the Personal Data Protection Act.'