## Knowledge Graph (History of Greenland, Norway & Denmark)



**RAG Pipeline with Neo4j and LangChain**

In [None]:
# Core LangChain packages plus the Wikipedia client.
# NOTE(review): the next install cell lists a superset of these packages, so this cell looks redundant.
%pip install --upgrade --quiet  langchain langchain-community langchain-openai langchain-experimental wikipedia

In [None]:
# Full dependency set for this notebook: LangChain packages, the neo4j driver, the Wikipedia client,
# tiktoken, and the yFiles graph widget used for visualisation.
%pip install --upgrade --quiet  langchain langchain-community langchain-openai langchain-experimental neo4j wikipedia tiktoken yfiles_jupyter_graphs

In [None]:
!pip install --quiet langchain-neo4j

In [None]:
import os

In [None]:
from google.colab import userdata
# Fetch the OpenAI key through Colab's `userdata` API so it is not written into the notebook.
OPENAI_API_KEY=userdata.get('OPENAI_API_KEY')

In [None]:
# Connection settings for the Neo4j instance.
# The password is read from Colab's `userdata` store (the same mechanism used for OPENAI_API_KEY)
# instead of being committed in the notebook: add a secret named NEO4J_PASSWORD before running.
# SECURITY: a password was previously hardcoded in this cell; treat it as leaked and rotate it.
NEO4J_URI="neo4j+s://b79f9be3.databases.neo4j.io"
NEO4J_USERNAME="neo4j"
NEO4J_PASSWORD=userdata.get('NEO4J_PASSWORD')

In [None]:
# Publish the credentials as environment variables; showGraph() reads the NEO4J_* values
# from os.environ, and clients constructed without explicit arguments presumably do the same.
os.environ.update(
    {
        "OPENAI_API_KEY": OPENAI_API_KEY,
        "NEO4J_URI": NEO4J_URI,
        "NEO4J_USERNAME": NEO4J_USERNAME,
        "NEO4J_PASSWORD": NEO4J_PASSWORD,
    }
)

In [None]:
from langchain_community.graphs import Neo4jGraph

In [None]:
# No connection arguments are passed; presumably Neo4jGraph falls back to the NEO4J_*
# environment variables set above — confirm against the library docs.
graph = Neo4jGraph()

  graph = Neo4jGraph()


In [None]:
from langchain_community.document_loaders import WikipediaLoader

# Wikipedia search queries, one per country covered by the graph.
topics = ["History of Greenland", "History of Norway", "History of Denmark"]

# Run every query through WikipediaLoader and pool all returned documents in one list.
raw_documents = []
for topic in topics:
    docs = WikipediaLoader(query=topic).load()
    raw_documents += docs

print(f"Loaded {len(raw_documents)} documents")



Loaded 75 documents


In [None]:
# Sanity check: number of Wikipedia documents loaded across all topics.
len(raw_documents)

75

In [None]:
from langchain_text_splitters import TokenTextSplitter
# Chunking parameters passed to TokenTextSplitter; named here so they are easy to find and tune.
CHUNK_SIZE = 512
CHUNK_OVERLAP = 24

# Split every loaded document into overlapping chunks for graph extraction and embedding.
text_splitter = TokenTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
documents = text_splitter.split_documents(raw_documents)
print(f"Total chunks: {len(documents)}")

Total chunks: 145


In [None]:
from langchain_openai import ChatOpenAI
# Chat model shared by graph extraction, entity extraction, and final answering below.
# temperature=0 is used to minimise sampling randomness.
llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-0125")

In [None]:
from langchain_experimental.graph_transformers import LLMGraphTransformer
# Transformer that uses the chat model to turn text chunks into graph documents.
llm_transformer = LLMGraphTransformer(llm=llm)



In [None]:
# Convert every chunk into a graph document.
# NOTE(review): this presumably issues LLM calls for each chunk, so it is likely the slowest
# and most expensive cell in the notebook — consider caching the result.
graph_documents = llm_transformer.convert_to_graph_documents(documents)

In [60]:
#graph_documents

In [None]:
# Persist the extracted graph into Neo4j.
# baseEntityLabel=True: presumably what adds the `__Entity__` label that the full-text index below targets — confirm.
# include_source=True: presumably stores the source chunks as `Document` nodes linked by MENTIONS
# (later queries exclude MENTIONS and the vector index targets `Document`) — confirm.
graph.add_graph_documents(
    graph_documents,
    baseEntityLabel=True,
    include_source=True
)

In [None]:
# Default Cypher used by showGraph(): up to 50 relationships of any type except MENTIONS,
# returned with their start and end nodes.
default_cypher = "MATCH (s)-[r:!MENTIONS]->(t) RETURN s,r,t LIMIT 50"

In [None]:
from yfiles_jupyter_graphs import GraphWidget
from neo4j import GraphDatabase

In [None]:
# When running in Colab, enable the custom widget manager (presumably required for the
# GraphWidget to render there). Outside Colab the import fails and the step is skipped.
# Only ImportError is swallowed, so genuine failures are no longer hidden.
try:
    import google.colab
    from google.colab import output
except ImportError:
    pass
else:
    output.enable_custom_widget_manager()

In [None]:
def showGraph(cypher: str = default_cypher):
    """Run a Cypher query and render the resulting graph in a yFiles widget.

    Args:
        cypher: Query whose result is visualised; defaults to ``default_cypher``.

    Returns:
        The displayed ``GraphWidget``.

    Connection details are read from the NEO4J_URI, NEO4J_USERNAME and
    NEO4J_PASSWORD environment variables.
    """
    # Context managers close the session and driver even if the query fails,
    # so repeated calls do not leak connections.
    with GraphDatabase.driver(
        uri=os.environ["NEO4J_URI"],
        auth=(os.environ["NEO4J_USERNAME"], os.environ["NEO4J_PASSWORD"]),
    ) as driver:
        with driver.session() as session:
            # Build the widget while the session is still open.
            widget = GraphWidget(graph=session.run(cypher).graph())
    # Label each node with its `id` property.
    widget.node_label_mapping = 'id'
    display(widget)
    return widget

In [59]:
#showGraph()

In [None]:
from typing import Tuple, List, Optional

In [None]:
from langchain_community.vectorstores import Neo4jVector

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate

In [None]:
from langchain_openai import OpenAIEmbeddings
# Build a vector index over the existing `Document` nodes: the `text` property is embedded
# with OpenAIEmbeddings and the vector is stored in the `embedding` property.
# NOTE(review): search_type="hybrid" presumably combines vector and keyword search — confirm in the Neo4jVector docs.
vector_index = Neo4jVector.from_existing_graph(
    OpenAIEmbeddings(),
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)

In [None]:
# Full-text index named `entity` on the `id` property of `__Entity__` nodes; structured_retriever
# queries it through db.index.fulltext.queryNodes. IF NOT EXISTS lets the cell be re-run.
graph.query("CREATE FULLTEXT INDEX entity IF NOT EXISTS FOR (e:__Entity__) ON EACH [e.id]")

[]

In [None]:
from pydantic import BaseModel, Field
from typing import List

# Structured-output schema used to extract entity names from a question.
# NOTE: the class docstring and the field description are presumably forwarded to the LLM
# via with_structured_output, so treat them as prompt text — edit with care.
class Entities(BaseModel):
    """Identifying information about entities."""
    # Names of the entities found in the input text; `...` marks the field as required.
    names: List[str] = Field(
        ...,
        description="All the person, organization, or business entities that "
        "appear in the text",
    )

In [None]:
# Prompt for entity extraction: the system message sets the task and the human message
# carries the user's text in the {question} slot.
# NOTE(review): this asks for "organization and person" entities while the Entities field
# description also mentions "business" — confirm which wording is intended.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are extracting organization and person entities from the text.",
        ),
        (
            "human",
            "Use the given format to extract information from the following "
            "input: {question}",
        ),
    ]
)

In [None]:
# Pipe the extraction prompt into the chat model, configured to return an `Entities` object.
entity_chain = prompt | llm.with_structured_output(Entities)



In [None]:
# Quick check of the extraction chain on a question that names one person.
entity_chain.invoke({"question": "Who is Erik the Red?"}).names

['Erik the Red']

In [None]:
from langchain_neo4j.vectorstores.neo4j_vector import remove_lucene_chars

In [None]:
def generate_full_text_query(input: str) -> str:
    """Build a fuzzy full-text query string from free text.

    Lucene special characters are stripped with ``remove_lucene_chars``; every
    remaining whitespace-separated word gets a ``~2`` suffix (Lucene fuzzy
    matching) and the words are combined with ``AND``.

    Args:
        input: Raw text, typically an entity name.

    Returns:
        The query string, e.g. ``"Erik~2 AND the~2 AND Red~2"``. An empty
        string is returned when no words remain after cleaning (this case
        previously raised ``IndexError``).
    """
    # str.split() with no arguments never yields empty strings, so no extra filtering is needed.
    words = remove_lucene_chars(input).split()
    return " AND ".join(f"{word}~2" for word in words)

In [None]:
def structured_retriever(question: str) -> str:
    """Collect graph relationships for the entities mentioned in a question.

    Entity names are extracted from ``question`` with ``entity_chain``. For each
    name, up to two matching nodes are looked up in the ``entity`` full-text
    index and their non-MENTIONS relationships (outgoing and incoming) are
    listed, capped at 50 rows per entity.

    Args:
        question: The user's question.

    Returns:
        One ``"source - TYPE -> target"`` line per relationship, joined with
        newlines; an empty string when nothing is found.
    """
    lines = []
    entities = entity_chain.invoke({"question": question})
    for entity in entities.names:
        full_text_query = generate_full_text_query(entity)
        if not full_text_query:
            # Nothing searchable in this entity name; skip the index lookup.
            continue
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
            YIELD node,score
            CALL (node) {
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' +  node.id AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": full_text_query},
        )
        lines.extend(el['output'] for el in response)
    # Join once at the end so rows from different entities stay on separate lines
    # (appending per-entity blocks without a separator fused adjacent rows).
    return "\n".join(lines)

In [None]:
# Example: graph relationships retrieved for the entities in one question.
print(structured_retriever("Who is Erik the Red?"))

Erik The Red - DISCOVERY -> Greenland
Erik The Red - SETTLEMENT -> Greenland
Saga Of Erik The Red - ABOUT -> Grænlendingar


In [None]:
def retriever(question: str):
    """Assemble the context string for a question from both retrieval paths.

    Prints the search query, gathers graph relationships via
    ``structured_retriever``, then gathers passage texts from
    ``vector_index.similarity_search`` and returns both in one labelled string.
    """
    print(f"Search query: {question}")
    graph_context = structured_retriever(question)
    passages = []
    for doc in vector_index.similarity_search(question):
        passages.append(doc.page_content)
    joined_passages = "#Document ".join(passages)
    # The trailing newline and four spaces reproduce the original template's ending.
    return (
        "Structured data:\n"
        f"{graph_context}\n"
        "Unstructured data:\n"
        f"{joined_passages}\n"
        "    "
    )

In [None]:
# Prompt text for rewriting a follow-up question into a standalone one.
# Placeholders: {chat_history} and {question}.
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question,
in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""

In [None]:
# Wrap the condense-question text as a PromptTemplate; used by _search_query when chat history is present.
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

In [None]:
from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)

In [None]:
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser

In [None]:
def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
    buffer = []
    for human, ai in chat_history:
        buffer.append(HumanMessage(content=human))
        buffer.append(AIMessage(content=ai))
    return buffer

In [None]:
# Predicate: true when the input dict carries a non-empty "chat_history".
_has_chat_history = RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
    run_name="HasChatHistoryCheck"
)

# Condense the chat history and follow-up question into one standalone question string.
_condense_follow_up = (
    RunnablePassthrough.assign(
        chat_history=lambda x: _format_chat_history(x["chat_history"])
    )
    | CONDENSE_QUESTION_PROMPT
    | ChatOpenAI(temperature=0)
    | StrOutputParser()
)

# With chat history: condense first. Without it: use the question as-is.
_search_query = RunnableBranch(
    (_has_chat_history, _condense_follow_up),
    RunnableLambda(lambda x : x["question"]),
)

In [None]:
# Answer prompt: the model must answer from the retrieved {context} only.
# Placeholders: {context} and {question}.
template = """Answer the question based only on the following context:
{context}

Question: {question}
Use natural language and be concise.
Answer:"""

In [None]:
# Answer-generation prompt.
# NOTE(review): this rebinds `prompt`, which the entity-extraction cell also assigns. entity_chain
# already holds the earlier object, but re-running that cell after this one would build it
# from the wrong template — consider a distinct name.
prompt = ChatPromptTemplate.from_template(template)

In [None]:
# End-to-end RAG chain. Input: a dict with "question" and optionally "chat_history".
#   1. _search_query resolves the input to a single standalone question string.
#   2. That string is used both to retrieve context and as the question in the answer prompt.
#      (RunnablePassthrough on the raw input forwarded the whole input dict, so the prompt's
#      {question} slot was rendered as a dict repr rather than the question text.)
#   3. The prompt is answered by the LLM and parsed to a plain string.
chain = (
    _search_query
    | RunnableParallel(
        {
            "context": RunnableLambda(retriever),
            "question": RunnablePassthrough(),
        }
    )
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
import logging

# Only show errors from the neo4j driver loggers so query notifications do not flood the cell output.
for logger_name in ("neo4j", "neo4j.notifications"):
    logging.getLogger(logger_name).setLevel(logging.ERROR)

In [None]:
# Example query: a single fact about one named monarch.
chain.invoke({"question": "Which house did Frederick IV of Denmark belong to?"})

Search query: Which house did Frederick IV of Denmark belong to?


'Frederick IV of Denmark belonged to the House of Oldenburg.'

In [None]:
# Example query: asks for a date.
chain.invoke({"question": "When did the Western Settlement disappear?"})

Search query: When did the Western Settlement disappear?


'The Western Settlement in Greenland disappeared around the year 1355.'

In [None]:
# Example query: open-ended question that calls for a short summary.
chain.invoke({"question": "What happened during World War II in Greenland?"})

Search query: What happened during World War II in Greenland?


'During World War II, Greenland declared itself a self-ruling territory after Denmark was occupied by Nazi Germany. The United States established facilities for air and sea traffic, leading to economic growth through trade with the US, Canada, and Portugal. Greenland also played a strategic role in the Allied war efforts, with the US building military bases and maintaining control over the territory.'

In [None]:
# NOTE(review): the answer recorded below names no traded goods and mentions the Dutch and
# America — it does not answer the question; inspect the retrieved context for this query.
chain.invoke({"question": "What did the Norse trade from Greenland?"})

Search query: What did the Norse trade from Greenland?


'The Norse settlers in Greenland traded with the Dutch for opportunities in trade and a new life in America.'

In [None]:
# Example query: the target entity is described indirectly ("dual-state entity") rather than named.
chain.invoke({"question": "Which dual-state entity claimed sovereignty over Greenland in 1721?"})

Search query: Which dual-state entity claimed sovereignty over Greenland in 1721?


'Denmark-Norway claimed sovereignty over Greenland in 1721.'

In [None]:
# NOTE(review): the question asks for a Norwegian archdiocese but the recorded answer names
# Lund — check whether the retrieved context actually covers this question.
chain.invoke({"question": "Which Norwegian archdiocese was the Diocese of Garðar subject to?"})

Search query: Which Norwegian archdiocese was the Diocese of Garðar subject to?


'The Diocese of Garðar was subject to the Archbishop of Lund.'

In [None]:
# Example query: two-part question (who, and where from).
chain.invoke({"question": "Who discovered Greenland and where were they from?"})

Search query: Who discovered Greenland and where were they from?


'Greenland was discovered by Norse Icelandic explorers who settled on its southwestern coast in the 9th century CE.'

In [None]:
# Example query: a "why" question that needs several facts combined.
chain.invoke({"question": "Why is Greenland Danish instead of Norwegian?"})

Search query: Why is Greenland Danish instead of Norwegian?


'Greenland is Danish instead of Norwegian because it has been politically and culturally associated with the European kingdoms of Norway and Denmark for over a millennium, beginning in 986. Greenland was transferred from the Norwegian to the Danish crown in 1814 when Denmark and Norway separated. Greenland has been under Danish sovereignty since then.'

In [None]:
# Example query: broad question spanning all three loaded topics.
chain.invoke({"question": "What historical connections exist between Norway, Denmark, and Greenland?"})

Search query: What historical connections exist between Norway, Denmark, and Greenland?


'Norway and Denmark were part of a historical union called Denmark-Norway, which included Greenland as a colony. Greenland was initially under Norwegian rule, then became part of the Kalmar Union with Denmark and Sweden, and later fell under Danish control as part of Denmark-Norway.'