In [6]:
import os

In [7]:
from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import Tuple, List, Optional
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
import os
from langchain_community.graphs import Neo4jGraph
from langchain.document_loaders import WikipediaLoader
from langchain.text_splitter import TokenTextSplitter
from langchain_openai import ChatOpenAI
from langchain_experimental.graph_transformers import LLMGraphTransformer
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_core.runnables import ConfigurableField, RunnableParallel, RunnablePassthrough

In [8]:
from api_key import API_key,neo4j_url,neo4j_username,neo4j_password

In [9]:
# Publish the credentials imported from api_key as environment variables.
# showGraph reads the NEO4J_* values back from the environment; the LangChain
# clients created later are presumably configured from these as well.
os.environ.update(
    {
        "OPENAI_API_KEY": API_key,
        "NEO4J_URI": neo4j_url,
        "NEO4J_USERNAME": neo4j_username,
        "NEO4J_PASSWORD": neo4j_password,
    }
)

In [10]:
# Graph handle used for all Neo4j writes and queries in this notebook.
# No arguments are passed — presumably the connection settings come from the
# NEO4J_* environment variables set earlier; TODO confirm.
graph = Neo4jGraph()

In [11]:
# Load the Wikipedia pages returned for the query "Naveen Patnaik"
wiki_loader = WikipediaLoader(query="Naveen Patnaik")
raw_documents = wiki_loader.load()

In [12]:
# Peek at one of the loaded pages (index 4) to see what the loader returned.
raw_documents[4]

Document(page_content="Arup Patnaik (born 8 September 1955) is a retired Indian Police Service officer who was the 36th Police Commissioner of Mumbai. He retired on 30 September 2015 after a 36-year career in the Maharashtra Police. He is a recipient of the President's Police Medal for Distinguished Service in 2003 and the Indian Police Medal for meritorious services in 1994.\nArup Patnaik has previously served as the Chairman of Odisha's State Youth Welfare Board, the Biju Yuva Vahini and was accorded the rank and status of Minister of State by the Odisha State Government.\nPatnaik joined the Biju Janata Dal in 2018. \nIn the 2019 General Election he contested the Bhubaneswar Lok Sabha constituency.  \nThe BJP candidate, Aparajita Sarangi, won the election.  \nOn October 25, 2019 Odisha Chief Minister Naveen Patnaik appointed Arup Patnaik as the Convenor of the Biju Janata Dal social service wing, 'Odisha Mo Parivar.  \nOn October 30, 2019, Arup Patnaik was inducted as a Co-Opted Memb

In [13]:
# Define chunking strategy: token-based chunks with chunk_size=512 and
# chunk_overlap=24.
text_splitter = TokenTextSplitter(chunk_size=512, chunk_overlap=24)
# Only the first loaded page is chunked (raw_documents[:1]); the other pages
# are not used by the visible cells that follow.
documents = text_splitter.split_documents(raw_documents[:1])

In [15]:
# Inspect the second chunk produced by the splitter.
documents[1]

Document(page_content=", he was elected as a member to the 11th Lok Sabha in the by-election from Aska Parliamentary Constituency in Odisha, India. He was a member of the Consultative Committee of Ministry of Steel & Mines, Member of Standing Committee on Commerce, and Member Library Committee of Parliament. In \nDecember 1997, Naveen split from the Janata Dal and founded the Biju Janata Dal. The new party was in alliance with the BJP-led National Democratic Alliance performed well and Naveen Patnaik was selected the Union Minister for Mines in the cabinet of Atal Bihari Vajpayee.\n\n\n=== Elections 2000 ===\nIn the 2000 Assembly election, BJD won the majority of seats in alliance with the BJP in the Odisha Assembly elections, Patnaik resigned from the Union cabinet and was sworn in as the Chief Minister of Odisha.\n\n\n=== Elections 2004 ===\nBJP led NDA lost the general elections in 2004, however, the coalition led by Naveen Patnaik emerged victorious in the state legislative electio

In [16]:
# Chat model shared by graph extraction, entity extraction and the final
# answer chain.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
)

In [17]:
# Wrap the chat model in a transformer whose convert_to_graph_documents()
# is used below to turn text chunks into graph documents.
llm_transformer = LLMGraphTransformer(llm=llm)

In [18]:
# Extract graph data from the chunks using the LLM-backed transformer.
# NOTE(review): this presumably issues LLM calls for every chunk, so
# re-running the cell may be slow and incur API cost — confirm.
graph_documents = llm_transformer.convert_to_graph_documents(documents)

In [19]:
# Store to neo4j
# NOTE(review): baseEntityLabel=True presumably adds the shared `__Entity__`
# label that the full-text index created later is built on, and
# include_source=True presumably links entities to their source chunk via
# MENTIONS relationships (which the later Cypher queries exclude) — confirm
# against the Neo4jGraph.add_graph_documents documentation.
graph.add_graph_documents(
  graph_documents, 
  baseEntityLabel=True, 
  include_source=True
)

In [20]:
# directly show the graph resulting from the given Cypher query
# Default query for showGraph: up to 50 (source, relationship, target) rows,
# skipping relationships of type MENTIONS.
default_cypher = "MATCH (s)-[r:!MENTIONS]->(t) RETURN s,r,t LIMIT 50"

def showGraph(cypher: str = default_cypher):
    """Run a Cypher query and return its result as a graph widget.

    Args:
        cypher: Cypher query whose result graph is rendered. Defaults to
            ``default_cypher``.

    Returns:
        A ``GraphWidget`` built from the query result, with
        ``node_label_mapping`` set to ``'id'``.

    Raises:
        KeyError: If NEO4J_URI, NEO4J_USERNAME or NEO4J_PASSWORD is missing
            from the environment.
    """
    # Context managers close the session and the driver even when the query
    # fails; without them every call leaves a connection pool open.
    with GraphDatabase.driver(
        uri=os.environ["NEO4J_URI"],
        auth=(os.environ["NEO4J_USERNAME"], os.environ["NEO4J_PASSWORD"]),
    ) as driver:
        with driver.session() as session:
            # The result has to be consumed into a graph while the session is
            # still open, so the widget is built inside the block.
            widget = GraphWidget(graph=session.run(cypher).graph())
    widget.node_label_mapping = 'id'
    return widget

# Render the default query; the returned widget is the cell's output.
showGraph()

GraphWidget(layout=Layout(height='800px', width='100%'))

In [21]:
# Vector store built on top of nodes that already exist in Neo4j: nodes with
# label `Document`, text taken from their `text` property, embeddings kept in
# the `embedding` property, computed with OpenAIEmbeddings.
# NOTE(review): search_type="hybrid" presumably combines vector and keyword
# search, and this call presumably computes embeddings for nodes that lack
# one — confirm against the Neo4jVector.from_existing_graph documentation.
vector_index = Neo4jVector.from_existing_graph(
    OpenAIEmbeddings(),
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)



In [22]:
# Extract entities from text
# NOTE(review): this model is handed to llm.with_structured_output(), so the
# class docstring and the Field description below are presumably sent to the
# LLM as part of the output schema — treat them as prompt text, not as
# ordinary comments.
class Entities(BaseModel):
    """Identifying information about entities."""

    # Required field (Field(...)): the extracted entity names, read by
    # structured_retriever via `.names`.
    names: List[str] = Field(
        ...,
        description="All the person, organization, or business entities that "
        "appear in the text",
    )

In [23]:
# Prompt for the entity-extraction chain: a fixed system instruction plus a
# human message that carries the user's text in the {question} placeholder.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are extracting organization and person entities from the text.",
        ),
        (
            "human",
            "Use the given format to extract information from the following "
            "input: {question}",
        ),
    ]
)

In [24]:
# Entity-extraction chain: the extraction prompt piped into the LLM, with the
# output structured as an Entities instance (callers read its `.names`).
entity_chain = prompt | llm.with_structured_output(Entities)

In [25]:
# Smoke test: extract the entity names from a sample question.
entity_chain.invoke({"question": "Where was Naveen Pattnaik born?"}).names

['Naveen Pattnaik']

In [26]:
# Create (if it does not exist yet) a full-text index named `entity` over the
# `id` property of `__Entity__` nodes; structured_retriever queries this
# index by name.
graph.query(
    "CREATE FULLTEXT INDEX entity IF NOT EXISTS FOR (e:__Entity__) ON EACH [e.id]")

def generate_full_text_query(input: str) -> str:
    """
    Generate a full-text search query for a given input string.

    The input is passed through ``remove_lucene_chars`` and split on
    whitespace. Each word gets a fuzzy-match suffix (``~2``, i.e. up to two
    changed characters) and the words are combined with the AND operator.
    Useful for mapping entities from user questions to database values while
    tolerating some misspellings.

    Args:
        input: Free text, typically an entity name.

    Returns:
        The query string, e.g. ``"Naveen~2 AND Patnaik~2"``, or an empty
        string when the input contains no searchable words.
    """
    # str.split() with no separator never yields empty strings, so no extra
    # filtering is needed.
    words = remove_lucene_chars(input).split()
    if not words:
        # Without this guard, indexing the last word raises IndexError for
        # empty or punctuation-only input.
        return ""
    return " AND ".join(f"{word}~2" for word in words)

In [27]:
# Fulltext index query
def structured_retriever(question: str) -> str:
    """
    Collect the graph neighborhood of the entities mentioned in the question.

    Entities are extracted from the question with ``entity_chain``. Each one
    is looked up in the ``entity`` full-text index (at most 2 hits), and the
    non-MENTIONS relationships of every hit are rendered as
    ``source - TYPE -> target`` lines (at most 50 per entity).

    Args:
        question: Natural-language question to extract entities from.

    Returns:
        All relationship lines joined by newlines; an empty string when
        nothing was found.
    """
    lines = []
    entities = entity_chain.invoke({"question": question})
    for entity in entities.names:
        full_text_query = generate_full_text_query(entity)
        if not full_text_query:
            # Nothing searchable in this entity name; skip the index lookup.
            continue
        # `WITH node` imports the index hit into each branch of the subquery.
        # Without it, `node` inside CALL { } is a new, unbound variable and
        # the subquery matches relationships anywhere in the graph.
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
            YIELD node,score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' +  node.id AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": full_text_query},
        )
        lines.extend(el["output"] for el in response)
    # Join once at the end so the last line of one entity does not run into
    # the first line of the next.
    return "\n".join(lines)

In [29]:
# Smoke test with a misspelled name ("Navin Pattnaik") to exercise the fuzzy
# (~2) matching of the full-text query.
print(structured_retriever("Who is Navin Pattnaik?"))

Naveen Patnaik - POSITION -> 14Th Chief Minister Of Odisha
Naveen Patnaik - POSITION -> Union Minister Of Steel And Mines
Naveen Patnaik - POSITION -> Chief Minister Of Odisha
Naveen Patnaik - POSITION -> Union Minister For Mines
Naveen Patnaik - START_DATE -> 5 March 2000
Naveen Patnaik - START_DATE -> 1998
Naveen Patnaik - END_DATE -> 12 June 2024
Naveen Patnaik - END_DATE -> 2000
Naveen Patnaik - FOUNDER -> Biju Janata Dal
Naveen Patnaik - FOUNDER -> Indian National Trust For Art And Cultural Heritage
Naveen Patnaik - FOUNDER_DATE -> 1997
Naveen Patnaik - MEMBER -> Lok Sabha
Naveen Patnaik - MEMBER -> 11Th Lok Sabha
Naveen Patnaik - MEMBER -> Consultative Committee Of Ministry Of Steel & Mines
Naveen Patnaik - MEMBER -> Standing Committee On Commerce
Naveen Patnaik - MEMBER -> Library Committee Of Parliament
Naveen Patnaik - MEMBER -> Third Front
Naveen Patnaik - MEMBER_LOCATION -> Aska
Naveen Patnaik - BIRTHPLACE -> Cuttack
Naveen Patnaik - CHILD_OF -> Biju Patnaik
Naveen Patnaik -

In [30]:
def retriever(question: str):
    """Build the combined context string for a question.

    Prints the search query, gathers relationship lines from
    ``structured_retriever`` and page contents from
    ``vector_index.similarity_search``, and returns both in one text block
    under the "Structured data:" and "Unstructured data:" headings.
    """
    print(f"Search query: {question}")
    graph_context = structured_retriever(question)
    hits = vector_index.similarity_search(question)
    # "#Document " is a separator between passages, so the first passage
    # carries no marker.
    passages = "#Document ".join(doc.page_content for doc in hits)
    return f"""Structured data:
{graph_context}
Unstructured data:
{passages}
    """

In [33]:
# Condense a chat history and follow-up question into a standalone question
# The template takes two variables: {chat_history} and {question}. Its text is
# sent to the model verbatim, so treat it as prompt text.
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question,
in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""  # noqa: E501
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
    buffer = []
    for human, ai in chat_history:
        buffer.append(HumanMessage(content=human))
        buffer.append(AIMessage(content=ai))
    return buffer

# Condition: true when the input dict carries a non-empty chat_history.
_has_chat_history = RunnableLambda(
    lambda x: bool(x.get("chat_history"))
).with_config(run_name="HasChatHistoryCheck")

# Condense the follow-up question and the chat into a standalone question.
_condense_question = (
    RunnablePassthrough.assign(
        chat_history=lambda x: _format_chat_history(x["chat_history"])
    )
    | CONDENSE_QUESTION_PROMPT
    | ChatOpenAI(temperature=0)
    | StrOutputParser()
)

# Produces the text used for retrieval: the condensed question when there is
# chat history, otherwise the question exactly as given.
_search_query = RunnableBranch(
    (_has_chat_history, _condense_question),
    RunnableLambda(lambda x: x["question"]),
)

In [31]:
# Final answer prompt: the model is told to answer from the supplied
# {context} only, for the given {question}.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
# NOTE: this rebinds `prompt`, which earlier held the entity-extraction
# prompt. entity_chain captured that earlier object when it was built, so it
# is unaffected — but re-running the entity_chain cell after this one would
# pick up this answer prompt instead.
prompt = ChatPromptTemplate.from_template(template)

In [34]:
# End-to-end question-answering chain.
#
# Input:  {"question": str, "chat_history": optional list of (human, ai) pairs}
# Output: the model's answer as a string.
#
# _search_query runs first and yields a single standalone question string.
# That string feeds both retrieval ("context") and the {question} slot of the
# answer prompt. With a bare RunnablePassthrough() on the raw chain input, the
# whole input dict is rendered into the prompt as
# "Question: {'question': ..., 'chat_history': ...}".
chain = (
    _search_query
    | RunnableParallel(
        {
            "context": RunnableLambda(retriever),
            "question": RunnablePassthrough(),
        }
    )
    | prompt
    | llm
    | StrOutputParser()
)

In [35]:
# Single-turn question: no chat_history is supplied, so the question is used
# for retrieval as given.
chain.invoke({"question": "Where did Navin Patnaik attend school?"})

Search query: Where did Navin Patnaik attend school?


"Naveen Patnaik attended Welham Boys' School in Dehradun and The Doon School."

In [36]:
# Follow-up question: chat_history is supplied, so the question is first
# condensed into a standalone question (see the printed "Search query")
# before retrieval.
chain.invoke(
    {
        "question": "Who was his class mate there",
        "chat_history": [("Where did Navin Patnaik attend school?", "Naveen Patnaik attended Welham Boys' School in Dehradun and The Doon School.")],
    }
)

Search query: Who was Naveen Patnaik's classmate at Welham Boys' School in Dehradun and The Doon School?


'Sanjay Gandhi and Rajiv Gandhi were his classmates at The Doon School.'