# langchain-core

contains simple, core abstractions that have emerged as a standard, as well as LangChain Expression Language as a way to compose these components together. This package is now at version 0.1 and all breaking changes will be accompanied by a minor version bump.

# langchain-community
contains all third party integrations. We will work with partners on splitting key integrations out into standalone packages over the next month.

# langchain
contains higher-level and use-case specific chains, agents, and retrieval algorithms that are at the core of your application's cognitive architecture. We are targeting a launch of a stable 0.1 release for langchain in early January.

In [1]:
%pip install --upgrade --quiet  langchain langchain-community langchain-experimental neo4j wikipedia tiktoken yfiles_jupyter_graphs

In [2]:
from google.colab import userdata
OPENROUTER_API_KEY=userdata.get('OPENROUTER_API_KEY')

In [3]:
from langchain_community.graphs import Neo4jGraph
from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
    ConfigurableField
)

# Best-effort Colab setup: when running inside Google Colab, enable the
# custom widget manager. Outside Colab the import fails and the setup is
# skipped. Only ImportError is suppressed so that KeyboardInterrupt,
# SystemExit and unrelated failures are no longer silently swallowed.
try:
    import google.colab
    from google.colab import output
    output.enable_custom_widget_manager()
except ImportError:
    pass

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from typing import List , Tuple , Optional
from langchain.messages import AIMessage , HumanMessage
from langchain_core.output_parsers import StrOutputParser
from yfiles_jupyter_graphs import GraphWidget
from neo4j import GraphDatabase
import os
from langchain_community.vectorstores import Neo4jVector
from langchain_community.document_loaders import WikipediaLoader
from langchain_text_splitters import TokenTextSplitter
from langchain_openai import ChatOpenAI , OpenAIEmbeddings
from langchain_experimental.graph_transformers import LLMGraphTransformer
from typing import List , Optional , Tuple
from langchain_community.vectorstores import Neo4jVector
from pydantic import BaseModel , Field
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars

In [4]:
# Connection settings for the Neo4j instance. The password is fetched from
# Colab's `userdata` store instead of being written into the notebook.
NEO4J_URI = "neo4j+s://42348971.databases.neo4j.io"
NEO4J_USERNAME = "neo4j"
NEO4J_PASSWORD = userdata.get('NEO4J_PASSWORD')

# Publish the credentials as environment variables; later cells read them
# back through os.environ.
os.environ.update(
    {
        "OPENROUTER_API_KEY": OPENROUTER_API_KEY,
        "NEO4J_URI": NEO4J_URI,
        "NEO4J_USERNAME": NEO4J_USERNAME,
        "NEO4J_PASSWORD": NEO4J_PASSWORD,
    }
)


In [5]:
graph = Neo4jGraph()

  graph = Neo4jGraph()


In [6]:
raw_documents = WikipediaLoader(query = "Elizabeth I").load()



  lis = BeautifulSoup(html).find_all('li')


In [7]:
len(raw_documents)

24

In [8]:
# Splitter configured with chunk_size=1054 and chunk_overlap=20
# (presumably measured in tokens, given the splitter type - confirm).
text_splitter = TokenTextSplitter(chunk_size=1054, chunk_overlap=20)

# Only the first two loaded documents are chunked.
documents = text_splitter.split_documents(raw_documents[:2])

In [9]:
# Chat model reached through the OpenRouter API endpoint; the key is read
# from the environment variable set in the configuration cell.
llm = ChatOpenAI(
    model_name="mistralai/mixtral-8x7b-instruct",
    openai_api_base="https://openrouter.ai/api/v1",
    openai_api_key=os.environ["OPENROUTER_API_KEY"],
    temperature=0,
    max_tokens=4096,
)

In [10]:
llm_transformer = LLMGraphTransformer(
    llm=llm,
    strict_mode=True,
)

In [11]:
graph_documents = llm_transformer.convert_to_graph_documents(documents[:1])

In [12]:
graph.add_graph_documents(
    graph_documents=graph_documents ,
    baseEntityLabel=True,
    include_source=True ,
)

In [13]:
default_cypher = "MATCH (s)-[r:!MENTIONS]->(t) RETURN s,r,t LIMIT 50"

In [14]:
def showGraph(cypher: str = default_cypher):
    """Run a Cypher query and display its result graph as a widget.

    Args:
        cypher: Cypher statement to execute. Defaults to ``default_cypher``.

    Returns:
        The ``GraphWidget`` that was displayed.
    """
    # The driver and session are used as context managers so the connection
    # is released after the query, even if it raises.
    with GraphDatabase.driver(
        uri=os.environ["NEO4J_URI"],
        auth=(os.environ["NEO4J_USERNAME"], os.environ["NEO4J_PASSWORD"]),
    ) as driver, driver.session() as session:
        # Build the widget while the session is still open so the result
        # is fully read before the connection closes.
        widget = GraphWidget(graph=session.run(cypher).graph())
    # Label each node with its 'id' property.
    widget.node_label_mapping = 'id'
    display(widget)
    return widget

In [15]:
showGraph()

GraphWidget(layout=Layout(height='800px', width='100%'))

GraphWidget(layout=Layout(height='800px', width='100%'))

In [16]:
# Embedding client pointed at the same OpenRouter endpoint and key as the
# chat model.
_embeddings = OpenAIEmbeddings(
    openai_api_base="https://openrouter.ai/api/v1",
    openai_api_key=os.environ["OPENROUTER_API_KEY"],
)

# Vector index built over nodes already in the graph: `Document` nodes,
# reading their `text` property and using `embedding` as the embedding
# property, with the "hybrid" search type.
vector_index = Neo4jVector.from_existing_graph(
    _embeddings,
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding",
    search_type="hybrid",
)

In [17]:
graph.query("CREATE FULLTEXT INDEX entity IF NOT EXISTS FOR (e:__Entity__) ON EACH [e.id]")

[]

In [18]:
# Structured-output schema used for entity extraction.
# NOTE(review): documented with comments rather than a class docstring,
# since a docstring would presumably become part of the schema handed to
# the model - confirm before adding one.
class Entities(BaseModel):
    # Required list of entity names found in the text.
    names: List[str] = Field(
        ...,
        description=(
            "All the person, organization, or business entities "
            "that appear in the text"
        ),
    )

In [19]:
# Entity-extraction prompt: a fixed system instruction followed by a human
# message that carries the user's text in the {question} placeholder.
_entity_system_message = (
    "system",
    "You are extracting organization and person entities from the text.",
)
_entity_human_message = (
    "human",
    "Use the given format to extract information "
    "from the following input: {question}",
)
prompt = ChatPromptTemplate.from_messages(
    [_entity_system_message, _entity_human_message]
)

In [20]:
entity_chain = prompt | llm.with_structured_output(Entities)

In [21]:
entity_chain.invoke({"question": "Where was Amelia Earhart born?"}).names

['Amelia Earhart', 'Organization/Person', 'Person']

In [22]:
def generate_full_text_query(input: str) -> str:
    """Build a fuzzy full-text search query from free text.

    Lucene special characters are removed from ``input``. Each remaining
    whitespace-separated word gets the ``~2`` fuzzy suffix, and the words
    are combined with ``AND``.

    Args:
        input: Raw text, for example an entity name.

    Returns:
        The query string, e.g. ``"Amelia~2 AND Earhart~2"``. An empty string
        is returned when no searchable words remain, instead of raising
        ``IndexError``.
    """
    # str.split() with no argument never yields empty strings, so no extra
    # filtering is needed.
    words = remove_lucene_chars(input).split()
    return " AND ".join(f"{word}~2" for word in words)

In [23]:
# Fulltext index query
def structured_retriever(question: str) -> str:
    """Collect graph relationships for the entities mentioned in a question.

    Entity names are extracted from ``question`` with ``entity_chain``. Each
    name is looked up in the ``entity`` full-text index (at most 2 nodes per
    name). For every match, its non-``MENTIONS`` relationships in both
    directions are rendered as ``source - TYPE -> target`` lines, capped at
    50 lines per entity.

    Args:
        question: Natural-language question to extract entities from.

    Returns:
        All relationship lines joined by newlines, or an empty string when
        nothing was found.
    """
    entities = entity_chain.invoke({"question": question})
    lines = []
    for entity in entities.names:
        # A name with no searchable words (empty, or only Lucene special
        # characters) cannot form a valid full-text query, so skip it.
        if not remove_lucene_chars(entity).split():
            continue
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
            YIELD node,score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' +  node.id AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": generate_full_text_query(entity)},
        )
        lines.extend(el['output'] for el in response)
    # Join once at the end so every line, including the boundary between two
    # entities, is separated by a newline.
    return "\n".join(lines)

In [24]:
print(structured_retriever("Who is Elizabeth I?"))

  words = [el for el in remove_lucene_chars(input).split() if el]


Elizabeth I - DIED_IN -> Person
Elizabeth I - RULED -> Elizabethan Era
Elizabeth I - RULED -> Good Counsel
Elizabeth I - RULED -> Queen Of England And Ireland
Elizabeth I - RULED -> Baron Burghley
Elizabeth I - RULED -> England
Elizabeth I - BORN_IN -> House Of Tudor
Elizabeth I - BORN_IN -> English Church
Elizabeth I - PRECEDED -> Henry Viii
Elizabeth I - PRECEDED -> Protestant Rebels
Elizabeth I - PRECEDED -> Francis Walsingham
Elizabeth I - PRECEDED -> Elizabeth I
Elizabeth I - SUCCEEDED -> James Vi Of Scotland
Elizabeth I - CHILD -> Henry Viii
Elizabeth I - MOTHER -> Anne Boleyn
Elizabeth I - MEMBER -> House Of Tudor
Elizabeth I - RULER -> England
Elizabeth I - RULER -> England And Ireland
Elizabeth I - OCCURRED_DURING -> Elizabethan Era
Elizabeth I - NAMED_AFTER -> Elizabeth Of York
Elizabeth I - NAMED_AFTER -> Lady Elizabeth Howard
Elizabeth I - ASCENDED_TO -> Throne
Elizabeth I - DEPENDED_ON -> William Cecil
Elizabeth I - ESTABLISHED -> English Church
Elizabeth I - SUCCEEDED_BY 

In [25]:
def retriever(question: str):
    """Assemble the context string for a question.

    Prints the search query, then combines the output of
    ``structured_retriever`` with the page content of the top vector-index
    hit (``k=1``).

    Args:
        question: The standalone question to search for.

    Returns:
        A string with a "Structured data:" section followed by an
        "Unstructured data:" section.
    """
    print(f"Search query: {question}")
    graph_facts = structured_retriever(question)
    top_hits = vector_index.similarity_search(question, k=1)
    passages = "#Document ".join(hit.page_content for hit in top_hits)
    return f"""Structured data:
{graph_facts}
Unstructured data:
{passages}
    """

In [26]:
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question,
in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""

In [27]:
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

In [28]:
def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
    buffer = []
    for human, ai in chat_history:
        buffer.append(HumanMessage(content=human))
        buffer.append(AIMessage(content=ai))
    return buffer

In [29]:
# Condition: true when the input carries a non-empty "chat_history".
_has_chat_history = RunnableLambda(
    lambda x: bool(x.get("chat_history"))
).with_config(run_name="HasChatHistoryCheck")

# Branch taken when history exists: format the history into messages, fill
# the condense prompt, and ask the shared `llm` for a standalone question.
_condense_question = (
    RunnablePassthrough.assign(
        chat_history=lambda x: _format_chat_history(x["chat_history"])
    )
    | CONDENSE_QUESTION_PROMPT
    | llm
    | StrOutputParser()
)

# With history, condense the follow-up; otherwise use the question as given.
_search_query = RunnableBranch(
    (_has_chat_history, _condense_question),
    RunnableLambda(lambda x : x["question"]),
)

In [30]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
Use natural language and be concise.
Answer:"""
prompt = ChatPromptTemplate.from_template(template)

In [31]:
# Final pipeline: build the prompt inputs in parallel, render the answer
# prompt, call the model, and return its reply as plain text.
# NOTE(review): "question" is filled by RunnablePassthrough, so the prompt's
# {question} slot presumably receives the whole input mapping (including any
# chat_history) rather than just the question text - confirm this is intended.
_prompt_inputs = RunnableParallel(
    context=_search_query | retriever,
    question=RunnablePassthrough(),
)
chain = _prompt_inputs | prompt | llm | StrOutputParser()


In [39]:
print(chain.invoke({"question": "Which house did Elizabeth I belong to?"}))

print(
    chain.invoke(
        {
            "question": "When was she born?",
            "chat_history": [("Which house did Elizabeth I belong to?", "House of Tudor")],
        }
    )
)



Elizabeth I belonged to the House of Tudor. This information is implied as she was the child of Henry VIII of England, who was a Tudor king.




Elizabeth I was born on September 7, 1533.
