In [1]:
%pip install --upgrade --quiet  langchain langchain-community langchain-openai langchain-experimental neo4j wikipedia tiktoken yfiles_jupyter_graphs

Note: you may need to restart the kernel to use updated packages.


In [2]:
try:
  import google.colab
  from google.colab import output
  output.enable_custom_widget_manager()
except:
  pass

In [3]:
import os
from langchain_community.graphs import Neo4jGraph

os.environ["OPENAI_API_KEY"] = "sk-"
os.environ["NEO4J_URI"] = "bolt://localhost:7687"
os.environ["NEO4J_USERNAME"] = "neo4j"
os.environ["NEO4J_PASSWORD"] = "password"

graph = Neo4jGraph()

In [4]:
from langchain.document_loaders import WikipediaLoader
from langchain.text_splitter import TokenTextSplitter

# Read the wikipedia article
raw_documents = WikipediaLoader(query="Ada Lovelace").load()
# Define chunking strategy
text_splitter = TokenTextSplitter(chunk_size=512, chunk_overlap=24)
documents = text_splitter.split_documents(raw_documents[:3])



  lis = BeautifulSoup(html).find_all('li')


In [5]:
from langchain_openai import ChatOpenAI
from langchain_experimental.graph_transformers import LLMGraphTransformer

llm=ChatOpenAI(temperature=0, model_name="gpt-4-0125-preview")

llm_transformer = LLMGraphTransformer(llm=llm)

In [6]:
from datetime import datetime

print(f"Start at {datetime.now()}")
graph_documents = llm_transformer.convert_to_graph_documents(documents)
graph.add_graph_documents(graph_documents, baseEntityLabel=True, include_source=True)
print(f"Finish at {datetime.now()}")

Start at 2024-03-11 17:36:24.517242
Finish at 2024-03-11 17:38:46.181223


In [17]:
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget
# directly show the graph resulting from the given Cypher query

default_cypher = "MATCH (s)-[r:!MENTIONS]->(t) RETURN s,r,t LIMIT 50"

def showGraph(cypher: str = default_cypher):
    # create a neo4j session to run queries
    driver = GraphDatabase.driver(uri = os.environ["NEO4J_URI"], auth = (os.environ["NEO4J_USERNAME"], os.environ["NEO4J_PASSWORD"]))
    session = driver.session()
    widget = GraphWidget(graph = session.run(cypher).graph())
    widget.node_label_mapping = 'id'
    #display(widget)
    return widget

In [18]:
showGraph()

GraphWidget(layout=Layout(height='800px', width='100%'))

In [9]:
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings


vector_index = Neo4jVector.from_existing_graph(
    OpenAIEmbeddings(),
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)

In [10]:
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from typing import List, Optional
# Retriever

graph.query("CREATE FULLTEXT INDEX entity IF NOT EXISTS FOR (e:__Entity__) ON EACH [e.id]")

# Extract entities from text
class Entities(BaseModel):
    """Identifying information about entities."""

    names: List[str] = Field(
        ...,
        description="All the person, organization, or business entities that "
        "appear in the text",
    )


prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are extracting organization and person entities from the text.",
        ),
        (
            "human",
            "Use the given format to extract information from the following "
            "input: {question}",
        ),
    ]
)

entity_chain = prompt | llm.with_structured_output(Entities)
entity_chain.invoke({"question": "Hey, how is Tomaz doing?"}).names

  warn_beta(


['Tomaz']

In [11]:
def remove_lucene_chars(text: str) -> str:
    """Remove Lucene special characters"""
    special_chars = [
        "+",
        "-",
        "&",
        "|",
        "!",
        "(",
        ")",
        "{",
        "}",
        "[",
        "]",
        "^",
        '"',
        "~",
        "*",
        "?",
        ":",
        "\\",
    ]
    for char in special_chars:
        if char in text:
            text = text.replace(char, " ")
    return text.strip()


def generate_full_text_query(input: str) -> str:
    """
    Generate a full-text search query for a given input string.

    This function constructs a query string suitable for a full-text search.
    It processes the input string by splitting it into words and appending a
    similarity threshold (~0.8) to each word, then combines them using the AND
    operator. Useful for mapping movies and people from user questions
    to database values, and allows for some misspelings.
    """
    full_text_query = ""
    words = [el for el in remove_lucene_chars(input).split() if el]
    for word in words[:-1]:
        full_text_query += f" {word}~0.8 AND"
    full_text_query += f" {words[-1]}~0.8"
    return full_text_query.strip()
    
# Fulltext index query
def structured_retriever(question: str) -> str:
    result = ""
    entities = entity_chain.invoke({"question": question})
    for entity in entities.names:
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
            YIELD node,score
            CALL {
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' - ' + neighbor.id AS output
              UNION
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' - ' +  node.id AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": generate_full_text_query(entity)},
        )
        result += "\n".join([el['output'] for el in response])
    return result

In [12]:
print(structured_retriever("How is Lovelace doing?"))

Augusta Ada King - HOLDS_TITLE - Countess Of Lovelace
Augusta Ada King - FRIEND - Charles Babbage
Augusta Ada King - INTERESTED_IN - Analytical Engine
Lord Byron - PARENT - Augusta Ada King
Anne Isabella Milbanke - PARENT - Augusta Ada King
William King - MARRIED - Augusta Ada King
William King - HOLDS_TITLE - Earl Of Lovelace
Augusta Ada King - MENTORED_BY - Mary Somerville
Luigi Menabrea - HOLDS_TITLE - Prime Minister Of Italy
Augusta Ada King - AUTHOR - Notes
First Computer Program - CREATED_BY - Ada
Babbage'S Personal Notes - DATE - 1836/1837
Ada - EXAMINED - Analytical Engine
Ada - ADVOCATED - Poetical Science
Lord Byron - PARENT_OF - Ada
Lady Byron - PARENT_OF - Ada
Ada - NAMED_AFTER - Augusta Leigh
Ada - MOVED_TO - Kirkby Mallory
Lord Byron - SUBJECT_TO - English Law
Lord Byron - DIED_IN - 1824
Ada Lovelace - DEVELOPED_BY - Nvidia
Ada Lovelace - SUCCESSOR_OF - Ampere Architecture
Ada Lovelace - ANNOUNCED_ON - September 20, 2022
Ada Lovelace - NAMED_AFTER - English Mathematician 

In [13]:
def retriever(question: str):
    print(question)
    structured_data = structured_retriever(question)
    unstructured_data = [el.page_content for el in vector_index.similarity_search(question)]
    final_data = f"""Structured data:
{structured_data}
Unstructured data:
{"#Document ". join(unstructured_data)}
    """
    return final_data

In [19]:
from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)
from langchain_core.prompts.prompt import PromptTemplate
from typing import Tuple
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser



# Condense a chat history and follow-up question into a standalone question
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""  # noqa: E501
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
    buffer = []
    for human, ai in chat_history:
        buffer.append(HumanMessage(content=human))
        buffer.append(AIMessage(content=ai))
    return buffer

_search_query = RunnableBranch(
    # If input includes chat_history, we condense it with the follow-up question
    (
        RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
            run_name="HasChatHistoryCheck"
        ),  # Condense follow-up question and chat into a standalone_question
        RunnablePassthrough.assign(
            chat_history=lambda x: _format_chat_history(x["chat_history"])
        )
        | CONDENSE_QUESTION_PROMPT
        | ChatOpenAI(temperature=0)
        | StrOutputParser(),
    ),
    # Else, we have no chat history, so just pass through the question
    RunnableLambda(lambda x : x["question"]),
)

In [20]:
from langchain_core.runnables import ConfigurableField, RunnableParallel, RunnablePassthrough


template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

chain = (
    RunnableParallel(
        {
            "context": _search_query | retriever,
            "question": RunnablePassthrough(),
        }
    )
    | prompt
    | llm
    | StrOutputParser()
)

In [21]:
chain.invoke({"question": "Who is Ada Lovelace?"})

Who is Ada Lovelace?


'Ada Lovelace, born Augusta Ada King, Countess of Lovelace (née Byron), was an English mathematician and writer, most notably recognized for her work on Charles Babbage\'s proposed mechanical general-purpose computer, the Analytical Engine. She is celebrated for being the first to recognize that the machine had potential applications beyond pure calculation, effectively making her one of the first computer programmers. Ada Lovelace was the only legitimate child of the poet Lord Byron and his wife, Anne Isabella Milbanke. Her contributions to the field of computing are commemorated by Ada Lovelace Day, an annual event celebrating the achievements of women in STEM fields. Additionally, Nvidia named a graphics processing unit microarchitecture "Lovelace" in her honor, highlighting her lasting impact on the fields of mathematics and computer science.'

In [22]:
chain.invoke({"question": "Where did she live?", "chat_history": [("Who is Ada Lovelace?", "She is a cool person from the past.")]})

Where did Ada Lovelace live?


'Ada Lovelace moved to Kirkby Mallory.'