## Query with Memory from txt File 

In [85]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain

In [86]:
from langchain.document_loaders import TextLoader
loader = TextLoader("test_data.txt")
documents = loader.load()


In [87]:
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(documents)

embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents, embeddings)

Created a chunk of size 1049, which is longer than the specified 1000
Created a chunk of size 1049, which is longer than the specified 1000


In [88]:
from langchain.memory import ConversationBufferMemory
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

In [89]:
qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever(), memory=memory)

In [90]:
query = "From whom is the Document?"
result = qa({"question": query})

In [91]:
result["answer"]

' The document is from Linea Schmidt.'

In [92]:
query2 = "Is there any information about her profession?"
result2 = qa({"question": query2})

In [93]:
result2["answer"]

" I don't know."

In [94]:
query3 = "What is that document about then?"
result3 = qa({"question": query3})

In [95]:
result3["answer"]

" I don't know."

In [96]:
result3

{'question': 'What is that document about then?',
 'chat_history': [HumanMessage(content='From whom is the Document?'),
  AIMessage(content=' The document is from Linea Schmidt.'),
  HumanMessage(content='Is there any information about her profession?'),
  AIMessage(content=" I don't know."),
  HumanMessage(content='What is that document about then?'),
  AIMessage(content=" I don't know.")],
 'answer': " I don't know."}

## Query with Memory from Web

In [97]:
from langchain.document_loaders import WebBaseLoader

loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
data = loader.load()

In [98]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)

from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())

In [99]:
from langchain.memory import ConversationSummaryMemory
llm = OpenAI(temperature=0)
memory = ConversationSummaryMemory(llm=llm,memory_key="chat_history",return_messages=True)

In [100]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain

retriever = vectorstore.as_retriever()
qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

In [101]:
qa("How do agents use Task decomposition?")

{'question': 'How do agents use Task decomposition?',
 'chat_history': [SystemMessage(content='')],
 'answer': ' Task decomposition for agents involves breaking down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks. The agent can also do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.'}

In [102]:
qa("What are the various ways to implement memory to support it?")

{'question': 'What are the various ways to implement memory to support it?',
 'chat_history': [SystemMessage(content='\nThe human asks what the AI thinks of artificial intelligence. The AI thinks artificial intelligence is a force for good because it will help humans reach their full potential. The human then asks how agents use Task decomposition, to which the AI responds that it involves breaking down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks. The agent can also do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.')],
 'answer': ' Memory can be used to store information about the task, such as the subgoals, the steps taken to achieve them, and the results of each step. This information can then be used to inform future decisions and refine the task decomposition.'}

## Query from Neo4J database 

In [103]:
from neo4j import GraphDatabase

class Neo4jConnection:
    def __init__(self, uri, user, pwd):
        self._driver = GraphDatabase.driver(uri, auth=(user, pwd))

    def close(self):
        self._driver.close()

In [104]:
class Neo4jLoader:
    def __init__(self, connection, query):
        self.connection = connection
        self.query = query

    def load(self):
        with self.connection._driver.session() as session:
            results = session.run(self.query)
            data = [record for record in results]
        return data

In [105]:
class Neo4jService(object):

    def __init__(self, uri, user, password):
        self._driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self._driver.close()

    def fetch_data(self):
        with self._driver.session() as session:
            return session.read_transaction(self._fetch_data_from_db)

    @staticmethod
    def _fetch_data_from_db(tx):
        query = """
        MATCH (s:Study)
        OPTIONAL MATCH (s)-[:HAS_INDICATION]->(ind:Indication)
        OPTIONAL MATCH (s)-[:HAS_SUBINDICATION]->(sub:SubIndication)
        OPTIONAL MATCH (s)-[relContact:HAS_CONTACT]->(con:Contact)
        OPTIONAL MATCH (s)-[:CONDUCTED_AT]->(sc:StudyCenter)
        OPTIONAL MATCH (s)-[:HAS_CRITERIA]->(criteria)
        WITH s, ind, sub, 
            COLLECT(DISTINCT con.name) AS ContactNames, 
            COLLECT(DISTINCT relContact.email) AS ContactEmails,
            COLLECT(DISTINCT sc.name) AS StudyCenterNames, 
            COLLECT(DISTINCT criteria.value) AS CriteriaValues
        RETURN 
        s.name AS StudyName,
        s.identifier AS Identifier,
        ind.name AS Indication,
        sub.name AS SubIndication,
        ContactNames,
        ContactEmails,
        StudyCenterNames,
        CriteriaValues,
        s.embedding AS Embedding,
        s.metadata AS Metadata

        """
        result = tx.run(query)
        return [{
            "StudyName": record["StudyName"],
            "Identifier": record["Identifier"],
            "Indication": record["Indication"],
            "SubIndication": record["SubIndication"],
            "ContactNames": ", ".join(record["ContactNames"]) if record["ContactNames"] else None,  # Joining list to string
            "ContactEmails": ", ".join(record["ContactEmails"]) if record["ContactEmails"] else None,  # Joining list to string
            "StudyCenterNames": ", ".join(record["StudyCenterNames"]) if record["StudyCenterNames"] else None,  # Joining list to string
            "CriteriaValues": ", ".join(record["CriteriaValues"]) if record["CriteriaValues"] else None,  # Joining list to string
            "Embedding": record["Embedding"],
            "Metadata": record["Metadata"],
            "SummaryText": " ".join([
                f"Study Name: {record['StudyName']}",
                f"Identifier: {record['Identifier']}",
                f"Indication: {record['Indication']}",
                f"Sub-Indication: {record['SubIndication']}",
                f"Contact Names: {', '.join(record['ContactNames']) if record['ContactNames'] else 'N/A'}",  # Joining list to string
                f"Contact Emails: {', '.join(record['ContactEmails']) if record['ContactEmails'] else 'N/A'}",  # Joining list to string
                f"Study Center Names: {', '.join(record['StudyCenterNames']) if record['StudyCenterNames'] else 'N/A'}",  # Joining list to string
                f"Criteria Values: {', '.join(record['CriteriaValues']) if record['CriteriaValues'] else 'N/A'}"  # Joining list to string
            ])
        } for record in result]



In [106]:
# Use your Neo4j credentials here
neo_service = Neo4jService(uri="bolt://localhost:7687", user="neo4j", password="password")
data = neo_service.fetch_data()
neo_service.close()

  return session.read_transaction(self._fetch_data_from_db)


In [107]:
data[1]

{'StudyName': 'Safety, Adherence and Persistence in a Real-World Cohort of German MS Patients Newly Treated With Ocrelizumab (CONFIDENCE)',
 'Identifier': 'STUDYMATCH-9c5807da-eb1b-49f9-84c6-f7715083685a',
 'Indication': 'Multiple Sclerosis',
 'SubIndication': 'UNKNOWN_VALUE',
 'ContactNames': ' Marcia Gasis, Marc Pawlitzki',
 'ContactEmails': 'UNKNOWN_VALUE, marcguenter.pawlitzki@med.uni-duesseldorf.de',
 'StudyCenterNames': 'University Clinic Düsseldorf',
 'CriteriaValues': None,
 'Embedding': [-0.009933089107644977,
  -0.003986260468925026,
  0.004819159028108939,
  -0.03246591306038755,
  0.005545802992599137,
  0.029010926030592386,
  -0.013600585326827565,
  -0.00377032354673218,
  -0.02171706266060186,
  -0.014903060462950292,
  0.02055169124876032,
  0.0003624651945629001,
  -0.0013110443517209483,
  0.017987870044889515,
  -0.015780518151620754,
  0.02249854848733357,
  0.05023441938160139,
  0.007053932783821047,
  -0.011921077277238066,
  -0.013909066378153747,
  -0.03394662

In [108]:

data = loader.load()


In [109]:
#text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
#all_splits = text_splitter.split_documents(data)

# get from all studies in data the list of embeddings
embeddings = [study["Embedding"] for study in data]
vectorstore = Chroma.from_embeddings(embeddings)
memory = ConversationSummaryMemory(llm=llm, memory_key="chat_history", return_messages=True)

retriever = vectorstore.as_retriever()
qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

response = qa("How do agents use Task decomposition?")
print(response)

TypeError: 'Document' object is not subscriptable

## Direct conn to NEO4j

In [110]:
from langchain.graphs import Neo4jGraph
from langchain.vectorstores.neo4j_vector import Neo4jVector


In [111]:
graph = Neo4jGraph(
    url="bolt://localhost:7687", username="neo4j", password="password")

In [112]:
print(graph.schema) #works - Langchain gets good connection as it seems


        Node properties are the following:
        [{'properties': [{'property': 'name', 'type': 'STRING'}, {'property': 'short_name', 'type': 'STRING'}, {'property': 'identifier', 'type': 'STRING'}, {'property': 'metadata', 'type': 'LIST'}, {'property': 'embedding', 'type': 'LIST'}], 'labels': 'Study'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'StudyCenter'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Indication'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'SubIndication'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Contact'}, {'properties': [{'property': 'description', 'type': 'STRING'}], 'labels': 'Criteria'}]
        Relationship properties are the following:
        [{'type': 'HAS_CONTACT', 'properties': [{'property': 'email', 'type': 'STRING'}]}]
        The relationships are the following:
        ['(:Study)-[:HAS_CRITERIA]->(:Criteria)', '(:Study)-[:CONDUCTED_AT]->(:StudyCenter

In [113]:
from langchain.chains import GraphCypherQAChain
neo4j_graph = Neo4jGraph(url="bolt://localhost:7687", username="neo4j", password="password")
chain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0), graph=graph, verbose=True
)
chain.run("What is the most common study location?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (s:Study)-[:CONDUCTED_AT]->(sc:StudyCenter)
RETURN sc.name AS study_location, count(*) AS count
ORDER BY count DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'study_location': 'University Clinic Düsseldorf', 'count': 13}][0m

[1m> Finished chain.[0m


'The most common study location is the University Clinic Düsseldorf, with a count of 13.'

In [115]:
# Vector + Knowledge Graph response
kg = Neo4jVector.from_existing_index(
    embedding="embedding",
    url="bolt://localhost:7687",
    username="neo4j",
    password="password",
    database="neo4j",  # neo4j by default
    index_name="study_data",  # vector by default
    text_node_property="body",  # text by default
    retrieval_query="""
    MATCH (s:Study)
WITH s, s.score AS similarity
CALL {
    WITH s
    OPTIONAL MATCH (study)-[:HAS_INDICATION]->(ind:Indication)
    OPTIONAL MATCH (study)-[:HAS_SUBINDICATION]->(sub:SubIndication)
    OPTIONAL MATCH (study)-[relContact:HAS_CONTACT]->(con:Contact)
    OPTIONAL MATCH (study)-[:CONDUCTED_AT]->(sc:StudyCenter)
    OPTIONAL MATCH (study)-[:HAS_CRITERIA]->(criteria)
    WITH study, ind, sub, 
        COLLECT(DISTINCT con.name) AS ContactNames, 
        COLLECT(DISTINCT relContact.email) AS ContactEmails,
        COLLECT(DISTINCT sc.name) AS StudyCenterNames, 
        COLLECT(DISTINCT criteria.value) AS CriteriaValues
    RETURN 
    '##Study Name: ' + study.name + 
    '\nIdentifier: ' + study.identifier + 
    COALESCE('\nIndication: ' + ind.name, '') + 
    COALESCE('\nSub-Indication: ' + sub.name, '') + 
    '\nContact Names: ' + reduce(str='', name IN ContactNames | str + name + '; ') + 
    '\nContact Emails: ' + reduce(str='', email IN ContactEmails | str + email + '; ') + 
    '\nStudy Center Names: ' + reduce(str='', name IN StudyCenterNames | str + name + '; ') + 
    '\nCriteria Values: ' + reduce(str='', value IN CriteriaValues | str + value + '; ') AS text, ind, sub
} 
WITH text, s, similarity, ind, sub
RETURN text,
       similarity,
       s.identifier AS StudyIdentifier,
       COALESCE(ind.name, "N/A") AS Indication,
       COALESCE(sub.name, "N/A") AS SubIndication,
       s.embedding AS embedding,
       s.metadata AS metadata
ORDER BY similarity ASC

""",
)


ValueError: Cannot resolve address database:7687

#### Original RAGChain as ref:

In [None]:
def configure_qa_rag_chain(llm, embeddings, embeddings_store_url, username, password):
    # RAG response
    #   System: Always talk in pirate speech.
    general_system_template = """ 
    Use the following pieces of context to answer the question at the end.
    The context contains question-answer pairs and their links from Stackoverflow.
    You should prefer information from accepted or more upvoted answers.
    Make sure to rely on information from the answers and not on questions to provide accuate responses.
    When you find particular answer in the context useful, make sure to cite it in the answer using the link.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    ----
    {summaries}
    ----
    Each answer you generate should contain a section at the end of links to 
    Stackoverflow questions and answers you found useful, which are described under Source value.
    You can only use links to StackOverflow questions that are present in the context and always
    add links to the end of the answer in the style of citations.
    Generate concise answers with references sources section of links to 
    relevant StackOverflow questions only at the end of the answer.
    """
    general_user_template = "Question:```{question}```"
    messages = [
        SystemMessagePromptTemplate.from_template(general_system_template),
        HumanMessagePromptTemplate.from_template(general_user_template),
    ]
    qa_prompt = ChatPromptTemplate.from_messages(messages)

    qa_chain = load_qa_with_sources_chain(
        llm,
        chain_type="stuff",
        prompt=qa_prompt,
    )

    # Vector + Knowledge Graph response
    kg = Neo4jVector.from_existing_index(
        embedding=embeddings,
        url=embeddings_store_url,
        username=username,
        password=password,
        database="neo4j",  # neo4j by default
        index_name="stackoverflow",  # vector by default
        text_node_property="body",  # text by default
        retrieval_query="""
    WITH node AS question, score AS similarity
    CALL  { with question
        MATCH (question)<-[:ANSWERS]-(answer)
        WITH answer
        ORDER BY answer.is_accepted DESC, answer.score DESC
        WITH collect(answer)[..2] as answers
        RETURN reduce(str='', answer IN answers | str + 
                '\n### Answer (Accepted: '+ answer.is_accepted +
                ' Score: ' + answer.score+ '): '+  answer.body + '\n') as answerTexts
    } 
    RETURN '##Question: ' + question.title + '\n' + question.body + '\n' 
        + answerTexts AS text, similarity as score, {source: question.link} AS metadata
    ORDER BY similarity ASC // so that best answers are the last
    """,
    )

    kg_qa = RetrievalQAWithSourcesChain(
        combine_documents_chain=qa_chain,
        retriever=kg.as_retriever(search_kwargs={"k": 2}),
        reduce_k_below_max_tokens=False,
        max_tokens_limit=3375,
    )
    return kg_qa