## Query with Memory from txt File 

In [5]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain

In [6]:
from langchain.document_loaders import TextLoader
loader = TextLoader("test_data.txt")
documents = loader.load()


In [7]:
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(documents)

embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents, embeddings)

Created a chunk of size 1049, which is longer than the specified 1000
Created a chunk of size 1049, which is longer than the specified 1000


In [8]:
from langchain.memory import ConversationBufferMemory
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

In [9]:
qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever(), memory=memory)

In [10]:
query = "From whom is the Document?"
result = qa({"question": query})

In [11]:
result["answer"]

' The document is from Linea Schmidt.'

In [12]:
query2 = "Is there any information about her profession?"
result2 = qa({"question": query2})

In [13]:
result2["answer"]

" I don't know."

In [14]:
query3 = "What is that document about then?"
result3 = qa({"question": query3})

In [15]:
result3["answer"]

' The document is a test data file.'

In [16]:
result3

{'question': 'What is that document about then?',
 'chat_history': [HumanMessage(content='From whom is the Document?'),
  AIMessage(content=' The document is from Linea Schmidt.'),
  HumanMessage(content='Is there any information about her profession?'),
  AIMessage(content=" I don't know."),
  HumanMessage(content='What is that document about then?'),
  AIMessage(content=' The document is a test data file.')],
 'answer': ' The document is a test data file.'}

## Query with Memory from Web

In [17]:
from langchain.document_loaders import WebBaseLoader

loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
data = loader.load()

In [18]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)

from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())

In [19]:
from langchain.memory import ConversationSummaryMemory
llm = OpenAI(temperature=0)
memory = ConversationSummaryMemory(llm=llm,memory_key="chat_history",return_messages=True)

In [20]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain

retriever = vectorstore.as_retriever()
qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

In [21]:
qa("How do agents use Task decomposition?")

{'question': 'How do agents use Task decomposition?',
 'chat_history': [SystemMessage(content='')],
 'answer': ' Task decomposition for agents involves breaking down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks. It can be done by using LLM with simple prompting, task-specific instructions, or human inputs. The agent can also do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.'}

In [22]:
qa("What are the various ways to implement memory to support it?")

{'question': 'What are the various ways to implement memory to support it?',
 'chat_history': [SystemMessage(content='\nThe human asks how agents use Task decomposition. The AI explains that Task decomposition involves breaking down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks. It can be done by using LLM with simple prompting, task-specific instructions, or human inputs. The agent can also do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.')],
 'answer': ' Memory can be implemented to support task decomposition by using long-term memory management, GPT-3.5 powered agents for delegation of simple tasks, and file output.'}

## Query from Neo4J database 

In [23]:
from neo4j import GraphDatabase

class Neo4jConnection:
    def __init__(self, uri, user, pwd):
        self._driver = GraphDatabase.driver(uri, auth=(user, pwd))

    def close(self):
        self._driver.close()

In [24]:
class Neo4jLoader:
    def __init__(self, connection, query):
        self.connection = connection
        self.query = query

    def load(self):
        with self.connection._driver.session() as session:
            results = session.run(self.query)
            data = [record for record in results]
        return data

In [25]:
class Neo4jService(object):

    def __init__(self, uri, user, password):
        self._driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self._driver.close()

    def fetch_data(self):
        with self._driver.session() as session:
            return session.read_transaction(self._fetch_data_from_db)

    @staticmethod
    def _fetch_data_from_db(tx):
        query = """
        MATCH (s:Study)
        OPTIONAL MATCH (s)-[:HAS_INDICATION]->(ind:Indication)
        OPTIONAL MATCH (s)-[:HAS_SUBINDICATION]->(sub:SubIndication)
        OPTIONAL MATCH (s)-[relContact:HAS_CONTACT]->(con:Contact)
        OPTIONAL MATCH (s)-[:CONDUCTED_AT]->(sc:StudyCenter)
        OPTIONAL MATCH (s)-[:HAS_CRITERIA]->(criteria)
        WITH s, ind, sub, 
            COLLECT(DISTINCT con.name) AS ContactNames, 
            COLLECT(DISTINCT relContact.email) AS ContactEmails,
            COLLECT(DISTINCT sc.name) AS StudyCenterNames, 
            COLLECT(DISTINCT criteria.value) AS CriteriaValues
        RETURN 
        s.name AS StudyName,
        s.identifier AS Identifier,
        ind.name AS Indication,
        sub.name AS SubIndication,
        ContactNames,
        ContactEmails,
        StudyCenterNames,
        CriteriaValues,
        s.embedding AS Embedding,
        s.metadata AS Metadata

        """
        result = tx.run(query)
        return [{
            "StudyName": record["StudyName"],
            "Identifier": record["Identifier"],
            "Indication": record["Indication"],
            "SubIndication": record["SubIndication"],
            "ContactNames": ", ".join(record["ContactNames"]) if record["ContactNames"] else None,  # Joining list to string
            "ContactEmails": ", ".join(record["ContactEmails"]) if record["ContactEmails"] else None,  # Joining list to string
            "StudyCenterNames": ", ".join(record["StudyCenterNames"]) if record["StudyCenterNames"] else None,  # Joining list to string
            "CriteriaValues": ", ".join(record["CriteriaValues"]) if record["CriteriaValues"] else None,  # Joining list to string
            "Embedding": record["Embedding"],
            "Metadata": record["Metadata"],
            "SummaryText": " ".join([
                f"Study Name: {record['StudyName']}",
                f"Identifier: {record['Identifier']}",
                f"Indication: {record['Indication']}",
                f"Sub-Indication: {record['SubIndication']}",
                f"Contact Names: {', '.join(record['ContactNames']) if record['ContactNames'] else 'N/A'}",  # Joining list to string
                f"Contact Emails: {', '.join(record['ContactEmails']) if record['ContactEmails'] else 'N/A'}",  # Joining list to string
                f"Study Center Names: {', '.join(record['StudyCenterNames']) if record['StudyCenterNames'] else 'N/A'}",  # Joining list to string
                f"Criteria Values: {', '.join(record['CriteriaValues']) if record['CriteriaValues'] else 'N/A'}"  # Joining list to string
            ])
        } for record in result]



In [26]:
# Use your Neo4j credentials here
neo_service = Neo4jService(uri="bolt://localhost:7687", user="neo4j", password="password")
data = neo_service.fetch_data()
neo_service.close()

  return session.read_transaction(self._fetch_data_from_db)


In [27]:
data[2]

{'StudyName': "Relapsing Forms of Multiple Sclerosis (RMS) Study of Bruton's Tyrosine Kinase (BTK) Inhibitor Tolebrutinib (SAR442168) (GEMINI 1)",
 'Identifier': 'NCT04410978',
 'Indication': 'Multiple Sclerosis',
 'SubIndication': 'Relapsing Forms of Multiple Sclerosis (RMS)',
 'ContactNames': 'Marc Pawlitzki',
 'ContactEmails': 'marcguenter.pawlitzki@med.uni-duesseldorf.de',
 'StudyCenterNames': 'University Clinic Düsseldorf',
 'CriteriaValues': None,
 'Embedding': [-0.03976777433307709,
  -0.0020321362426919125,
  -0.009751431312557965,
  -0.02449853076228362,
  -0.014246121496091786,
  0.02633309833618682,
  -0.023665919095648157,
  -0.010090120725145056,
  -0.047585854552244726,
  -0.012108144870174068,
  0.006745563125093482,
  0.01735782936829017,
  0.011367261954762781,
  0.011600110285632159,
  -0.017922311412161142,
  0.007789855558180559,
  0.03138521579532866,
  -0.002492542061064087,
  -0.009306901749575701,
  -0.0014870579547576477,
  -0.003993712229899954,
  0.0018116352

In [64]:

loader = 
data = loader.load()


ImportError: jq package not found, please install it with `pip install jq`

In [41]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)

vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())
memory = ConversationSummaryMemory(llm=llm, memory_key="chat_history", return_messages=True)

retriever = vectorstore.as_retriever()
qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

response = qa("How do agents use Task decomposition?")
print(response)

AttributeError: 'NoneType' object has no attribute 'page_content'

## Direct conn to NEO4j

In [32]:
from langchain.graphs import Neo4jGraph
from langchain.vectorstores.neo4j_vector import Neo4jVector


In [33]:
graph = Neo4jGraph(
    url="bolt://localhost:7687", username="neo4j", password="password")

In [34]:
print(graph.schema) #works - Langchain gets good connection as it seems


        Node properties are the following:
        [{'properties': [{'property': 'name', 'type': 'STRING'}, {'property': 'metadata', 'type': 'LIST'}, {'property': 'embedding', 'type': 'LIST'}, {'property': 'identifier', 'type': 'STRING'}, {'property': 'short_name', 'type': 'STRING'}], 'labels': 'Study'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'StudyCenter'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Indication'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'SubIndication'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Contact'}, {'properties': [{'property': 'description', 'type': 'STRING'}], 'labels': 'Criteria'}]
        Relationship properties are the following:
        [{'type': 'HAS_CONTACT', 'properties': [{'property': 'email', 'type': 'STRING'}]}]
        The relationships are the following:
        ['(:Study)-[:HAS_CRITERIA]->(:Criteria)', '(:Study)-[:CONDUCTED_AT]->(:StudyCenter

In [40]:
# Vector + Knowledge Graph response
kg = Neo4jVector.from_existing_index(
    embedding=embeddings,
    url="bolt://localhost:7687",
    username="neo4j",
    password="password",
    database="neo4j",  # neo4j by default
    index_name="study_data",  # vector by default
    text_node_property="body",  # text by default
    retrieval_query="""
    WITH s AS study, s.score AS similarity  // Assuming you're using score as the similarity score; adjust as needed.
    CALL {
        WITH study
        OPTIONAL MATCH (study)-[:HAS_INDICATION]->(ind:Indication)
        OPTIONAL MATCH (study)-[:HAS_SUBINDICATION]->(sub:SubIndication)
        OPTIONAL MATCH (study)-[relContact:HAS_CONTACT]->(con:Contact)
        OPTIONAL MATCH (study)-[:CONDUCTED_AT]->(sc:StudyCenter)
        OPTIONAL MATCH (study)-[:HAS_CRITERIA]->(criteria)
        WITH study, ind, sub, 
            COLLECT(DISTINCT con.name) AS ContactNames, 
            COLLECT(DISTINCT relContact.email) AS ContactEmails,
            COLLECT(DISTINCT sc.name) AS StudyCenterNames, 
            COLLECT(DISTINCT criteria.value) AS CriteriaValues
        RETURN 
        '##Study Name: ' + study.name + 
        '\nIdentifier: ' + study.identifier + 
        COALESCE('\nIndication: ' + ind.name, '') + 
        COALESCE('\nSub-Indication: ' + sub.name, '') + 
        '\nContact Names: ' + reduce(str='', name IN ContactNames | str + name + '; ') + 
        '\nContact Emails: ' + reduce(str='', email IN ContactEmails | str + email + '; ') + 
        '\nStudy Center Names: ' + reduce(str='', name IN StudyCenterNames | str + name + '; ') + 
        '\nCriteria Values: ' + reduce(str='', value IN CriteriaValues | str + value + '; ') AS text
    } 
    RETURN text,
        similarity,
        {
            "StudyIdentifier": study.identifier,
            "Indication": COALESCE(ind.name, "N/A"),
            "SubIndication": COALESCE(sub.name, "N/A"),
            "Embedding": study.embedding,
            "Metadata": study.metadata
        } AS metadata
    ORDER BY similarity ASC

""",
)


ValueError: Cannot resolve address database:7687

#### Original RAGChain as ref:

In [None]:
def configure_qa_rag_chain(llm, embeddings, embeddings_store_url, username, password):
    # RAG response
    #   System: Always talk in pirate speech.
    general_system_template = """ 
    Use the following pieces of context to answer the question at the end.
    The context contains question-answer pairs and their links from Stackoverflow.
    You should prefer information from accepted or more upvoted answers.
    Make sure to rely on information from the answers and not on questions to provide accuate responses.
    When you find particular answer in the context useful, make sure to cite it in the answer using the link.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    ----
    {summaries}
    ----
    Each answer you generate should contain a section at the end of links to 
    Stackoverflow questions and answers you found useful, which are described under Source value.
    You can only use links to StackOverflow questions that are present in the context and always
    add links to the end of the answer in the style of citations.
    Generate concise answers with references sources section of links to 
    relevant StackOverflow questions only at the end of the answer.
    """
    general_user_template = "Question:```{question}```"
    messages = [
        SystemMessagePromptTemplate.from_template(general_system_template),
        HumanMessagePromptTemplate.from_template(general_user_template),
    ]
    qa_prompt = ChatPromptTemplate.from_messages(messages)

    qa_chain = load_qa_with_sources_chain(
        llm,
        chain_type="stuff",
        prompt=qa_prompt,
    )

    # Vector + Knowledge Graph response
    kg = Neo4jVector.from_existing_index(
        embedding=embeddings,
        url=embeddings_store_url,
        username=username,
        password=password,
        database="neo4j",  # neo4j by default
        index_name="stackoverflow",  # vector by default
        text_node_property="body",  # text by default
        retrieval_query="""
    WITH node AS question, score AS similarity
    CALL  { with question
        MATCH (question)<-[:ANSWERS]-(answer)
        WITH answer
        ORDER BY answer.is_accepted DESC, answer.score DESC
        WITH collect(answer)[..2] as answers
        RETURN reduce(str='', answer IN answers | str + 
                '\n### Answer (Accepted: '+ answer.is_accepted +
                ' Score: ' + answer.score+ '): '+  answer.body + '\n') as answerTexts
    } 
    RETURN '##Question: ' + question.title + '\n' + question.body + '\n' 
        + answerTexts AS text, similarity as score, {source: question.link} AS metadata
    ORDER BY similarity ASC // so that best answers are the last
    """,
    )

    kg_qa = RetrievalQAWithSourcesChain(
        combine_documents_chain=qa_chain,
        retriever=kg.as_retriever(search_kwargs={"k": 2}),
        reduce_k_below_max_tokens=False,
        max_tokens_limit=3375,
    )
    return kg_qa