In [1]:
from qdrant_client import QdrantClient, models
from FlagEmbedding import BGEM3FlagModel
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage
from langchain_core.chat_history import InMemoryChatMessageHistory, BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.output_parsers import StrOutputParser
from langchain_core.documents import Document
from utils.utils import convert_defaultdict, format_docs

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
def retrieve(query, embeddings, client):
    res = embeddings.encode([query], return_sparse=True, return_colbert_vecs=True)
    result = client.query_points(
        "semantic_summary_vectorstore",
        prefetch=[
            models.Prefetch(
                query=res['dense_vecs'][0],
                using="dense",
                limit=20
            ),
            models.Prefetch(
                query=models.SparseVector(**convert_defaultdict(res['lexical_weights'][0])),
                using="sparse",
                limit=20
            ),
            models.Prefetch(
                query=res['colbert_vecs'][0],
                using='colbert',
                limit=20
            )
        ],
        query=models.FusionQuery(
            fusion=models.Fusion.RRF,
        ),
        limit=10
    )

    relevant_docs = []
    for point in result.points:
        doc = client.scroll(
            collection_name="semantic_original",
            scroll_filter=models.Filter(
                must=[
                    models.FieldCondition(
                        key="doc_id",
                        match=models.MatchValue(value=point.id)
                    )
                ]
            )
        )

        temp_payload = doc[0][0].payload
        res_doc = Document(page_content=temp_payload['page_content'], metadata={'source':temp_payload['source'], 'doc_id': temp_payload['doc_id'], 'title': temp_payload['title']})
        relevant_docs.append(res_doc)
    
    
    return relevant_docs

In [4]:
client = QdrantClient("http://localhost:6333")
llm = ChatGroq(model="llama3-70b-8192")
embeddings = BGEM3FlagModel('BAAI/bge-m3', use_fp16=True)

Fetching 30 files: 100%|██████████| 30/30 [00:00<00:00, 17452.03it/s]
  colbert_state_dict = torch.load(os.path.join(model_dir, 'colbert_linear.pt'), map_location='cpu')
  sparse_state_dict = torch.load(os.path.join(model_dir, 'sparse_linear.pt'), map_location='cpu')


In [67]:
store = {}

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = InMemoryChatMessageHistory()
    return store[session_id]

In [68]:
session_id = "first_chat"
config = {"configurable": {"session_id": session_id}}

In [69]:
system_reformulate_prompt = """Using the provided chat history (if available) and the most recent user question, reformulate the question into a standalone version that is understandable without the context of the chat history. Only reformulate if necessary; otherwise, return the original question. Your response should only be the reformulated or original question."""

reformulate_prompt = ChatPromptTemplate.from_messages([
    ("system", system_reformulate_prompt),
    MessagesPlaceholder("messages")
])

In [70]:
system_prompt = """You are an AI chatbot designed to answer questions about insomnia using context retrieved from scientific articles. Your goal is to provide clear, evidence-based responses and practical advice to help users. Following those instructions:

1. Use only the information provided with the <context> tags regarding insomnia.
2. Provide concise, informative answers.
3. If a question is unclear or needs more context, ask the user for clarification.

<context>
{context}
</context>
"""

prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder("messages"),
    ("human", "{query}")
])

In [71]:
reformulate_chain = reformulate_prompt | llm | StrOutputParser()
final_chain = prompt | llm | StrOutputParser()
final_chain_with_memory = RunnableWithMessageHistory(final_chain, get_session_history, input_messages_key="query", history_messages_key="messages")

In [72]:
query = "How does caffeine affect the human body?"
if not store.get(session_id, []):
    history = []
else:
    history = store[session_id].messages

reformulate_query = reformulate_chain.invoke({"messages": history + [HumanMessage(query)]})

In [73]:
reformulate_query

'How does caffeine affect the human body?'

In [74]:
relevant_docs = retrieve(reformulate_query, embeddings=embeddings, client=client)
context = format_docs(relevant_docs[:2])

In [75]:
chain_with_history = RunnableWithMessageHistory(prompt | llm, get_session_history, history_messages_key="messages", input_messages_key="query")

In [76]:
chain_with_history.invoke({"query": query, "context": context}, config=config)

AIMessage(content='According to the provided context, caffeine affects the human body in several ways:\n\n1. **Blocks adenosine receptors**: Caffeine acts as a competitor of adenosine in its receptors, inhibiting the negative effects of adenosine on neurotransmission, excitation, and pain perception. This leads to an increased release of dopamine, noradrenaline, and glutamate.\n\n2. **Increases muscle contraction**: Caffeine induces calcium release from intracellular storages, increasing contractility during submaximal contractions, making it an ergogenic aid in various exercises.\n\n3. **Inhibits phosphodiesterases**: Caffeine acts as a nonselective competitive inhibitor of phosphodiesterases, which stimulates lipolysis, but requires very high doses.\n\n4. **Affects endothelial function**: Caffeine works as a nitric oxide (NO) stimulator, NO inhibitor, and inhibitor of NO second messenger cyclic guanosine monophosphate (cGMP), influencing endothelial dysfunction and vascular smooth mu

In [77]:
store['first_chat'].messages


[HumanMessage(content='How does caffeine affect the human body?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='According to the provided context, caffeine affects the human body in several ways:\n\n1. **Blocks adenosine receptors**: Caffeine acts as a competitor of adenosine in its receptors, inhibiting the negative effects of adenosine on neurotransmission, excitation, and pain perception. This leads to an increased release of dopamine, noradrenaline, and glutamate.\n\n2. **Increases muscle contraction**: Caffeine induces calcium release from intracellular storages, increasing contractility during submaximal contractions, making it an ergogenic aid in various exercises.\n\n3. **Inhibits phosphodiesterases**: Caffeine acts as a nonselective competitive inhibitor of phosphodiesterases, which stimulates lipolysis, but requires very high doses.\n\n4. **Affects endothelial function**: Caffeine works as a nitric oxide (NO) stimulator, NO inhibitor, and inhibitor of NO se

In [78]:
query = "Given the impact on the nervous system you mentioned, how would that affect someone with anxiety?"
if not store.get(session_id, []):
    history = []
else:
    history = store[session_id].messages

reformulate_query = reformulate_chain.invoke({"messages": history + [HumanMessage(query)]})

In [79]:
reformulate_query

'How does caffeine affect someone with anxiety?'

In [73]:
response = final_chain_with_memory.invoke(input={"context": context, "history": query}, config=config)

KeyError: 'input'

In [36]:
prompt = ChatPromptTemplate.from_messages([
    ("system", "You're an assistant who's good at {ability}"),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{question}"),
])

chain = prompt | llm

chain_with_history = RunnableWithMessageHistory(
    chain,
    # Uses the get_by_session_id function defined in the example
    # above.
    get_session_history,
    input_messages_key="question",
    history_messages_key="history",
)

print(chain_with_history.invoke(  # noqa: T201
    {"ability": "math", "question": "What does cosine mean?"},
    config={"configurable": {"session_id": "foo"}}
))

# Uses the store defined in the example above.
print(store)  # noqa: T201

print(chain_with_history.invoke(  # noqa: T201
    {"ability": "math", "question": "What's its inverse"},
    config={"configurable": {"session_id": "foo"}}
))

print(store)  # noqa: T201

content="The cosine!\n\nIn mathematics, the cosine is a fundamental concept in trigonometry. It's one of the three primary trigonometric ratios, along with the sine and tangent.\n\nThe cosine of an angle in a right triangle is defined as the ratio of the length of the adjacent side (the side adjacent to the angle) to the length of the hypotenuse (the side opposite the right angle).\n\nIn other words, if you have a right triangle with an angle θ (theta), the cosine of θ is:\n\ncos(θ) = adjacent side / hypotenuse\n\nFor example, if you have a right triangle with an angle of 30 degrees, and the adjacent side is 3 units long, and the hypotenuse is 5 units long, then:\n\ncos(30°) = 3/5 = 0.6\n\nThe cosine function has many real-world applications, such as:\n\n1. Navigation: Cosine is used to calculate distances and directions between locations on the surface of the Earth.\n2. Physics: Cosine is used to describe the motion of objects in terms of their position, velocity, and acceleration.\n3