Notebook that contains the code for the reranker RAG pipeline.

In [1]:
from src.vectorstore import connect_to_milvus, activate_collection
from src.retrievers import MilvusRerankRetriever

import os
from dotenv import load_dotenv

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

In [None]:
# Open the Milvus connection via the project helper from src.vectorstore.
connect_to_milvus()
# Obtain the collection handle from the project helper (called with no arguments,
# so which collection is selected is decided inside src.vectorstore).
collection = activate_collection()
# presumably loads the collection so it can be searched — confirm against
# the object returned by activate_collection().
collection.load()

In [2]:
# Load variables from a .env file (if one is found) into the process
# environment, then read the OpenAI and Cohere API keys from it.
# Each constant is None when the corresponding variable is not set.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
COHERE_API_KEY = os.environ.get("COHERE_API_KEY")

In [3]:
# Instantiate the project's MilvusRerankRetriever with its default arguments.
# NOTE(review): this instance is not referenced by any other visible cell — the
# chain cell creates its own `retriever`; confirm whether this one is still needed.
rerank_retriever = MilvusRerankRetriever()

In [10]:
# --- Prompt ------------------------------------------------------------------
# Optional guard-rail instruction. It is NOT part of RAG_TEMPLATE below; it is
# kept here for reference only. Append it to the template to enable it.
#   If you do not have the answer in the CONTEXT write "I don't know".
#   Do not make things up. Do not fabricate facts.
RAG_TEMPLATE = """\
You are a Huberman Lab podcast listener. 
You will be provided a CONTEXT of the podcasts and a QUESTION.
Based on the CONTEXT answer the QUESTION.


Context:
{context}

Query:
{question}
"""

# Prompt object exposing the two template variables: {context} and {question}.
rag_prompt = ChatPromptTemplate.from_template(RAG_TEMPLATE)

# --- Components --------------------------------------------------------------
# Chat model created with the library defaults (no model name is pinned here).
chat_model = ChatOpenAI()
# Retriever that supplies the context for the prompt.
retriever = MilvusRerankRetriever()

# --- Chain -------------------------------------------------------------------
# INVOKE WITH: {"question": "<<SOME USER QUESTION>>"}
# RETURNS:     {"response": <chat model output>, "context": <retriever output>}
rerank_chain = (
    # Step 1 - build the prompt inputs from the incoming dict:
    #   "context"  : the value of "question" is piped into the retriever
    #   "question" : the value of "question" is forwarded unchanged
    {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
    # Step 2 - generate the answer and return the evidence alongside it:
    #   "response" : "context" and "question" format the prompt, which is piped
    #                into the chat model
    #   "context"  : the retriever output from step 1, passed through as-is
    # (No intermediate RunnablePassthrough.assign(context=...) step is needed:
    #  re-assigning "context" to its own value leaves the dict unchanged.)
    | {"response": rag_prompt | chat_model, "context": itemgetter("context")}
)

In [11]:
# Run the chain once on a sample question; the result dict is kept in `res`.
res = rerank_chain.invoke(
    {"question": "How much caffeine a day is the maximum limit?"}
)

In [12]:
# Display the raw chain output: a dict with the model's "response" and the
# "context" that was retrieved for the question.
res

{'response': AIMessage(content='Based on the context provided, the maximum limit of caffeine intake per day is recommended to be between 300 to 600 milligrams for individuals of sufficient body weight and who are accustomed to taking caffeine.', response_metadata={'token_usage': {'completion_tokens': 40, 'prompt_tokens': 3809, 'total_tokens': 3849}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_c2295e73ad', 'finish_reason': 'stop', 'logprobs': None}, id='run-89551d2e-4ce1-4c16-b8e0-47758a7c4b66-0'),
 'context': [Document(page_content="And I should mention this study, they use this 300 milligrams per day or 600 milligrams per day.\nSo that's quite high, although for people of sufficient body weight and who are accustomed to taking caffeine, it's certainly not going to be in excess of what a lot of people out there are taking.\nBut basically what they observed was the following.\nCortisol responses to caffeine are reduced, but not eliminated in people who consume caffeine on a