In [1]:
import os
import pickle
import time
import uuid

from langchain.embeddings import OpenAIEmbeddings
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.storage import InMemoryStore
from langchain.vectorstores import FAISS
from langchain_community.chat_models import ChatOpenAI
from langchain_core.documents import Document
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# Copy the OpenAI key from the APIKEY module into the process environment.
# NOTE(review): APIKEY is not part of this notebook — presumably a local module
# kept out of version control; confirm it is never committed or shared.
import APIKEY
os.environ["OPENAI_API_KEY"] = APIKEY.API_KEY_SERVICE_OPENAI

In [2]:
# Common path prefix of the saved artifacts: "<prefix>.pickle" holds the summaries
# and the same prefix is passed to the FAISS loader in a later cell.
# Built with os.path.join so the path also resolves on systems where "\" is not
# a directory separator. Kept as a plain string because the ".pickle" suffix is
# appended by concatenation (the name itself already contains a dot).
DOC_SAVE_NAME = os.path.join("doc_faiss_multi", "DS_NAU8822A_DataSheet_3.5")

# Load summaries pickle
# SECURITY: pickle.load can execute arbitrary code embedded in the file —
# only load pickles that you produced yourself.
with open(DOC_SAVE_NAME + ".pickle", "rb") as f:
    SUM_list = pickle.load(f)

# The pickle is indexed as [texts, tables]; both lists are later written to the
# retriever's docstore with mset().
texts_list = SUM_list[0]
tables_list = SUM_list[1]
print(len(texts_list), len(tables_list))

96 75


In [3]:
# Load the FAISS vector store saved under DOC_SAVE_NAME, using OpenAI embeddings
embedding = OpenAIEmbeddings()
vectorstore = FAISS.load_local(DOC_SAVE_NAME, embedding)

  warn_deprecated(


In [4]:
# Metadata key handed to the retriever as id_key
# (presumably links each vector-store entry to its parent document — confirm against the indexing notebook)
id_key = "doc_id"

# In-memory storage layer for the parent documents
store = InMemoryStore()

# Retriever over the loaded vector store; its docstore is still empty here
retriever = MultiVectorRetriever(vectorstore=vectorstore, docstore=store, id_key=id_key)

# Fill the docstore: texts first, then tables
for records in (texts_list, tables_list):
    retriever.docstore.mset(records)

## RAG

In [13]:


# Single-turn RAG chain (no chat history).
# RunnablePassthrough is imported in the notebook's top import cell, so this cell
# runs on a fresh kernel without depending on a later cell having been executed.

# Prompt template
template = """Answer the question based only on the following NAU8822A CODEC context, which can include text and tables:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# LLM
model = ChatOpenAI(temperature=0, model="gpt-4-1106-preview")

# RAG pipeline: the incoming question is sent to the retriever to build "context"
# and forwarded unchanged as "question"; the filled prompt goes to the model and
# the reply is reduced to a plain string.
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

  warn_deprecated(


In [49]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables import RunnableParallel

# System instruction that turns a follow-up question into a standalone one
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, formulate a standalone question "
    "which can be understood without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
])

# LLM
model = ChatOpenAI(temperature=0, model="gpt-4-1106-preview")

# Question-rewriting chain: prompt -> model -> plain string
contextualize_q_chain = contextualize_q_prompt | model | StrOutputParser()

# Answering prompt: system instruction carrying the retrieved context,
# followed by the prior conversation and the user's current question
qa_system_prompt = (
    "Answer the question based only on the following NAU8822A CODEC context, which can include text and tables:\n"
    "{context}\n"
)
prompt = ChatPromptTemplate.from_messages([
    ("system", qa_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
])

# RAG pipeline
def format_docs(docs):
    """Join retrieved items into a single context string.

    Args:
        docs: Iterable of retrieved items. Each item is either a plain string
            or an object that exposes its text as a ``page_content`` attribute.

    Returns:
        The item texts in their original order, separated by a line containing
        ``**``. An empty iterable yields an empty string.
    """
    # Fall back to the item itself when it has no page_content (plain strings).
    return "\n**\n".join(getattr(doc, "page_content", doc) for doc in docs)
    
def contextualized_question(input: dict):
    """Choose what the retriever should be queried with.

    Args:
        input: Chain input; must contain "question" and may contain "chat_history".

    Returns:
        The raw "question" value when "chat_history" is missing or empty;
        otherwise the ``contextualize_q_chain`` runnable itself.
        NOTE(review): this relies on LangChain running a runnable returned from a
        pipeline step on the same input — confirm for the installed version.
    """
    has_history = bool(input.get("chat_history"))
    if not has_history:
        return input["question"]
    return contextualize_q_chain
    
# Retrieval sub-chain: pick the question to search with, fetch the matching
# documents, and flatten them into one context string.
context_chain = contextualized_question | retriever | format_docs

# Full pipeline: add "context" to the input dict, fill the prompt, call the model.
# There is no output parser at the end, so invoke() returns the model's message
# object, which the cells below append to chat_history.
rag_chain = RunnablePassthrough.assign(context=context_chain) | prompt | model

#rag_chain_with_source = RunnableParallel(
#    {"context": retriever, "question": RunnablePassthrough()}
#).assign(answer=rag_chain)

#rag_chain = (
#    {"context": retriever|format_docs, "question": contextualized_question}
#    | prompt
#    | model
#)


#rag_chain_from_docs = (
#    RunnablePassthrough.assign(context=contextualized_question | retriever | (lambda x: format_docs(x["context"]))
#    | prompt
#    | model
#    | StrOutputParser()
#)

#rag_chain_from_docs = (
#    RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
#    | prompt
#    | model
#    | StrOutputParser()
#)
#
#rag_chain_with_source = RunnableParallel(
#    {"context": retriever, "question": RunnablePassthrough()}
#).assign(answer=rag_chain_from_docs)



In [50]:
# Start a fresh conversation
chat_history = []

# First turn: ask, then record the question and the model's reply
question = "what is the support sample rate of NAU8822A?"
ai_msg = rag_chain.invoke({"question": question, "chat_history": chat_history})
chat_history.append(HumanMessage(content=question))
chat_history.append(ai_msg)



#chat_history = []
#question = "What is the support sample rate?"
#result = rag_chain_with_source.invoke({"question": question, "chat_history": chat_history})

In [51]:
# Show the conversation recorded so far
print(chat_history)

[HumanMessage(content='what is the support sample rate of NAU8822A?'), AIMessage(content='The NAU8822A supports any sample rate from 8kHz to 48kHz.')]


In [52]:
# Second turn: a follow-up that relies on the earlier exchange for context
second_question = "List all the sample rate and the registers?"
ai_msg = rag_chain.invoke({"question": second_question, "chat_history": chat_history})
chat_history.append(HumanMessage(content=second_question))
chat_history.append(ai_msg)

In [54]:
# Show the model's reply to the most recent question
print(ai_msg)

content='The NAU8822A CODEC supports any sample rate from 8kHz to 48kHz, but the specific sample rates are not listed in the provided context. Additionally, there are no registers provided that directly correspond to setting specific sample rates. Sample rates are typically determined by the system clock and the settings of the PLL (Phase Locked Loop) and other clock-dividing registers.\n\nTo configure the sample rate, you would typically use the PLL N, PLL K1, PLL K2, and PLL K3 registers (registers 36 to 39) to set up the PLL to generate the required system clock that corresponds to the desired sample rate. The actual sample rate would be a function of the system clock frequency and the settings of these registers.\n\nHere are the relevant registers for the PLL configuration:\n\n- Register 36 (0x24): PLL N\n- Register 37 (0x25): PLL K1\n- Register 38 (0x26): PLL K2\n- Register 39 (0x27): PLL K3\n\nThese registers are used to configure the PLL to generate a specific clock frequency th

In [23]:
# NOTE(review): the output saved with this cell shows a different conversation
# than the question cells in this notebook produce — it looks left over from an
# earlier run; re-run after Restart & Run All or remove the cell.
print(chat_history)

[HumanMessage(content='Hi my name is Gary, what is the support sample rate of NAU8822A?'), AIMessage(content='Hello Gary, the NAU8822A CODEC supports any sample rate from 8kHz to 48kHz.'), HumanMessage(content='What is my name?'), AIMessage(content='Your name is Gary.')]


In [27]:
# Inspect the prompt templates that rag_chain is built from
rag_chain.get_prompts()

[ChatPromptTemplate(input_variables=['chat_history', 'context', 'question'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template='Answer the question based only on the following NAU8822A CODEC context, which can include text and tables:\n{context}\n')), MessagesPlaceholder(variable_name='chat_history'), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='{question}'))])]

In [23]:
# NOTE(review): `result` is not assigned by any active cell in this notebook —
# the only assignment is in commented-out code (rag_chain_with_source.invoke).
# On a fresh kernel this cell raises NameError; restore that chain or drop the cell.
print(result['answer'])
print(result['context'])

The supported sample rates for the NAU8822A CODEC, as indicated in the provided context, are 8 kHz, 11.025 kHz, 12 kHz, 16 kHz, 22.05 kHz, 24 kHz, 32 kHz, 44.1 kHz, and 48 kHz. These sample rates are derived from the internal Master Clock (IMCLK) frequency, which is 128 times the base audio sample rate for the ADC. The DAC's desired sample rate in the provided PLL design example is 48.000 kHz.
['Register,Unnamed: 0,Sample Rate in kHz (FS),Unnamed: 1\r\n"14, bits 4 to",R7(SMPLR) = 101or  100,R7(SMPLR) = 011 or 010,R7(SMPLR) = 001 or 000\r\n6,,,\r\n(HPF),8 11.025 12,16 22.05 24,32 44.1 48\r\n000,82 113 122,82 113 122,82 113 122\r\n001,102 141 153,102 141 153,102 141 153\r\n010,131 180 156,131 180 156,131 180 156\r\n011,163 225 245,163 225 245,163 225 245\r\n100,204 281 306,204 281 306,204 281 306\r\n101,261 360 392,261 360 392,261 360 392\r\n110,327 450 490,327 450 490,327 450 490\r\n111,408 563 612,408 563 612,408 563 612\r\n', 'The frequency of the system clock provided as the PLL refe