In [2]:
import os
import warnings
from langchain._api import LangChainDeprecationWarning
from dotenv import load_dotenv, find_dotenv
from langchain_groq import ChatGroq
# Hide LangChain's deprecation notices so they do not clutter the cell outputs.
warnings.simplefilter(action="ignore", category=LangChainDeprecationWarning)

# Locate a .env file and load it; the return value is not needed, so it is
# discarded into `_`. Presumably this is where the Groq API key comes from;
# confirm against your local .env.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)

# Chat model shared by every chain built in this notebook.
llm = ChatGroq(model="llama3-70b-8192")

## 1- Create a basic RAG with memory
- We will use the RAG process we already know.
- We will use create_stuff_documents_chain to build a qa chain: a chain able to ask questions to an LLM.
- We will use create_retrieval_chain and the qa chain to build the RAG chain: a chain able to ask questions to the retriever and then format the response with the LLM.

In [3]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_chroma import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [5]:
# Embedding model used to vectorise the document chunks.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Read the source PDF into LangChain documents.
loader = PyPDFLoader("../data/Be_Good.pdf")
doc = loader.load()

# Cut the documents into overlapping chunks (chunk_size=1000, chunk_overlap=200)
# so each piece is small enough to be retrieved and placed in a prompt.
spliter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
text = spliter.split_documents(doc)

# Index the chunks in a Chroma vector store and expose it as a retriever.
db = Chroma.from_documents(documents=text, embedding=embeddings)
retriever = db.as_retriever()

  from tqdm.autonotebook import tqdm, trange


In [None]:
# System instructions for the answering model, one sentence per source line.
# "{context}" is a template variable; create_stuff_documents_chain is expected
# to fill it with the retrieved documents (NOTE(review): relies on that chain's
# default document variable name - confirm against the LangChain docs).
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n{context}"
)


# Two-message chat template: the system instructions, then the user's question.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

# QA chain: sends the documents it is given to the LLM through `prompt`.
question_answering = create_stuff_documents_chain(llm, prompt)

# RAG chain: the retriever fetches documents, the QA chain answers from them.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answering,
)

# Smoke test: a single question, no conversation memory yet.
response = rag_chain.invoke({"input": "What is this article about?"})
print(response["answer"])

This article, "Be Good" by Paul Graham, discusses the importance of creating something people want and not worrying too much about making money, at least initially. It explores the idea that this approach can lead to a surprising similarity between businesses and charities, and how helping others can ultimately lead to profit.


## 2- Create a ChatPromptTemplate able to contextualize inputs.
- Goal: put the input in context and re-phrase it so we have a contextualized input.
- We will define a new system prompt that instructs the LLM on how to contextualize the input.
- Our new ChatPromptTemplate will include:
  - The new system prompt.
  - MessagesPlaceholder, a placeholder used to pass the list of messages included in the chat_history.

In [None]:
from langchain_core.prompts import MessagesPlaceholder

# Instructions for the question-rewriting step: the model must turn the latest
# user input into a standalone question and must not answer it.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question which might "
    "reference context in the chat history, formulate a standalone "
    "question which can be understood without the chat history. "
    "Do NOT answer the question, just reformulate it if needed and "
    "otherwise return it as it is."
)

# Prompt for the question-rewriting step: the system instructions, then the
# prior conversation (injected through the "chat_history" placeholder), then
# the latest user input.
# Fix: a stray "-" after the last tuple made this cell a SyntaxError, so
# `contextualize_q_prompt` was never defined for the retriever cell below.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

## 3- Create a Retriever aware of the memory
- We will build our new retriever with create_history_aware_retriever, which uses the contextualized input to get a contextualized response.

In [8]:
from langchain.chains import create_history_aware_retriever

# Retriever that is given the LLM and the contextualizing prompt in addition to
# the base retriever, so the search can take the chat history into account.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

## 4- Create a basic Conversational RAG
- We will use the retriever aware of memory, that uses the prompt with contextualized input.
- We will use create_stuff_documents_chain to build a qa chain: a chain able to ask questions to an LLM.
- We will use create_retrieval_chain and the qa chain to build the RAG chain: a chain able to ask questions to the retriever and then format the response with the LLM.

In [10]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Answering prompt for the conversational chain: same system instructions as
# the basic RAG prompt, plus a slot for the prior conversation before the
# user's input.
qa_messages = [
    ("system", system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(qa_messages)

# Rebuild the QA chain around the history-aware prompt.
question_answering = create_stuff_documents_chain(llm, qa_prompt)

# Conversational RAG: history-aware retrieval feeding the QA chain.
# Note: this rebinds `rag_chain`, replacing the memory-less chain built earlier.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answering,
)

In [11]:
from langchain_core.messages import AIMessage, HumanMessage

# Conversation so far, maintained by hand as a flat list of messages.
chat_history = []

# Turn 1: asked with an empty history.
question = "What is this article about?"
ai_msg_1 = rag_chain.invoke({"input": question, "chat_history": chat_history})

# Record the exchange (user message first, then the model's reply) so the
# next turn can refer back to it.
chat_history.append(HumanMessage(content=question))
chat_history.append(AIMessage(content=ai_msg_1["answer"]))

# Turn 2: a follow-up that can only be answered from the recorded history.
second_question = "What was my previous question about?"
ai_msg_2 = rag_chain.invoke({"input": second_question, "chat_history": chat_history})
print(ai_msg_2["answer"])

Your previous question was "What is this article about?"


## 5- Advanced conversational RAG with persistence and session memories
- We will store the chat history in a Python dictionary. In advanced apps, you will use advanced ways to store chat history.
- Associate chat history with user session with the function get_session_history().
- Inject chat history into inputs and update it after each interaction using BaseChatMessageHistory and RunnableWithMessageHistory.

In [12]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# Per-session conversation store: session_id -> message history object.
# Deliberately NOT named `chat_history`: that name is already bound to the
# plain list used by the manual-history example earlier in the notebook.
# Sharing the name meant that re-running that earlier cell replaced this dict
# with a list and made get_session_history() fail with a TypeError.
session_histories = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the message history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying one conversation.

    Returns:
        The history object stored under ``session_id``. A new, empty
        ``ChatMessageHistory`` is stored and returned the first time a given
        id is seen; later calls return that same object.
    """
    if session_id not in session_histories:
        session_histories[session_id] = ChatMessageHistory()
    return session_histories[session_id]

# Wrap the conversational RAG chain so history is looked up per session via
# get_session_history. The three keys name the dict entries the wrapper works
# with: the user input, the slot the history is passed in, and the chain
# output to record.
chat_with_message_history = RunnableWithMessageHistory(
    runnable=rag_chain,
    get_session_history=get_session_history,
    input_messages_key="input",
    output_messages_key="answer",
    history_messages_key="chat_history",
)

In [13]:
# Config selecting which stored history to use; calls that pass this config
# share the history kept under session id "ertan_1".
session_1 = {"configurable": {"session_id": "ertan_1"}}

response = chat_with_message_history.invoke(
    {"input": "What is the artice about?"},
    config=session_1,
)

In [15]:
# Display the answer text returned by the first session-aware call.
response["answer"]

'The article "Be Good" by Paul Graham is about the benefits of starting a company with benevolent aims, and how doing good for people can give a sense of mission and make others want to help you.'

In [16]:
# Follow-up in the same session (same config as the previous call); only the
# new input is passed, no chat history is supplied by hand.
response_2 = chat_with_message_history.invoke(
    {"input": "what was my previous quesiton?"},
    config=session_1,
)
response_2["answer"]

'Your previous question was "What is the article about?"'