In [9]:
import os
from dotenv import load_dotenv
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# Load environment configuration (API keys/tokens are read via os.getenv below).
load_dotenv()

# Load, chunk and index the contents of the blog to create a retriever.
# The SoupStrainer restricts parsing to the elements carrying the post's
# title, header and content classes.
loader = WebBaseLoader(
    web_path=("https://lilianweng.github.io/posts/2023-06-23-agent/"),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-title", "post-header", "post-content")
        )
    ),
)
docs = loader.load()

# Split into 1000-character chunks with a 200-character overlap between
# neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# os.environ only accepts str values: assigning os.getenv("HF_TOKEN") directly
# raises TypeError when the variable is unset. Export the token only when one
# is actually configured instead of crashing here.
hf_token = os.getenv("HF_TOKEN")
if hf_token is not None:
    os.environ["HF_TOKEN"] = hf_token
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Embed every chunk into a Chroma vector store and expose it as a retriever.
vector_store = Chroma.from_documents(documents=splits, embedding=embeddings)
retriever = vector_store.as_retriever()

# Prompt template for the plain (history-free) RAG chain.
# Adjacent string literals are concatenated verbatim, so each fragment that
# continues a paragraph must end with a space; without it the first two
# sentences were sent to the model as "...tasks.Use the following...".
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

groq_api_key = os.getenv("GROQ_API_KEY")
llm = ChatGroq(api_key=groq_api_key, model="Llama3-8b-8192")

# Compose the chain: the retriever feeds documents into the "stuff documents"
# QA chain built from the prompt above.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

response = rag_chain.invoke({"input": "What is Self-Reflection"})
print(response["answer"])
print("*" * 30)

# This follow-up is sent on its own: the prompt has no history slot, so the
# chain receives nothing that tells it what "it" refers to.
response = rag_chain.invoke({"input": "How do we achieve it ?"})
print(response["answer"])
print("*" * 30)

# There is a need to save the chat history and pass it along with every query
# to get relevant responses.

from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import MessagesPlaceholder
from langchain.chains import create_history_aware_retriever

# System instruction for the question-rewriting step: turn a follow-up that
# leans on earlier turns into a standalone question, without answering it.
contextualize_q_system_prompt = " ".join(
    [
        "Given a chat history and the latest user question",
        "which might reference context in the chat history,",
        "formulate a standalone question which can be understood",
        "without the chat history. DO NOT answer the question,",
        "just reformulate it if needed and otherwise return it as is.",
    ]
)

# Message order: system instruction, then the prior turns supplied under
# "chat_history", then the latest user input.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

# Retriever built from the LLM, the base retriever and the rewriting prompt.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

# Answering prompt for the history-aware chain.
# Adjacent string literals are concatenated verbatim, so each fragment that
# continues a paragraph must end with a space; without it the first two
# sentences were sent to the model as "...tasks.Use the following...".
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# Same layout as the stateless prompt, plus a slot for the prior turns
# between the system instruction and the latest user input.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Rebind the chain names so that rag_chain now retrieves through the
# history-aware retriever and answers with the history-aware prompt.
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

# Manual history management: the caller owns the list of messages and passes
# it to the chain on every call.
chat_history = []

question1 = "What is Self-Reflection?"
response1 = rag_chain.invoke(dict(input=question1, chat_history=chat_history))
print(response1["answer"])
print("*" * 30)

# Record the first exchange so the follow-up below is sent with it.
chat_history += [
    HumanMessage(content=question1),
    AIMessage(content=response1["answer"]),
]

question2 = "How do we achieve it ?"
response2 = rag_chain.invoke(dict(input=question2, chat_history=chat_history))
print(response2["answer"])
print("*" * 30)

# Another way of keeping histories: one history object per session id,
# held in this module-level mapping (session_id -> history).
store = {}


def get_session_history(session_id):
    """Return the message history for ``session_id``, creating it on first use.

    The first time a session id is seen, a new ``ChatMessageHistory`` is
    registered in the module-level ``store``; every later call with the same
    id returns that same stored object.
    """
    try:
        return store[session_id]
    except KeyError:
        history = ChatMessageHistory()
        store[session_id] = history
        return history

# Wrap the RAG chain so history is looked up through get_session_history.
# The three keys name the input field, the history field and the output
# field of the wrapped chain's payload.
conversational_rag_chain = RunnableWithMessageHistory(
    runnable=rag_chain,
    get_session_history=get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

# First turn: get_session_history creates the "abc123" entry in store.
response1 = conversational_rag_chain.invoke(
    dict(input="What is Self-Reflection?"),
    config=dict(configurable=dict(session_id="abc123")),
)
print(response1["answer"])
print("*" * 30)

# Second turn: same session id, so the existing "abc123" entry in store is
# looked up again rather than created.
response2 = conversational_rag_chain.invoke(
    dict(input="How do we achieve it ?"),
    config=dict(configurable=dict(session_id="abc123")),
)
print(response2["answer"])
print("*" * 30)


Self-Reflection in the Reflexion framework is created by showing two-shot examples to LLM, where each example is a pair of (failed trajectory, ideal reflection for guiding future changes in the plan).
******************************
Task decomposition in Tree of Thoughts can be achieved through three methods: (1) using a Large Language Model (LLM) with simple prompting, (2) using task-specific instructions, or (3) with human inputs.
******************************
Self-reflection in the Reflexion framework is created by showing two-shot examples to LLM, where each example is a pair of a failed trajectory and an ideal reflection for guiding future changes in the plan. Then, these reflections are added into the agent's working memory, up to three, to be used as context for querying LLM.
******************************
Self-reflection is achieved by showing two-shot examples to LLM, where each example is a pair of a failed trajectory and an ideal reflection for guiding future changes in the 