# Environment Setup

In [1]:
!pip install --upgrade langchain langchain-openai faiss-cpu tiktoken

Collecting langchain
  Downloading langchain-0.1.5-py3-none-any.whl (806 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/806.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m153.6/806.7 kB[0m [31m4.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m806.7/806.7 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-openai
  Downloading langchain_openai-0.0.5-py3-none-any.whl (29 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.7.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m81.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tiktoken
  Downloading tiktoken-0.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m8

In [2]:
import os
from getpass import getpass

# Never keep the key in the notebook source: reuse the value already exported in
# the environment, otherwise prompt for it without echoing.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OPENAI_API_KEY: ")

# Experiment

## Naive RAG

In [3]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

from langchain_openai import ChatOpenAI, OpenAIEmbeddings

In [4]:
# --- Load -------------------------------------------------------------------
# Blog posts that form the corpus for the naive baseline.
post_urls = (
    "https://lilianweng.github.io/posts/2020-01-29-curriculum-rl/",
    "https://lilianweng.github.io/posts/2020-04-07-the-transformer-family/",
    "https://lilianweng.github.io/posts/2020-06-07-exploration-drl/",
    "https://lilianweng.github.io/posts/2020-08-06-nas/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/#chain-of-thought-cot",
)
# Parse only elements carrying the post-content / post-title / post-header classes.
content_only = bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))

loader = WebBaseLoader(web_paths=post_urls, bs_kwargs={"parse_only": content_only})
docs = loader.load()

# --- Split (splitter defaults) ------------------------------------------------
text_splitter = RecursiveCharacterTextSplitter()
splits = text_splitter.split_documents(docs)

# --- Index --------------------------------------------------------------------
vectorstore = FAISS.from_documents(documents=splits, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# --- Prompt and model ---------------------------------------------------------
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(template)
model = ChatOpenAI()

# --- Chain: retrieve context, fill the prompt, generate, return plain text -----
chain_inputs = {"context": retriever, "question": RunnablePassthrough()}
retrieval_chain = chain_inputs | prompt | model | StrOutputParser()

In [9]:
# Ask the naive pipeline a sample question; the cell's last expression is
# what the notebook displays as the answer.
question = "What is the Augmented Language Models"
retrieval_chain.invoke(question)

'Augmented Language Models are language models that have been enhanced with reasoning skills and the ability to use external tools. They are discussed in a survey by Mialon et al. in 2023.'

## Advanced RAG

### Re-ranking

In [10]:
!pip install -U cohere

Collecting cohere
  Downloading cohere-4.45-py3-none-any.whl (52 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/52.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.1/52.1 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Collecting backoff<3.0,>=2.0 (from cohere)
  Downloading backoff-2.2.1-py3-none-any.whl (15 kB)
Collecting fastavro<2.0,>=1.8 (from cohere)
  Downloading fastavro-1.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m76.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting importlib_metadata<7.0,>=6.0 (from cohere)
  Downloading importlib_metadata-6.11.0-py3-none-any.whl (23 kB)
Installing collected packages: importlib_metadata, fastavro, backoff, cohere
  Attempting uninstall: importlib_metadata
    Found existing installation: importlib-metadata 7.0.1
    Uninstalling importlib-metadata-7.

In [17]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank
from langchain_community.llms import Cohere

from getpass import getpass

In [18]:
# Do not store the key in the notebook: take it from the environment when it is
# already exported, otherwise prompt for it without echoing.
COHERE_API_KEY = os.environ.get("COHERE_API_KEY") or getpass("COHERE_API_KEY")
os.environ["COHERE_API_KEY"] = COHERE_API_KEY


COHERE_API_KEY··········


In [11]:
# Blog posts that form the corpus for the re-ranking experiment.
post_urls = (
    "https://lilianweng.github.io/posts/2020-01-29-curriculum-rl/",
    "https://lilianweng.github.io/posts/2020-04-07-the-transformer-family/",
    "https://lilianweng.github.io/posts/2020-06-07-exploration-drl/",
    "https://lilianweng.github.io/posts/2020-08-06-nas/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/#chain-of-thought-cot",
)
# Parse only elements carrying the post-content / post-title / post-header classes.
content_only = bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))

loader = WebBaseLoader(web_paths=post_urls, bs_kwargs={"parse_only": content_only})
docs = loader.load()

In [22]:
# Split the loaded posts with chunk_size=1000 and chunk_overlap=200.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

In [23]:
# Build a FAISS index over the chunks using OpenAI embeddings.
vectorstore = FAISS.from_documents(documents=splits, embedding=OpenAIEmbeddings())

# Similarity-search retriever configured with k=3 results per query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

In [24]:
# Prompt that restricts the answer to the retrieved context.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(template)
model = ChatOpenAI()

In [25]:
# Cohere completion model; note that the chain built in the next cell uses this
# `llm` (not the ChatOpenAI `model`) as its generator.
llm = Cohere(temperature=0)
# Wrap the base retriever so its results are passed through the CohereRerank compressor.
compressor = CohereRerank()
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)

In [26]:
prompt = ChatPromptTemplate.from_template(template)

# Same shape as the naive chain, but the context comes from the re-ranking
# retriever and generation is done by `llm`.
rerank_inputs = {"context": compression_retriever, "question": RunnablePassthrough()}
retrieval_chain = rerank_inputs | prompt | llm | StrOutputParser()

In [27]:
# Ask the re-ranking pipeline the same sample question used for the naive baseline.
question = "What is the Augmented Language Models"
retrieval_chain.invoke(question)

' Augmented Language Models (ALMs) are language models that have been enhanced with reasoning skills and the capacity to utilize external tools. According to a survey by Mialon et al. in 2023, they cover a wide range of categories of language models that have been augmented. One example of an ALM is Toolformer, a model that has learned to use tools. Another example would be PAL (Program-Aided Language models.) These models utilize external knowledge bases to retrieve content for response generation. Generally, there are many different approaches to augmenting language models. '

### HyDE (Hypothetical Document Embeddings)

In [28]:
from langchain.chains import HypotheticalDocumentEmbedder, LLMChain

In [29]:
"""
  Re-implemented from: https://github.com/langchain-ai/langchain/blob/master/cookbook/hypothetical_document_embeddings.ipynb
"""

# Blog posts that form the corpus for the HyDE experiment.
post_urls = (
    "https://lilianweng.github.io/posts/2020-01-29-curriculum-rl/",
    "https://lilianweng.github.io/posts/2020-04-07-the-transformer-family/",
    "https://lilianweng.github.io/posts/2020-06-07-exploration-drl/",
    "https://lilianweng.github.io/posts/2020-08-06-nas/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/#chain-of-thought-cot",
)
# Parse only elements carrying the post-content / post-title / post-header classes.
content_only = bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))

loader = WebBaseLoader(web_paths=post_urls, bs_kwargs={"parse_only": content_only})
docs = loader.load()

# Split the loaded posts with chunk_size=1000 and chunk_overlap=200.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

In [30]:
# Base embedding model and the chat model handed to the HyDE wrapper below.
base_embeddings = OpenAIEmbeddings()
llm = ChatOpenAI()

# Build the HyDE embedder from `llm` and `base_embeddings`, selecting the
# `web_search` prompt.
embeddings = HypotheticalDocumentEmbedder.from_llm(llm, base_embeddings, "web_search")

In [31]:
# Index the chunks with the HyDE embedder (`embeddings`) instead of plain OpenAI embeddings.
vectorstore = FAISS.from_documents(documents=splits, embedding=embeddings)

# Similarity-search retriever configured with k=3 results per query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

In [32]:
# Prompt that restricts the answer to the retrieved context.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(template)
model = ChatOpenAI()

# Retrieve through the HyDE-backed retriever, then generate with ChatOpenAI.
hyde_inputs = {"context": retriever, "question": RunnablePassthrough()}
retrieval_chain = hyde_inputs | prompt | model | StrOutputParser()

In [33]:
# Ask the HyDE pipeline the same sample question used in the earlier sections.
question = "What is the Augmented Language Models"
retrieval_chain.invoke(question)

'Augmented Language Models refer to language models that have been enhanced with reasoning skills and the ability to use external tools. They are designed to go beyond traditional language models by incorporating additional capabilities for tasks that require the latest knowledge or access to external knowledge bases.'

### Meta-Prompt

In [34]:
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferWindowMemory
from langchain.prompts import PromptTemplate
from langchain_openai import OpenAI

In [35]:
def initialize_chain(instructions, memory=None):
    """Build the assistant chain for one meta-prompt episode.

    Args:
        instructions: Text placed after "Instructions:" in the prompt. It is
            inserted literally; any braces it contains are escaped so they are
            not interpreted as prompt variables.
        memory: Conversation memory for the chain. When ``None`` a fresh
            ``ConversationBufferWindowMemory`` with ``ai_prefix="Assistant"``
            is created.

    Returns:
        An ``LLMChain`` wired to the prompt and to ``memory``.
    """
    if memory is None:
        memory = ConversationBufferWindowMemory()
        memory.ai_prefix = "Assistant"

    # Instructions are frequently LLM-written (see get_new_instructions); a stray
    # "{" or "}" would otherwise be parsed as an extra template variable.
    safe_instructions = str(instructions).replace("{", "{{").replace("}", "}}")

    template = f"""
    Instructions: {safe_instructions}
    {{{memory.memory_key}}}
    Human: {{human_input}}
    Assistant:"""

    # Use the memory's own key so the declared variables always match the template.
    prompt = PromptTemplate(
        input_variables=[memory.memory_key, "human_input"], template=template
    )

    chain = LLMChain(
        llm=OpenAI(temperature=0),
        prompt=prompt,
        verbose=True,
        # Pass the memory prepared above; previously a new instance was created
        # here, discarding both the caller's memory and the "Assistant" prefix.
        memory=memory,
    )
    return chain


def initialize_meta_chain():
    """Create the critic chain that reviews a transcript and rewrites the Instructions.

    The prompt takes a single variable, ``chat_history``, and asks the model for
    a "Critique: ..." followed by revised "Instructions: ...".

    Returns:
        A verbose ``LLMChain`` backed by ``OpenAI(temperature=0)``.
    """
    critique_template = """
    Assistant has just had the below interactions with a User. Assistant followed their "Instructions" closely. Your job is to critique the Assistant's performance and then revise the Instructions so that Assistant would quickly and correctly respond in the future.

    ####

    {chat_history}

    ####

    Please reflect on these interactions.

    You should first critique Assistant's performance. What could Assistant have done better? What should the Assistant remember about this user? Are there things this user always wants? Indicate this with "Critique: ...".

    You should next revise the Instructions so that Assistant would quickly and correctly respond in the future. Assistant's goal is to satisfy the user in as few interactions as possible. Assistant will only see the new Instructions, not the interaction history, so anything important must be summarized in the Instructions. Don't forget any important details in the current Instructions! Indicate the new Instructions by "Instructions: ...".
    """

    return LLMChain(
        llm=OpenAI(temperature=0),
        prompt=PromptTemplate(
            input_variables=["chat_history"], template=critique_template
        ),
        verbose=True,
    )


def get_chat_history(chain_memory):
    """Return the conversation transcript stored in ``chain_memory``.

    Args:
        chain_memory: A memory object exposing ``memory_key`` and
            ``load_memory_variables``.

    Returns:
        The value stored under ``chain_memory.memory_key``.
    """
    memory_key = chain_memory.memory_key
    # load_memory_variables takes the chain-inputs mapping, not the key name.
    # No inputs are needed to read back the stored history, so pass an empty dict.
    return chain_memory.load_memory_variables({})[memory_key]


def get_new_instructions(meta_output):
    """Extract the revised instructions from the critic's output.

    Args:
        meta_output: Text produced by the meta chain.

    Returns:
        Everything after the first ``"Instructions: "`` marker. When the marker
        is absent the full ``meta_output`` is returned unchanged, rather than an
        arbitrary tail of it.
    """
    delimiter = "Instructions: "
    # partition() reports whether the marker was found; the previous
    # find()-based slice silently dropped the first characters when it was not.
    _, marker, new_instructions = meta_output.partition(delimiter)
    return new_instructions if marker else meta_output

In [36]:
def main(task, max_iters=3, max_meta_iters=5):
    """Run an interactive meta-prompt session for ``task``.

    Each episode builds a fresh assistant chain from the current instructions,
    sends ``task`` to it, and then alternates between printing the assistant's
    reply and reading the human's response from stdin. An episode ends early
    when the human's reply contains "task succeeded" or "task failed"
    (case-insensitive). On success the function returns; otherwise the meta
    chain critiques the transcript and its revised instructions seed the next
    episode.

    Args:
        task: The initial human message sent at the start of every episode.
        max_iters: Maximum number of human turns per episode.
        max_meta_iters: Maximum number of episodes.
    """
    failed_phrase = "task failed"
    success_phrase = "task succeeded"
    key_phrases = [success_phrase, failed_phrase]

    instructions = "None"
    for i in range(max_meta_iters):
        print(f"[Episode {i+1}/{max_meta_iters}]")
        chain = initialize_chain(instructions, memory=None)
        output = chain.predict(human_input=task)
        # Pre-bind so the success check after the loop is defined even when
        # max_iters <= 0 and the loop body never runs.
        human_input = ""
        for j in range(max_iters):
            print(f"(Step {j+1}/{max_iters})")
            print(f"Assistant: {output}")
            print("Human: ")
            human_input = input()
            if any(phrase in human_input.lower() for phrase in key_phrases):
                break
            output = chain.predict(human_input=human_input)
        if success_phrase in human_input.lower():
            print("You succeeded! Thanks for playing!")
            return
        # Episode did not succeed: critique the transcript and revise the instructions.
        meta_chain = initialize_meta_chain()
        meta_output = meta_chain.predict(chat_history=get_chat_history(chain.memory))
        print(f"Feedback: {meta_output}")
        instructions = get_new_instructions(meta_output)
        print(f"New Instructions: {instructions}")
        print("\n" + "#" * 80 + "\n")
    print("You failed! Thanks for playing!")

In [None]:
# Interactive demo: main() reads the human's replies from stdin via input(),
# so this cell waits for typed feedback when run.
task = "Provide a systematic argument for why we should always eat pasta with olives."
main(task)

### Fusion

In [42]:
# Blog posts that form the corpus for the fusion experiment.
post_urls = (
    "https://lilianweng.github.io/posts/2020-01-29-curriculum-rl/",
    "https://lilianweng.github.io/posts/2020-04-07-the-transformer-family/",
    "https://lilianweng.github.io/posts/2020-06-07-exploration-drl/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/#chain-of-thought-cot",
)
# Parse only elements carrying the post-content / post-title / post-header classes.
content_only = bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))

loader = WebBaseLoader(web_paths=post_urls, bs_kwargs={"parse_only": content_only})
docs = loader.load()

# Split the loaded posts with chunk_size=1000 and chunk_overlap=200.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

embeddings = OpenAIEmbeddings()
llm = ChatOpenAI()

# FAISS index over the chunks and a similarity retriever configured with k=3.
vectorstore = FAISS.from_documents(documents=splits, embedding=embeddings)
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

In [43]:
from langchain.load import dumps, loads
"""
Re implemented from: https://github.com/langchain-ai/langchain/blob/master/cookbook/rag_fusion.ipynb
"""

def reciprocal_rank_fusion(results: list[list], k=60):
    """Merge several ranked result lists into one list ordered by fused score.

    Every document contributes ``1 / (rank + k)`` for each list it appears in,
    where ``rank`` is its zero-based position in that list. Documents are
    identified by their ``dumps`` serialization, so the same document returned
    by several queries accumulates one combined score.

    Args:
        results: One ranked list of documents per query. Each list is assumed
            to be sorted from most to least relevant.
        k: Constant added to the rank in the score denominator.

    Returns:
        A list of ``(document, score)`` tuples sorted by descending score;
        documents are rebuilt with ``loads``.
    """
    fused_scores = {}
    for docs in results:
        # Assumes the docs are returned in sorted order of relevance
        for rank, doc in enumerate(docs):
            doc_str = dumps(doc)
            fused_scores[doc_str] = fused_scores.get(doc_str, 0) + 1 / (rank + k)

    ranked = sorted(fused_scores.items(), key=lambda item: item[1], reverse=True)
    return [(loads(doc_str), score) for doc_str, score in ranked]

In [44]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# Plain answer chain over the base retriever (unchanged).
retrieval_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# --- RAG-fusion -----------------------------------------------------------------
# `retriever.map()` expects a *list* of queries. Feeding it the single answer
# string produced by `retrieval_chain` would treat that string as a sequence of
# one-character queries, so the fusion chain first has the LLM write several
# search queries and splits them into a list.
query_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant that generates multiple search queries "
            "based on a single input query.",
        ),
        ("user", "Generate multiple search queries related to: {question}"),
        ("user", "OUTPUT (4 queries):"),
    ]
)


def _split_queries(text):
    """Turn the LLM's newline-separated output into a list of non-empty queries."""
    return [line.strip() for line in text.split("\n") if line.strip()]


generate_queries = (
    {"question": RunnablePassthrough()}
    | query_prompt
    | llm
    | StrOutputParser()
    | _split_queries
)

# One retrieval per generated query, then fuse the ranked lists.
fusion = generate_queries | retriever.map() | reciprocal_rank_fusion

In [None]:
# Run the fusion chain on the same sample question used in the earlier sections.
question = "What is the Augmented Language Models"
fusion.invoke(question)