Brief overview of how to use:
- SelfQueryRetriever
- ParentDocumentRetriever
- Hybrid Search with BM25Retriever and EnsembleRetriever
- ContextualCompressionRetriever and Filters
- MultiQueryRetriever
- Cohere Reranker

## SelfQueryRetriever

In [None]:
import pandas as pd
from langchain.schema import Document

# Columns copied from each CSV row into the Document metadata.
METADATA_COLUMNS = ["name", "year", "rating", "grape", "color", "country"]


def _row_to_document(row):
    """Turn one DataFrame row into a Document.

    The text comes from the "page_content" column; the metadata is the
    METADATA_COLUMNS subset of the row converted to a plain dict.
    """
    return Document(
        page_content=row["page_content"],
        metadata=row[METADATA_COLUMNS].to_dict(),
    )


df = pd.read_csv("data/wine_data.csv")

# Convert row by row (axis=1) and materialise the result as a list.
documents = list(df.apply(_row_to_document, axis=1))


In [None]:
# Show the first row of the wine DataFrame.
df.head(1)

In [None]:
# Display the list of Document objects built from the CSV rows.
documents

In [None]:
from langchain.schema import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma

import dotenv
# Locate a .env file and load it; override=True lets its values replace
# variables that are already set in the environment.
env_path = dotenv.find_dotenv()
dotenv.load_dotenv(env_path, override=True)

embeddings = OpenAIEmbeddings()

In [None]:
from langchain.llms import OpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo

# One (name, description, type) entry per metadata attribute described to the
# self-query retriever.
_ATTRIBUTE_SPECS = [
    ("grape", "The grape used to make the wine", "string or list[string]"),
    ("name", "The name of the wine", "string or list[string]"),
    ("color", "The color of the wine", "string or list[string]"),
    # NOTE(review): "where" looks like a typo for "were"; the text is passed
    # on verbatim, so it is left unchanged here.
    ("year", "The year the grapes where harvested.", "integer"),
    ("country", "The name of the country the wine comes from", "string"),
    # NOTE(review): the original line carried a "#float" remark, suggesting
    # the rating type may need to be "float" - confirm against the data.
    ("rating", "The Robert Parker rating for the wine 0-100", "integer"),
]

metadata_field_info = [
    AttributeInfo(name=attr_name, description=attr_description, type=attr_type)
    for attr_name, attr_description, attr_type in _ATTRIBUTE_SPECS
]
document_content_description = "Brief description of the wine"

In [None]:
import uuid

llm = OpenAI(temperature=0)

# Give the wine collection its own unique name. Without collection_name the
# documents land in Chroma's default collection, so re-running this cell (or
# any other default-collection call) would mix and duplicate documents.
vectorstore = Chroma.from_documents(
    documents,
    embeddings,
    collection_name=str(uuid.uuid4()),
)

# enable_limit=True lets the query itself specify how many results to return
# (e.g. "3 white wines"); verbose=True enables verbose output.
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_content_description,
    metadata_field_info,
    enable_limit=True,
    verbose=True,
)

In [None]:
# The count, colour, year range and rating range are all expressed in plain
# language inside the query text.
wine_query = "I am looking for 3 white wines between 2010-2020 with ratings between 85-92"
retriever.get_relevant_documents(wine_query)

## ParentDocumentRetriever

In [None]:
from langchain.schema import Document
from langchain.vectorstores import Chroma
from langchain.retrievers import ParentDocumentRetriever

## Text Splitting & Docloader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.storage import InMemoryStore
from langchain.document_loaders import TextLoader

from langchain.embeddings import HuggingFaceBgeEmbeddings

# Hugging Face BGE model used to embed the child chunks.
model_name = "BAAI/bge-small-en-v1.5"
encode_kwargs = dict(normalize_embeddings=True)  # set True to compute cosine similarity

# Optional GPU setting kept from the original: model_kwargs={'device': 'cuda'}
bge_embeddings = HuggingFaceBgeEmbeddings(model_name=model_name, encode_kwargs=encode_kwargs)

In [None]:
import glob

# One loader per .txt file in the test document store.
source_paths = glob.glob("../resources/tests/document_store/*.txt")
loaders = [TextLoader(path, encoding="utf-8") for path in source_paths]

# Flatten the per-file results of load() into a single list.
docs = [document for loader in loaders for document in loader.load()]

# Parent documents are the larger chunks (2000 characters) ...
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=2000)
# ... and child documents are the smaller chunks (300 characters) cut from them.
child_splitter = RecursiveCharacterTextSplitter(chunk_size=300)

# Child chunks are indexed in the vector store; parents live in the docstore.
vectorstore = Chroma(
    collection_name="split_parents",
    embedding_function=bge_embeddings,
)
store = InMemoryStore()

full_docs_retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter,
)
full_docs_retriever.add_documents(docs, ids=None)

# Display the keys now present in the parent docstore.
list(store.yield_keys())

In [None]:
query = "What was Churchill thinking?"
sub_docs = vectorstore.similarity_search(query, k=2)

# Iterate over the hits instead of indexing positions 0 and 1: the search
# returns at most k=2 results, and indexing would raise IndexError whenever
# fewer than two chunks come back.
print("\n\n".join(doc.page_content for doc in sub_docs))

In [None]:
retrieved_docs = full_docs_retriever.get_relevant_documents(query)

# Print only the first retrieved parent document. Slicing (rather than
# indexing [0]) keeps this from raising IndexError when nothing is retrieved.
print("\n\n".join(doc.page_content for doc in retrieved_docs[:1]))

In [None]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
import dotenv
# Load environment variables from a .env file, overriding existing values.
env_path = dotenv.find_dotenv()
dotenv.load_dotenv(env_path, override=True)

# Question answering over the parent-document retriever with the "stuff" chain type.
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=full_docs_retriever,
)

qa.run(query)

## Hybrid Search with BM25Retriever and EnsembleRetriever

##### https://colab.research.google.com/drive/1lsT1V_U1Gq-jv09wv0ok5QHdyRjJyNxm?usp=sharing#scrollTo=Hv3UgdKiiuVr

## Contextual Compression and Filters

In [None]:
from langchain.schema import Document
from langchain.vectorstores import Chroma

## Text Splitting & Docloader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.storage import InMemoryStore
from langchain.document_loaders import TextLoader

from langchain.embeddings import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceBgeEmbeddings

import uuid
import glob

# Hugging Face BGE embedding model used by the retriever and filters in this section.
model_name = "BAAI/bge-small-en-v1.5"
# Set True to compute cosine similarity.
encode_kwargs = {"normalize_embeddings": True}

bge_embeddings = HuggingFaceBgeEmbeddings(
    encode_kwargs=encode_kwargs,
    model_name=model_name,
    # Optional GPU setting kept from the original: model_kwargs={'device': 'cuda'}
)

In [None]:
def pprint(docs):
    """Print each document's page_content under a "Document N:" header.

    Documents are numbered from 1 and separated by a line of 50 dashes.
    """
    divider = "\n" + "-" * 50 + "\n"
    sections = []
    for position, document in enumerate(docs, start=1):
        sections.append(f"Document {position}:\n\n" + document.page_content)
    print(divider.join(sections))
    
# One loader per .txt file in the test document store.
document_paths = glob.glob("../resources/tests/document_store/*.txt")
loaders = [TextLoader(path, encoding="utf-8") for path in document_paths]

# Flatten the per-file results of load() into a single list.
docs = [loaded for loader in loaders for loaded in loader.load()]

In [None]:
# Cut the loaded documents into 300-character chunks with no overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=0)
chunks = text_splitter.split_documents(docs)

# Every .from_documents call needs its own collection_name; otherwise Chroma
# keeps reusing the default collection.
chunk_store = Chroma.from_documents(
    chunks,
    collection_name=str(uuid.uuid4()),
    embedding=bge_embeddings,
)
retriever = chunk_store.as_retriever(search_kwargs={"k": 20})

query = "What was Churchill thinking?"

# NOTE: `docs` is rebound here from the loaded source documents to the
# retrieved chunks.
docs = retriever.get_relevant_documents(query)

# Inspect the retrieved chunks.
pprint(docs)

In [None]:
from langchain.llms import OpenAI
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
import dotenv
# Load environment variables from a .env file, overriding existing values.
env_path = dotenv.find_dotenv()
dotenv.load_dotenv(env_path, override=True)

# The compressor is an LLM-backed extractor.
llm = OpenAI(temperature=0)
compressor = LLMChainExtractor.from_llm(llm)

# Two stages: the usual retrieval through `retriever`, then the compressor is
# applied to what was retrieved.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

In [None]:
# Run the query through the compression retriever and print the resulting documents.
compressed_docs = compression_retriever.get_relevant_documents(query)

pprint(compressed_docs)

In [None]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
import dotenv
# Load environment variables from a .env file, overriding existing values.
dotenv_file = dotenv.find_dotenv()
dotenv.load_dotenv(dotenv_file, override=True)

# Question answering on top of the compression retriever.
qa = RetrievalQA.from_chain_type(
    retriever=compression_retriever,
    chain_type="stuff",
    llm=OpenAI(),
)

qa.run(query)

In [None]:
# Filtering what document chunks should be passed to the LLM

# 1. LLMChainFilter
# https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.document_compressors.chain_filter.LLMChainFilter.html

from langchain.retrievers.document_compressors import LLMChainFilter

# Named `llm_filter` rather than `filter` so the builtin filter() is not
# shadowed for the rest of the notebook session.
llm_filter = LLMChainFilter.from_llm(llm)

# Retrieve with `retriever`, then let the LLM-based filter decide which of the
# retrieved chunks are kept.
compression_retriever = ContextualCompressionRetriever(base_compressor=llm_filter, base_retriever=retriever)

compressed_docs = compression_retriever.get_relevant_documents(query)

pprint(compressed_docs)

In [None]:
# Question answering on top of the current compression retriever.
qa_settings = {
    "llm": OpenAI(),
    "chain_type": "stuff",
    "retriever": compression_retriever,
}
qa = RetrievalQA.from_chain_type(**qa_settings)

qa.run(query)

In [None]:
# 2. EmbeddingsFilter
# Filter out all chunks that are less similar to user query embedding
# https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.document_compressors.embeddings_filter.EmbeddingsFilter.html

from langchain.retrievers.document_compressors import EmbeddingsFilter

# Embedding-similarity filter configured with k=5, using the BGE embeddings.
embeddings_filter = EmbeddingsFilter(
    embeddings=bge_embeddings,
    k=5,
)

# Retrieve with `retriever`, then apply the embeddings filter to the result.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=embeddings_filter,
)

compressed_docs = compression_retriever.get_relevant_documents(query)

pprint(compressed_docs)

In [None]:
# Create a pipeline using several Filters

from langchain.document_transformers import EmbeddingsRedundantFilter
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain.text_splitter import CharacterTextSplitter

## Creating the pipeline
## Creating the pipeline
# Stages, applied in list order:
#   1. Extract important info with respect to the query
#   2. Filter based on query relevance
#   3. Filter based on embedding redundancy
# Optional stages left disabled in the original:
#   - CharacterTextSplitter(chunk_size=200, chunk_overlap=0, separator=". ")
#     as a first step, to split the chunks
#   - EmbeddingsFilter(embeddings=bge_embeddings, k=3) as a last step, to keep
#     the 3 most relevant chunks by similarity to the query
extractor = LLMChainExtractor.from_llm(llm)
relevance_filter = LLMChainFilter.from_llm(llm)
redundancy_filter = EmbeddingsRedundantFilter(
    embeddings=bge_embeddings,
    similarity_threshold=0.90,
)

pipeline_compressor = DocumentCompressorPipeline(
    transformers=[extractor, relevance_filter, redundancy_filter]
)

compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=pipeline_compressor,
)

compressed_docs = compression_retriever.get_relevant_documents(query)
pprint(compressed_docs)

In [None]:
# Question answering on top of the pipeline-backed compression retriever.
answer_llm = OpenAI()
qa = RetrievalQA.from_chain_type(
    llm=answer_llm,
    chain_type="stuff",
    retriever=compression_retriever,
)

qa.run(query)

Example Pipelines

Example 1 - filter, rewrite, check with embeddings

Example 2 - retrieve multiple sources (Ensemble with BM25), filter, rewrite

Example 3 - retrieve, split, check splits with embeddings, filter, rewrite

### MultiQueryRetriever

In [None]:
# https://python.langchain.com/docs/modules/data_connection/retrievers/MultiQueryRetriever

from langchain.chat_models import ChatOpenAI
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.schema import Document
from langchain.vectorstores import Chroma

## Text Splitting & Docloader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader

from langchain.embeddings import HuggingFaceBgeEmbeddings

import uuid
import glob

# Hugging Face BGE embedding model for the multi-query section.
model_name = "BAAI/bge-small-en-v1.5"
encode_kwargs = dict(normalize_embeddings=True)  # set True to compute cosine similarity

# Optional GPU setting kept from the original: model_kwargs={'device': 'cuda'}
bge_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    encode_kwargs=encode_kwargs,
)

def pprint(docs):
    """Write the documents to stdout, numbered from 1 and divided by a 50-dash line."""

    def render(numbered):
        # `numbered` is an (index, document) pair from enumerate().
        index, doc = numbered
        return f"Document {index + 1}:\n\n" + doc.page_content

    rule = f"\n{'-' * 50}\n"
    print(rule.join(map(render, enumerate(docs))))
    
# One loader per .txt file in the test document store.
text_files = glob.glob("../resources/tests/document_store/*.txt")
loaders = [TextLoader(text_file, encoding="utf-8") for text_file in text_files]

# Flatten the per-file results of load() into a single list.
docs = [item for loader in loaders for item in loader.load()]
    
# Cut the loaded documents into 300-character chunks with no overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=0)
chunks = text_splitter.split_documents(docs)

# Every .from_documents call needs its own collection_name; otherwise Chroma
# keeps reusing the default collection.
vector_index = Chroma.from_documents(
    chunks,
    collection_name=str(uuid.uuid4()),
    embedding=bge_embeddings,
)
retriever = vector_index.as_retriever(search_kwargs={"k": 3})

llm = ChatOpenAI(temperature=0)

# Wrap the base retriever in a MultiQueryRetriever driven by the chat model.
multi_query_retriever = MultiQueryRetriever.from_llm(
    llm=llm,
    retriever=retriever,
)

In [None]:
# Set logging for the queries
import logging

# Raise the multi-query retriever's logger to INFO so its log output is shown.
logging.basicConfig()
multi_query_logger = logging.getLogger("langchain.retrievers.multi_query")
multi_query_logger.setLevel(logging.INFO)

query = "What did Churchill think?"

docs = multi_query_retriever.get_relevant_documents(query=query)

pprint(docs)

In [None]:
from langchain.chains import RetrievalQA
import dotenv
# Imported here because this section only imports ChatOpenAI; without it the
# cell raises NameError unless an earlier section's import is still in scope.
from langchain.llms import OpenAI

# Load environment variables from a .env file, overriding existing values.
dotenv.load_dotenv(dotenv.find_dotenv(), override=True)

# Question answering over the multi-query retriever; return_source_documents=True
# asks the chain to include the retrieved documents in its result.
qa = RetrievalQA.from_chain_type(llm=OpenAI(),
                                 chain_type="stuff",
                                 retriever=multi_query_retriever,
                                 return_source_documents=True)

qa(query)

### Cohere Reranker

In [None]:
import os
import getpass

# Prompt for each API key with getpass and store it in the process environment.
for env_name, prompt in (
    ("COHERE_API_KEY", "Cohere API Key:"),
    ("OPENAI_API_KEY", "OpenAI API Key:"),
):
    os.environ[env_name] = getpass.getpass(prompt)

In [None]:
def pprint(docs):
    """Print every document's page_content, each preceded by "Document N:".

    Numbering starts at 1; consecutive documents are separated by a rule of
    50 dashes on its own line.
    """
    blocks = [
        "Document {}:\n\n".format(number) + doc.page_content
        for number, doc in enumerate(docs, 1)
    ]
    print(("\n" + 50 * "-" + "\n").join(blocks))

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import TextLoader
from langchain.vectorstores import Chroma
import glob
import uuid

# Only the first matching .txt file is loaded for this example.
first_path = glob.glob("../resources/tests/document_store/*.txt")[0]
documents = TextLoader(first_path, encoding="utf-8").load()

# 400-character chunks with a 10-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=10)
texts = text_splitter.split_documents(documents)

# A fresh collection name per call keeps this index separate from earlier ones.
vector_index = Chroma.from_documents(
    texts,
    collection_name=str(uuid.uuid4()),
    embedding=OpenAIEmbeddings(),
)
retriever = vector_index.as_retriever(search_kwargs={"k": 20})

query = "What did Churchill think?"

docs = retriever.get_relevant_documents(query)
pprint(docs)

In [None]:
from langchain.llms import OpenAI
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank

llm = OpenAI(temperature=0)

# Cohere reranker used as the compressor stage.
compressor = CohereRerank()

# Retrieve with `retriever`, then pass the results through the reranker.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

compressed_docs = compression_retriever.get_relevant_documents(query)

pprint(compressed_docs)

In [None]:
from langchain.chains import RetrievalQA

# Question answering over the reranking retriever.
answer_llm = OpenAI(temperature=0)
chain = RetrievalQA.from_chain_type(
    llm=answer_llm,
    retriever=compression_retriever,
)

chain(query)

In [None]:
# With MultiQueryRetriever
from langchain.chains import RetrievalQA
from langchain.retrievers.multi_query import MultiQueryRetriever

# The reranker is built first, then the multi-query retriever it sits on top of.
reranker = CohereRerank()
multi_query_base = MultiQueryRetriever.from_llm(retriever=retriever, llm=llm)

multi_reranker = ContextualCompressionRetriever(
    base_compressor=reranker,
    base_retriever=multi_query_base,
)

# Question answering over the combined multi-query + rerank retriever.
answer_llm = OpenAI(temperature=0)
chain = RetrievalQA.from_chain_type(llm=answer_llm, retriever=multi_reranker)

chain({"query": query})