# **Import libraries**

In [None]:
!pip -q install langchain huggingface_hub openai google-search-results tiktoken chromadb lark

In [None]:
!pip install rank_bm25

In [38]:
# Model
from langchain.llms import OpenAI

# Data making tools
from langchain.schema import Document
from langchain.chains.query_constructor.base import AttributeInfo

# Vector stores
from langchain.vectorstores import Chroma

# Splitting data
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Embedding
from langchain.embeddings.openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

# Prompt
from langchain.prompts import PromptTemplate

# Advanced RAG:
# I. Self query
from langchain.retrievers.self_query.base import SelfQueryRetriever

# II. Multi query
from langchain.retrievers.multi_query import MultiQueryRetriever

import logging

# Raise the multi-query retriever's logger to INFO so that the alternative
# queries it generates are printed below the cell that runs it.
logging.basicConfig()
multi_query_logger = logging.getLogger("langchain.retrievers.multi_query")
multi_query_logger.setLevel(logging.INFO)

# III. Ensemble Retriever
from langchain.retrievers import BM25Retriever, EnsembleRetriever

# IV. Contextual Compression
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

# V. Parent Document Retriever
from langchain.retrievers import ParentDocumentRetriever

# Others
from langchain.storage import InMemoryStore

# **Advanced RAG**

## Self Query

In [6]:
# Toy wine catalogue: `page_content` is a short tasting description and
# `metadata` carries the structured attributes (name, year, rating, grape,
# color, country) that the retrievers below can filter on.
docs = [
    Document(
        page_content="Complex, layered, rich red with dark fruit flavors",
        metadata={"name":"Opus One", "year": 2018, "rating": 96, "grape": "Cabernet Sauvignon", "color":"red", "country":"USA"},
    ),
    Document(
        page_content="Luxurious, sweet wine with flavors of honey, apricot, and peach",
        metadata={"name":"Château d'Yquem", "year": 2015, "rating": 98, "grape": "Sémillon", "color":"white", "country":"France"},
    ),
    Document(
        page_content="Full-bodied red with notes of black fruit and spice",
        metadata={"name":"Penfolds Grange", "year": 2017, "rating": 97, "grape": "Shiraz", "color":"red", "country":"Australia"},
    ),
    Document(
        page_content="Elegant, balanced red with herbal and berry nuances",
        metadata={"name":"Sassicaia", "year": 2016, "rating": 95, "grape": "Cabernet Franc", "color":"red", "country":"Italy"},
    ),
    Document(
        page_content="Highly sought-after Pinot Noir with red fruit and earthy notes",
        metadata={"name":"Domaine de la Romanée-Conti", "year": 2018, "rating": 100, "grape": "Pinot Noir", "color":"red", "country":"France"},
    ),
    Document(
        page_content="Crisp white with tropical fruit and citrus flavors",
        metadata={"name":"Cloudy Bay", "year": 2021, "rating": 92, "grape": "Sauvignon Blanc", "color":"white", "country":"New Zealand"},
    ),
    Document(
        page_content="Rich, complex Champagne with notes of brioche and citrus",
        # Champagne is produced in France; the country was previously mis-tagged
        # as "New Zealand", which would make country filters return wrong results.
        metadata={"name":"Krug Grande Cuvée", "year": 2010, "rating": 93, "grape": "Chardonnay blend", "color":"sparkling", "country":"France"},
    ),
    Document(
        page_content="Intense, dark fruit flavors with hints of chocolate",
        metadata={"name":"Caymus Special Selection", "year": 2018, "rating": 96, "grape": "Cabernet Sauvignon", "color":"red", "country":"USA"},
    ),
    Document(
        page_content="Exotic, aromatic white with stone fruit and floral notes",
        metadata={"name":"Jermann Vintage Tunina", "year": 2020, "rating": 91, "grape": "Sauvignon Blanc blend", "color":"white", "country":"Italy"},
    ),
]

# Build the Chroma vector store from the catalogue using the OpenAI embeddings
# object created in the import cell.
vectorstore = Chroma.from_documents(docs, embeddings)

In [9]:
# One (name, description, type) row per metadata key stored on the wine
# documents; the rows are turned into AttributeInfo objects below and handed
# to SelfQueryRetriever.from_llm together with the content description.
_FIELD_SPECS = [
    ("grape", "The grape used to make the wine", "string or list[string]"),
    ("name", "The name of the wine", "string or list[string]"),
    ("color", "The color of the wine", "string or list[string]"),
    ("year", "The year the wine was released", "integer"),
    ("country", "The name of the country the wine comes from", "string"),
    ("rating", "The Robert Parker rating for the wine 0-100", "integer"),
]

metadata_field_info = [
    AttributeInfo(name=field_name, description=field_description, type=field_type)
    for field_name, field_description, field_type in _FIELD_SPECS
]

# Describes what each document's page_content holds.
document_content_description = "Brief description of the wine"

In [10]:
# temperature=0 is used for every LLM in this notebook.
llm = OpenAI(temperature=0)

# Self-query retriever: built from the LLM, the Chroma store, the description
# of the document contents and the metadata schema declared above.
retriever = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=vectorstore,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose=True,
)

### Ex1

In [11]:
# Ex1: a query that only constrains a metadata attribute (the wine's color).
red_wine_query = "What are some red wines"
res1 = retriever.get_relevant_documents(query=red_wine_query)
res1

[Document(page_content='Elegant, balanced red with herbal and berry nuances', metadata={'color': 'red', 'country': 'Italy', 'grape': 'Cabernet Franc', 'name': 'Sassicaia', 'rating': 95, 'year': 2016}),
 Document(page_content='Full-bodied red with notes of black fruit and spice', metadata={'color': 'red', 'country': 'Australia', 'grape': 'Shiraz', 'name': 'Penfolds Grange', 'rating': 97, 'year': 2017}),
 Document(page_content='Complex, layered, rich red with dark fruit flavors', metadata={'color': 'red', 'country': 'USA', 'grape': 'Cabernet Sauvignon', 'name': 'Opus One', 'rating': 96, 'year': 2018}),
 Document(page_content='Highly sought-after Pinot Noir with red fruit and earthy notes', metadata={'color': 'red', 'country': 'France', 'grape': 'Pinot Noir', 'name': 'Domaine de la Romanée-Conti', 'rating': 100, 'year': 2018})]

### Ex2

In [12]:
# Ex2: a purely semantic query with no metadata constraint.
# The query text previously read "fruity nodes"; the intended word is "notes",
# matching the vocabulary used in the wine descriptions.
res2 = retriever.get_relevant_documents("I want a wine that has fruity notes")
res2

[Document(page_content='Crisp white with tropical fruit and citrus flavors', metadata={'color': 'white', 'country': 'New Zealand', 'grape': 'Sauvignon Blanc', 'name': 'Cloudy Bay', 'rating': 92, 'year': 2021}),
 Document(page_content='Intense, dark fruit flavors with hints of chocolate', metadata={'color': 'red', 'country': 'USA', 'grape': 'Cabernet Sauvignon', 'name': 'Caymus Special Selection', 'rating': 96, 'year': 2018}),
 Document(page_content='Luxurious, sweet wine with flavors of honey, apricot, and peach', metadata={'color': 'white', 'country': 'France', 'grape': 'Sémillon', 'name': "Château d'Yquem", 'rating': 98, 'year': 2015}),
 Document(page_content='Complex, layered, rich red with dark fruit flavors', metadata={'color': 'red', 'country': 'USA', 'grape': 'Cabernet Sauvignon', 'name': 'Opus One', 'rating': 96, 'year': 2018})]

### Ex3

In [13]:
# Ex3: a semantic query combined with a metadata constraint on `rating`.
# The query text previously read "fruity nodes"; the intended word is "notes",
# matching the vocabulary used in the wine descriptions.
res3 = retriever.get_relevant_documents("I want a wine that has fruity notes and has a rating above 97")
res3

[Document(page_content='Luxurious, sweet wine with flavors of honey, apricot, and peach', metadata={'color': 'white', 'country': 'France', 'grape': 'Sémillon', 'name': "Château d'Yquem", 'rating': 98, 'year': 2015}),
 Document(page_content='Highly sought-after Pinot Noir with red fruit and earthy notes', metadata={'color': 'red', 'country': 'France', 'grape': 'Pinot Noir', 'name': 'Domaine de la Romanée-Conti', 'rating': 100, 'year': 2018})]

## Multi query

In [17]:
# The query text previously read "fruity nodes"; the intended word is "notes",
# matching the vocabulary used in the wine descriptions.
question = "I want a wine that has fruity notes and has a rating above 97"
llm = OpenAI(temperature=0)

# Multi-query retriever: the LLM rewrites `question` into several alternative
# queries (logged at INFO level as "Generated queries") which are run against
# the plain vector-store retriever.
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=vectorstore.as_retriever(), llm=llm
)

res = retriever_from_llm.get_relevant_documents(query=question)

INFO:langchain.retrievers.multi_query:Generated queries: ['1. Can you suggest a highly rated wine with fruity notes?', '2. What are some top-rated wines with fruity flavors?', '3. Could you recommend a wine with a rating above 97 and fruity characteristics?']


In [18]:
# Show the documents returned by the multi-query retriever.
# NOTE(review): the recorded output includes wines rated 95 and 97 even though
# the question asks for "above 97" — the base retriever here is the plain
# vector store retriever, so presumably no metadata filter is applied; confirm.
res

[Document(page_content='Luxurious, sweet wine with flavors of honey, apricot, and peach', metadata={'color': 'white', 'country': 'France', 'grape': 'Sémillon', 'name': "Château d'Yquem", 'rating': 98, 'year': 2015}),
 Document(page_content='Highly sought-after Pinot Noir with red fruit and earthy notes', metadata={'color': 'red', 'country': 'France', 'grape': 'Pinot Noir', 'name': 'Domaine de la Romanée-Conti', 'rating': 100, 'year': 2018}),
 Document(page_content='Full-bodied red with notes of black fruit and spice', metadata={'color': 'red', 'country': 'Australia', 'grape': 'Shiraz', 'name': 'Penfolds Grange', 'rating': 97, 'year': 2017}),
 Document(page_content='Elegant, balanced red with herbal and berry nuances', metadata={'color': 'red', 'country': 'Italy', 'grape': 'Cabernet Franc', 'name': 'Sassicaia', 'rating': 95, 'year': 2016}),
 Document(page_content='Complex, layered, rich red with dark fruit flavors', metadata={'color': 'red', 'country': 'USA', 'grape': 'Cabernet Sauvigno

## Ensemble Retriever

In [31]:
# bm25
# Keyword-based BM25 retriever over the raw documents, with k set to 2.
bm25_retriever = BM25Retriever.from_documents(docs)
bm25_retriever.k = 2

# Embedding-based retriever on top of the Chroma store, also with k = 2.
chroma_retriever = vectorstore.as_retriever(search_kwargs=dict(k=2))

# Ensemble of the two retrievers, each given the same weight.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, chroma_retriever],
    weights=[0.5, 0.5],
)

In [32]:
# Query the BM25 + Chroma ensemble.
# The query text previously read "fruity nodes"; the intended word is "notes",
# matching the vocabulary used in the wine descriptions.
result = ensemble_retriever.get_relevant_documents("I want a wine that has fruity notes and has a rating above 97")
result

[Document(page_content='Luxurious, sweet wine with flavors of honey, apricot, and peach', metadata={'color': 'white', 'country': 'France', 'grape': 'Sémillon', 'name': "Château d'Yquem", 'rating': 98, 'year': 2015}),
 Document(page_content='Highly sought-after Pinot Noir with red fruit and earthy notes', metadata={'color': 'red', 'country': 'France', 'grape': 'Pinot Noir', 'name': 'Domaine de la Romanée-Conti', 'rating': 100, 'year': 2018}),
 Document(page_content='Crisp white with tropical fruit and citrus flavors', metadata={'name': 'Cloudy Bay', 'year': 2021, 'rating': 92, 'grape': 'Sauvignon Blanc', 'color': 'white', 'country': 'New Zealand'})]

## Contextual Compression

In [34]:
llm = OpenAI(temperature=0)

# LLM-backed extractor used as the compressor for retrieved documents.
compressor = LLMChainExtractor.from_llm(llm)

# Wrap the plain vector-store retriever so its results pass through the compressor.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=vectorstore.as_retriever(),
    base_compressor=compressor,
)

In [35]:
# Run a query through the compression retriever; in the recorded output some
# page_content values are shortened to the fragment relevant to the question.
question = "Fruity flavor wine"
result = compression_retriever.get_relevant_documents(query=question)
result



[Document(page_content='honey, apricot, peach', metadata={'color': 'white', 'country': 'France', 'grape': 'Sémillon', 'name': "Château d'Yquem", 'rating': 98, 'year': 2015}),
 Document(page_content='Crisp white with tropical fruit and citrus flavors', metadata={'color': 'white', 'country': 'New Zealand', 'grape': 'Sauvignon Blanc', 'name': 'Cloudy Bay', 'rating': 92, 'year': 2021}),
 Document(page_content='dark fruit flavors', metadata={'color': 'red', 'country': 'USA', 'grape': 'Cabernet Sauvignon', 'name': 'Opus One', 'rating': 96, 'year': 2018}),
 Document(page_content='Intense, dark fruit flavors', metadata={'color': 'red', 'country': 'USA', 'grape': 'Cabernet Sauvignon', 'name': 'Caymus Special Selection', 'rating': 96, 'year': 2018})]

## Parent Document Retriever

In [112]:
# Two splitters with no overlap: small child chunks (chunk_size=25) are the
# units indexed in the vector store, larger parent chunks (chunk_size=50) are
# the units kept in the docstore and returned by the retriever.
child_splitter = RecursiveCharacterTextSplitter(chunk_size=25, chunk_overlap=0)
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=50, chunk_overlap=0)

# Use a dedicated Chroma collection for the child chunks instead of the shared
# `vectorstore`: that store already holds the nine whole documents, which carry
# no parent id, so they would take up search slots without ever mapping to a
# parent, and the child chunks would leak into every other retriever built on
# `vectorstore`.
parent_vectorstore = Chroma(collection_name="split_parents", embedding_function=embeddings)

# In-memory docstore holding the parent chunks.
store = InMemoryStore()

retriever = ParentDocumentRetriever(
    vectorstore=parent_vectorstore,
    docstore=store,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter,
)

# Split `docs` into parents/children and index them (ids are generated automatically).
retriever.add_documents(docs)

In [113]:
# Query the parent-document retriever; the returned documents are parent chunks.
# The query text previously read "fruity nodes"; the intended word is "notes",
# matching the vocabulary used in the wine descriptions.
result = retriever.get_relevant_documents("I want a wine that has fruity notes")
result

[Document(page_content='Luxurious, sweet wine with flavors of honey,', metadata={'name': "Château d'Yquem", 'year': 2015, 'rating': 98, 'grape': 'Sémillon', 'color': 'white', 'country': 'France'})]

In [114]:
# Preview how the parent splitter (chunk_size=50) cuts the wine documents;
# each resulting chunk keeps the metadata of the document it came from.
split = parent_splitter.split_documents(documents=docs)
split

[Document(page_content='Complex, layered, rich red with dark fruit flavors', metadata={'name': 'Opus One', 'year': 2018, 'rating': 96, 'grape': 'Cabernet Sauvignon', 'color': 'red', 'country': 'USA'}),
 Document(page_content='Luxurious, sweet wine with flavors of honey,', metadata={'name': "Château d'Yquem", 'year': 2015, 'rating': 98, 'grape': 'Sémillon', 'color': 'white', 'country': 'France'}),
 Document(page_content='apricot, and peach', metadata={'name': "Château d'Yquem", 'year': 2015, 'rating': 98, 'grape': 'Sémillon', 'color': 'white', 'country': 'France'}),
 Document(page_content='Full-bodied red with notes of black fruit and', metadata={'name': 'Penfolds Grange', 'year': 2017, 'rating': 97, 'grape': 'Shiraz', 'color': 'red', 'country': 'Australia'}),
 Document(page_content='spice', metadata={'name': 'Penfolds Grange', 'year': 2017, 'rating': 97, 'grape': 'Shiraz', 'color': 'red', 'country': 'Australia'}),
 Document(page_content='Elegant, balanced red with herbal and berry', me