# Hybrid Search

In [79]:
!pip install rank_bm25



In [80]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment.
# The file must contain an entry named OPENAI_API_KEY.
load_dotenv()

# Read the API key from the environment and fail fast with a clear message
# when it is missing, instead of handing None to the embedding client later.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or export it "
        "in the environment before running this notebook."
    )

## BM25 Retriever - Sparse retriever

In [81]:
# LangChain building blocks used below: the OpenAI embedding client, the
# BM25 and ensemble retrievers, and the FAISS vector store.
# NOTE(review): these `langchain.*` import paths appear to be deprecated in
# recent LangChain releases (moved to langchain_community / langchain_openai)
# -- confirm against the installed version before migrating.
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.vectorstores import FAISS

# Embedding client, configured with the key read from the environment.
embedding = OpenAIEmbeddings(
    api_key=api_key,
)


In [92]:
# Toy corpus indexed by both retrievers. Order matters: it is the order in
# which the texts are handed to BM25Retriever.from_texts and FAISS.from_texts.
doc_list = [
    # "apple" / "Apple" appears as a fruit and as a brand
    "I like apple",
    "I like oranges",
    "Apples and oranges are fruits",
    "I like computers by Apple",
    "I love fruit juice",
    # sentences unrelated to fruit
    "A green mattress",
    "A phone with wifi connection",
]

In [93]:
# Build the sparse (keyword-based) BM25 retriever over the corpus and
# limit it to the 2 best-scoring documents per query.
bm25_retriever = BM25Retriever.from_texts(doc_list, k=2)

In [94]:
# Sparse lookup. Of the query words only "phone" occurs in the corpus.
# `invoke` is the current retriever entry point; it replaces the deprecated
# `get_relevant_documents`.
bm25_retriever.invoke("microsoft phone")

[Document(metadata={}, page_content='A phone with wifi connection'),
 Document(metadata={}, page_content='A green mattress')]

In [95]:
# Multi-keyword sparse lookup. `invoke` is the current retriever entry point;
# it replaces the deprecated `get_relevant_documents`.
bm25_retriever.invoke("Apple juice computers")

[Document(metadata={}, page_content='I like computers by Apple'),
 Document(metadata={}, page_content='I love fruit juice')]

## FAISS Retriever - Dense retriever (embeddings)

In [96]:
# Embed every text in the corpus and index the vectors in a FAISS store.
faiss_vectorstore = FAISS.from_texts(
    texts=doc_list,
    embedding=embedding,
)

# Expose the store as a retriever that returns the 2 nearest documents.
faiss_retriever = faiss_vectorstore.as_retriever(
    search_kwargs=dict(k=2),
)

In [97]:
# Dense (embedding-based) lookup. `invoke` is the current retriever entry
# point; it replaces the deprecated `get_relevant_documents`.
faiss_retriever.invoke("apple iphone")

[Document(id='4cf99f4c-9143-4b5b-bf11-72a1552946d2', metadata={}, page_content='I like apple'),
 Document(id='0b68268c-ae1e-4fcd-ab5d-2f1a7958dd29', metadata={}, page_content='I like computers by Apple')]

## Ensemble Retriever

In [98]:
# Two hybrid retrievers over the same pair of base retrievers; they differ
# only in how the sparse and dense results are weighted. Weights are listed
# in the same order as `retrievers`: [BM25, FAISS].

# Variant 1: the dense FAISS retriever carries more weight.
ensemble_retriever1 = EnsembleRetriever(
    retrievers=[bm25_retriever, faiss_retriever],
    weights=[0.3, 0.7],
)

# Variant 2: the sparse BM25 retriever carries more weight.
ensemble_retriever2 = EnsembleRetriever(
    retrievers=[bm25_retriever, faiss_retriever],
    weights=[0.7, 0.3],
)

In [99]:
# Run one query through both weightings to compare the fused rankings.
# `invoke` replaces the deprecated `get_relevant_documents`.
query = "apple Computers and Phones"
docs1 = ensemble_retriever1.invoke(query)  # FAISS-weighted ensemble
print(docs1)
docs2 = ensemble_retriever2.invoke(query)  # BM25-weighted ensemble
print(docs2)

[Document(metadata={}, page_content='I like apple'), Document(id='0b68268c-ae1e-4fcd-ab5d-2f1a7958dd29', metadata={}, page_content='I like computers by Apple'), Document(metadata={}, page_content='Apples and oranges are fruits')]
[Document(metadata={}, page_content='I like apple'), Document(metadata={}, page_content='Apples and oranges are fruits'), Document(id='0b68268c-ae1e-4fcd-ab5d-2f1a7958dd29', metadata={}, page_content='I like computers by Apple')]


In [100]:
# A query with little vocabulary overlap with the corpus, run through both
# weightings. `invoke` replaces the deprecated `get_relevant_documents`.
query = "iphone is competitor of samsung phones"
docs1 = ensemble_retriever1.invoke(query)  # FAISS-weighted ensemble
print(docs1)
docs2 = ensemble_retriever2.invoke(query)  # BM25-weighted ensemble
print(docs2)

[Document(id='0b68268c-ae1e-4fcd-ab5d-2f1a7958dd29', metadata={}, page_content='I like computers by Apple'), Document(id='4cf99f4c-9143-4b5b-bf11-72a1552946d2', metadata={}, page_content='I like apple'), Document(metadata={}, page_content='A phone with wifi connection'), Document(metadata={}, page_content='A green mattress')]
[Document(metadata={}, page_content='A phone with wifi connection'), Document(metadata={}, page_content='A green mattress'), Document(id='0b68268c-ae1e-4fcd-ab5d-2f1a7958dd29', metadata={}, page_content='I like computers by Apple'), Document(id='4cf99f4c-9143-4b5b-bf11-72a1552946d2', metadata={}, page_content='I like apple')]
