In [5]:
from pinecone_text.sparse import BM25Encoder
from langchain_huggingface import HuggingFaceEmbeddings
from pinecone import Pinecone, ServerlessSpec
from langchain_community.retrievers import PineconeHybridSearchRetriever
import os
from dotenv import load_dotenv
load_dotenv()

# Toy corpus: one short sentence per head of government. These exact strings
# are what gets BM25-fitted, embedded and upserted into the index below.
# NOTE(review): "prime mister" (Trudeau) and "president of North America"
# (Trump) look like data typos. They are kept verbatim because the indexed
# vectors and the recorded query output depend on the exact text; fix them
# only together with a re-index.
text = [
    "Narendra Modi is the prime minister of India",
    "Donald Trump is the president of North America",
    "Gaston Browne is the prime minister of Antigua",
    "Keith Rowley is the prime minister of trinidad and tobago",
    "Justin Trudeau is the prime mister of Canada",
]

# Sparse side of the hybrid search: a BM25 encoder whose term statistics
# are learned from this corpus by fit().
# BM25Encoder.default() is a factory that loads pretrained (MS MARCO)
# statistics; fit() replaces those statistics on the very next line, so the
# encoder is constructed directly instead and the download is skipped.
# NOTE(review): assumes the pretrained parameter file uses the constructor's
# default hyperparameters (b, k1, tokenizer options) -- confirm if exact
# score parity with earlier runs matters.
bm25_encoder = BM25Encoder()
bm25_encoder.fit(text)  # compute document frequencies / average length from `text`

# Dense side of the hybrid search: sentence-embedding model loaded through
# the HuggingFace integration. Its output length must match the `dimension`
# the Pinecone index is created with.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Connect to Pinecone using the key loaded from the environment / .env file.
pinecone_api_key = os.getenv("PINECONE_API_KEY")
pc = Pinecone(api_key=pinecone_api_key)

# Create the index only on the first run; later runs reuse the existing one.
index_name = "pm-of-countries"
existing_index_names = pc.list_indexes().names()
if index_name not in existing_index_names:
    pc.create_index(
        name=index_name,
        dimension=384,  # length of the dense embedding vectors
        metric="dotproduct",  # indexes that store sparse values alongside dense ones must use dot product
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

index = pc.Index(index_name)

# Hybrid retriever: dense embeddings plus BM25 sparse vectors over one index.
retriever = PineconeHybridSearchRetriever(
    embeddings=embeddings,
    sparse_encoder=bm25_encoder,
    index=index,
)
# Encode every sentence with both encoders and upsert the result.
retriever.add_texts(text)

100%|██████████| 5/5 [00:00<00:00, 660.42it/s]
100%|██████████| 1/1 [00:04<00:00,  4.28s/it]


In [9]:
# Run a hybrid query against the index. The cell displays the returned list of
# Document objects; each one carries its match score in metadata["score"].
retriever.invoke("Who is the prime minister of India?")

[Document(metadata={'score': 0.60089016}, page_content='Narendra Modi is the prime minister of India'),
 Document(metadata={'score': 0.298049808}, page_content='Keith Rowley is the prime minister of trinidad and tobago'),
 Document(metadata={'score': 0.287390113}, page_content='Gaston Browne is the prime minister of Antigua'),
 Document(metadata={'score': 0.287325621}, page_content='Justin Trudeau is the prime mister of Canada')]