In [13]:
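# Earlier install command, kept for reference only; the install cell below uses the `pinecone` package instead of `pinecone-client`.
# !pip install --upgrade --quiet  pinecone-client pinecone-text pinecone-notebooks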

In [14]:
pip install --upgrade --quiet pinecone pinecone-text pinecone-notebooks


Note: you may need to restart the kernel to use updated packages.


In [15]:
from dotenv import load_dotenv
import os

# Load variables from a .env file into the process environment so the
# os.getenv(...) lookups in later cells can see them. The recorded cell
# output is the call's return value (True).
load_dotenv() 

True

In [16]:
# The Pinecone client is created later as Pinecone() with no arguments, so the
# API key must already be present in the process environment.
# The previous `os.environ[k] = os.getenv(k)` form was a no-op when the key
# existed and raised an opaque TypeError (os.environ only accepts str values,
# os.getenv returns None for a missing variable) when it did not.
# Check explicitly and fail with an actionable message instead.
if not os.getenv('PINECONE_API_KEY'):
    raise RuntimeError(
        "PINECONE_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

In [17]:
from langchain_community.retrievers import PineconeHybridSearchRetriever

In [18]:
# Create the Pinecone index used by the hybrid retriever (only if missing).
from pinecone import Pinecone, ServerlessSpec

# NOTE: the name is spelled "hybird" in the original; it is kept unchanged
# because a different name would refer to a different index.
index_name = 'hybird-search-langchain-pinecone'

# Initialize the Pinecone client (no explicit arguments are passed).
pc = Pinecone()

# Create the index only when its name is not already listed for this client.
existing_index_names = pc.list_indexes().names()
if index_name not in existing_index_names:
    pc.create_index(
        name=index_name,
        dimension=384,        # dimension of the dense vectors
        metric='dotproduct',  # sparse values are supported only for dotproduct
        spec=ServerlessSpec(cloud='aws', region='us-east-1'),
    )

In [19]:
# Obtain a handle to the index by name from the Pinecone client.
index = pc.Index(index_name)
# Bare name as the last expression so the notebook displays the object's repr.
index

<pinecone.data.index.Index at 0x2be14d52e00>

In [20]:
## Dense vector embeddings
# os.getenv returns None when HF_TOKEN is not defined, and assigning None into
# os.environ raises TypeError. Re-export the token only when it is present so
# this cell still runs without one.
# NOTE(review): assumes the model below can be downloaded without a token —
# confirm for your environment.
if os.getenv('HF_TOKEN') is not None:
    os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN')

from langchain_huggingface import HuggingFaceEmbeddings

# NOTE(review): the embedding size of this model must match the `dimension`
# the Pinecone index was created with (384) — confirm against the model card.
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

# Bare name as the last expression so the notebook displays the object's repr.
embeddings

HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [21]:
# Sparse encoder (BM25)

from pinecone_text.sparse import BM25Encoder

# default() is a factory that returns a ready-made encoder, so call it on the
# class directly; the previous BM25Encoder().default() built an extra instance
# that was immediately discarded.
bm25_encoder = BM25Encoder.default()

# Bare name as the last expression so the notebook displays the object's repr.
bm25_encoder

<pinecone_text.sparse.bm25_encoder.BM25Encoder at 0x2be50c4f580>

In [22]:
# Small demo corpus used to fit the sparse encoder.
sentences = [
    "In 2023 i visited Paris",
    "In 2024 , I visited New york",
    "in 2025 ,i visited Dubai",
]

# Destination file for the fitted encoder values.
bm25_params_path = 'bm25_values.json'

# Fit the BM25 encoder on the demo sentences.
bm25_encoder.fit(sentences)

# Store the fitted values in a JSON file.
bm25_encoder.dump(bm25_params_path)

100%|██████████| 3/3 [00:00<00:00, 2088.80it/s]


In [23]:
# Build the hybrid retriever from the dense embedding model, the BM25 sparse
# encoder, and the Pinecone index handle.
retriever = PineconeHybridSearchRetriever(
    embeddings=embeddings,
    sparse_encoder=bm25_encoder,
    index=index,
)

# Bare name as the last expression so the notebook displays the object's repr.
retriever

PineconeHybridSearchRetriever(embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False), sparse_encoder=<pinecone_text.sparse.bm25_encoder.BM25Encoder object at 0x000002BE50C4F580>, index=<pinecone.data.index.Index object at 0x000002BE14D52E00>)

In [24]:
# Index the same corpus the BM25 encoder was fitted on. Reuse the `sentences`
# list defined in the fitting cell instead of repeating the literal, so the
# fitted corpus and the indexed corpus cannot drift apart.
retriever.add_texts(sentences)

100%|██████████| 1/1 [00:03<00:00,  3.01s/it]


In [25]:
# Query the hybrid retriever. In the recorded output the 2024 "New york"
# sentence ranks first and the 2025 "Dubai" sentence ranks last, so the
# ranking does not reflect which trip was actually the latest.
retriever.invoke ('what city did i visit last ')

[Document(metadata={'score': 0.26735}, page_content='In 2024 , I visited New york'),
 Document(metadata={'score': 0.230950952}, page_content='In 2023 i visited Paris'),
 Document(metadata={'score': 0.186537176}, page_content='in 2025 ,i visited Dubai')]

In [26]:
# Same question phrased with "recent". The recorded output keeps the same
# ordering (2024, 2023, 2025) with slightly different scores.
retriever.invoke ('what city did i visit recent ')

[Document(metadata={'score': 0.241437107}, page_content='In 2024 , I visited New york'),
 Document(metadata={'score': 0.19892031}, page_content='In 2023 i visited Paris'),
 Document(metadata={'score': 0.161007136}, page_content='in 2025 ,i visited Dubai')]