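#### Hybrid Search RAG application

This notebook combines dense embeddings (`all-MiniLM-L6-v2`) with sparse BM25 vectors in a Pinecone index and queries it through LangChain's `PineconeHybridSearchRetriever`.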

In [1]:
import os
import warnings

# The Pinecone API key is a secret, so it is read from the environment instead
# of being stored in the notebook. Export it before starting Jupyter, e.g.
#   export PINECONE_API_KEY="..."
# SECURITY: a literal key used to live in this cell. Any key that was ever
# committed or shared this way should be treated as leaked and rotated.
Pinecone_API_KEY = os.environ.get("PINECONE_API_KEY", "").strip()

if not Pinecone_API_KEY:
    # Warn rather than raise so the cell itself always runs; cells that talk to
    # Pinecone are expected to fail until the variable is set.
    warnings.warn(
        "PINECONE_API_KEY is not set; Pinecone calls in later cells will fail "
        "until it is exported in the environment."
    )

In [3]:
from langchain_community.retrievers import PineconeHybridSearchRetriever

In [9]:
import os
from pinecone import Pinecone, ServerlessSpec

# Name of the Pinecone index that backs the hybrid retriever.
index_name = "hybrid-search"

# Pinecone client, authenticated with the key defined earlier in the notebook.
pc = Pinecone(api_key=Pinecone_API_KEY)

# Create the index on the first run only; an existing index of that name is reused.
existing_index_names = pc.list_indexes().names()
if index_name not in existing_index_names:
    index_settings = {
        "name": index_name,
        "dimension": 384,  # vector size the index is created with
        "metric": "dotproduct",  # chosen for the sparse vectors (per the original note)
        "spec": ServerlessSpec(cloud="aws", region="us-east-1"),
    }
    pc.create_index(**index_settings)

In [10]:
# Open a handle to the index named by `index_name` through the `pc` client.
index = pc.Index(index_name)
# Bare last expression: shows the handle's repr as the cell output.
index

<pinecone.db_data.index.Index at 0x7f49a80ce3b0>

In [12]:
## Create the dense embedding model (the sparse BM25 encoder is built in a later cell)
import os

from langchain_huggingface import HuggingFaceEmbeddings

# Optional Hugging Face token, read from the environment so that a real token
# is never pasted into the notebook. Nothing in this cell passes it on
# explicitly; it is an empty string when the variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN", "")

# Dense embedding model used for the semantic half of the hybrid search.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
embeddings

2026-01-26 10:26:44.348433: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2026-01-26 10:26:44.348540: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2026-01-26 10:26:44.415422: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2026-01-26 10:26:44.561941: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False)

In [13]:
from pinecone_text.sparse import BM25Encoder

# Sparse (BM25) encoder for the keyword half of the hybrid search.
# `default()` is called on the class itself, so no throwaway BM25Encoder
# instance is constructed just to reach it.
encoder = BM25Encoder.default()
encoder

[nltk_data] Downloading package stopwords to /home/vscode/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


<pinecone_text.sparse.bm25_encoder.BM25Encoder at 0x7f498f3d9bd0>

In [14]:
# Small demo corpus used to fit the BM25 encoder.
sentences = [
    "In 2023, I visited Pondicherry",
    "In 2021, I visited Tirupati",
    "In 2020, I visited Manthralaya",
]

# Fit the encoder on the corpus.
# NOTE(review): this presumably replaces the parameters the encoder was created with; confirm.
encoder.fit(sentences)

# Persist the fitted values to a JSON file.
bm25_values_path = "bm25_values.json"
encoder.dump(bm25_values_path)

  0%|          | 0/3 [00:00<?, ?it/s]

In [15]:
# Assemble the hybrid retriever from the dense model, the sparse encoder and the index handle.
retriever = PineconeHybridSearchRetriever(
    index=index,
    sparse_encoder=encoder,
    embeddings=embeddings,
)
retriever

PineconeHybridSearchRetriever(embeddings=HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False), sparse_encoder=<pinecone_text.sparse.bm25_encoder.BM25Encoder object at 0x7f498f3d9bd0>, index=<pinecone.db_data.index.Index object at 0x7f49a80ce3b0>)

In [16]:
# Index the same corpus the BM25 encoder was fitted on. Reusing `sentences`
# (instead of repeating the literal list) keeps the indexed texts and the
# fitted corpus from drifting apart.
retriever.add_texts(sentences)

  0%|          | 0/1 [00:00<?, ?it/s]

In [18]:
# Query the hybrid retriever; each returned Document carries a `score` in its metadata.
# NOTE(review): the query text is kept verbatim so the recorded output below stays reproducible.
retriever.invoke("What city did I visited in 2023?")

[Document(metadata={'score': 0.387366295}, page_content='In 2023, I visited Pondicherry'),
 Document(metadata={'score': 0.269859523}, page_content='In 2021, I visited Tirupati'),
 Document(metadata={'score': 0.218556628}, page_content='In 2020, I visited Manthralaya')]