In [5]:
!pip install --upgrade --quiet pinecone pinecone-client pinecone-text pinecone-notebooks

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-objectbox 0.1.0 requires langchain-core<0.2.0,>=0.1.45, but you have langchain-core 0.3.76 which is incompatible.


In [None]:
# Read the Pinecone API key from the environment instead of hardcoding it.
import os
from dotenv import load_dotenv

load_dotenv()
api_key = os.environ.get("PINECONE_API_KEY")

# A missing or empty key only produces a warning here; nothing is raised.
if not api_key:
    print(
        "⚠️ Warning: PINECONE_API_KEY not found in environment variables!",
        "Please add PINECONE_API_KEY to your .env file",
        sep="\n",
    )

In [2]:
from langchain_community.retrievers import PineconeHybridSearchRetriever

In [4]:
import os
from pinecone import Pinecone, ServerlessSpec

index_name = "hybrid-search-langchain-pinecone"

# Initialize the Pinecone client with the API key loaded in the earlier cell.
pc = Pinecone(api_key=api_key)

# Create the index only when it does not exist yet.
# list_indexes() yields index description objects rather than plain strings,
# so a bare `index_name not in pc.list_indexes()` is always True and makes
# create_index fail on re-run; compare against the list of names instead.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,  # must equal the dense embedding size (all-MiniLM-L6-v2 is used below)
        metric="dotproduct",  # required for hybrid search
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [5]:
# Obtain a handle to the index by name; the trailing bare expression shows its repr.
index = pc.Index(index_name)
index

<pinecone.db_data.index.Index at 0x2e286062410>

In [6]:
## Vector embedding (dense) setup; the sparse encoder is built in a later cell
import os
from dotenv import load_dotenv

load_dotenv()

# os.environ only accepts str values, so assigning the result of os.getenv()
# directly raises TypeError when the token is not set. Export it only if present.
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if hf_token:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token

from langchain_huggingface import HuggingFaceEmbeddings

# Dense embedding model used by the hybrid retriever.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


In [7]:
# Display the embedding object's repr to inspect its configuration.
embeddings

HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False)

In [8]:
from pinecone_text.sparse import BM25Encoder

# BM25 is a ranking function built on term-frequency / inverse-document-frequency
# statistics; the encoder turns text into sparse vectors.
# `default` is a factory on the class, so no throwaway instance is needed.
BM_encoder = BM25Encoder.default()
BM_encoder

<pinecone_text.sparse.bm25_encoder.BM25Encoder at 0x2e286062320>

In [9]:
# Example corpus: 15 short trivia sentences used both to fit the BM25 encoder
# and as the texts added to the index via the retriever.
sentences = [
    "The 'I am Iron Man' line was a last-minute addition to the script.",
    "Captain America lifting Mjolnir was foreshadowed in Avengers: Age of Ultron.",
    "The final battle scene features nearly every Marvel Cinematic Universe hero.",
    "Tony Stark's daughter, Morgan, saying 'I love you 3000' was inspired by Robert Downey Jr.'s real-life interaction with his children.",
    "Thanos' line 'I am inevitable' reflects his belief in destiny and balance.",
    "The Avengers' time travel suits were entirely CGI and not physical costumes.",
    "The cheeseburger scene at Tony Stark's funeral is a callback to the first Iron Man movie.",
    "Doctor Strange raising one finger to Tony Stark signifies the one winning scenario he foresaw in Infinity War.",
    "The sound of hammering at the end credits is a tribute to Tony Stark building his first suit in Iron Man.",
    "Thor's transformation into 'Bro Thor' symbolizes his struggle with guilt and failure.",
    "Hawkeye's journey as Ronin shows the darker side of his character after losing his family.",
    "The 'On your left' line during the final battle is a callback to Captain America: The Winter Soldier.",
    "Black Widow's sacrifice on Vormir mirrors Gamora's death in Infinity War.",
    "Hulk's snap to bring everyone back caused permanent damage to his arm.",
    "The time travel explanation in Endgame is based on a multiverse theory rather than traditional time loops."
]

In [10]:
# Fit the sparse encoder on the example corpus.
BM_encoder.fit(sentences)

# Write the fitted parameters to disk, then rebind BM_encoder to a fresh
# encoder populated from that file.
bm25_params_path = "bm25_values.json"
BM_encoder.dump(bm25_params_path)
BM_encoder = BM25Encoder().load(bm25_params_path)

  0%|          | 0/15 [00:00<?, ?it/s]

In [11]:
# Combine the dense embedding model, the BM25 sparse encoder and the Pinecone
# index into a single hybrid-search retriever.
retriever = PineconeHybridSearchRetriever(
    embeddings=embeddings,
    sparse_encoder=BM_encoder,
    index=index,
)

In [12]:
# Display the retriever's repr to check which embeddings, sparse encoder and index it holds.
retriever

PineconeHybridSearchRetriever(embeddings=HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False), sparse_encoder=<pinecone_text.sparse.bm25_encoder.BM25Encoder object at 0x000002E2AAC1EF80>, index=<pinecone.db_data.index.Index object at 0x000002E286062410>)

In [13]:
# Hand the example sentences to the retriever so they are stored in the index
# it was configured with (presumably as dense + sparse vectors — confirm
# against the retriever's documentation).
retriever.add_texts(sentences)

  0%|          | 0/1 [00:00<?, ?it/s]

In [17]:
# Run a sample query; the result is a list of Document objects, each carrying
# a 'score' entry in its metadata.
retriever.invoke("who said I love you 3000")

[Document(metadata={'score': 0.395565957}, page_content="Tony Stark's daughter, Morgan, saying 'I love you 3000' was inspired by Robert Downey Jr.'s real-life interaction with his children."),
 Document(metadata={'score': 0.0798287392}, page_content="The 'On your left' line during the final battle is a callback to Captain America: The Winter Soldier."),
 Document(metadata={'score': 0.0765957832}, page_content='The sound of hammering at the end credits is a tribute to Tony Stark building his first suit in Iron Man.'),
 Document(metadata={'score': 0.115713596}, page_content="The 'I am Iron Man' line was a last-minute addition to the script.")]