## **Pinecone API Key**

In [1]:
api_key="PINECONE API KEY"

In [2]:
from langchain_community.retrievers import PineconeHybridSearchRetriever  # Can do both semantic and syntatic search

In [3]:
import os
from pinecone import Pinecone , ServerlessSpec
index_name="hybris-search-langchain-pinecone"


##initialize the pinecone client 
pc= Pinecone(api_key=api_key)

##create the index
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name , 
        dimension=384 , ## dense vector
        metric='dotproduct',  ##sparse value 
        spec=ServerlessSpec(cloud='aws' , region='us-east-1'),
    )

In [4]:
index=pc.Index(index_name)
index

<pinecone.data.index.Index at 0x1e1bf6714e0>

In [5]:
### Vector Embeddings and sparse matrix

import os
from dotenv import load_dotenv
load_dotenv()

from langchain_huggingface import HuggingFaceEmbeddings
embeddings =  HuggingFaceEmbeddings(model_name = "all-MiniLM-L6-v2")
embeddings

HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [6]:
from pinecone_text.sparse import BM25Encoder

bm25_encoder=BM25Encoder().default()
bm25_encoder

<pinecone_text.sparse.bm25_encoder.BM25Encoder at 0x1e1ef02efe0>

In [7]:
sentences=[
    "In 2023, I visited Paris",
        "In 2022, I visited New York",
        "In 2021, I visited New Orleans",

]

## Default - TFIDF
bm25_encoder.fit(sentences)

## Store the values to json file
bm25_encoder.dump("bm25_encoder.json")

100%|██████████| 3/3 [00:00<00:00, 35.39it/s]


In [8]:
retriever = PineconeHybridSearchRetriever(embeddings=embeddings  , sparse_encoder= bm25_encoder , index=index)

In [9]:
retriever

PineconeHybridSearchRetriever(embeddings=HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False), sparse_encoder=<pinecone_text.sparse.bm25_encoder.BM25Encoder object at 0x000001E1EF02EFE0>, index=<pinecone.data.index.Index object at 0x000001E1BF6714E0>)

In [10]:
retriever.add_texts(
    [
    "In 2023, I visited Paris",
        "In 2022, I visited New York",
        "In 2021, I visited New Orleans",

]
)

## Here evrything is gettinf inserted inside the index created in Pinecone

100%|██████████| 1/1 [00:01<00:00,  1.93s/it]


In [11]:
retriever.invoke("What city did i visit last?")

[Document(metadata={'score': 0.287748396}, page_content='In 2021, I visited New Orleans'),
 Document(metadata={'score': 0.259554029}, page_content='In 2022, I visited New York'),
 Document(metadata={'score': 0.235975}, page_content='In 2023, I visited Paris')]

In [13]:
retrieved_docs = retriever.invoke('When did i visit New York?')

In [14]:
retrieved_docs

[Document(metadata={'score': 0.599850416}, page_content='In 2022, I visited New York'),
 Document(metadata={'score': 0.340815425}, page_content='In 2021, I visited New Orleans'),
 Document(metadata={'score': 0.204756}, page_content='In 2023, I visited Paris')]

In [16]:
content =[doc.page_content for doc in retrieved_docs]

In [17]:
content

['In 2022, I visited New York',
 'In 2021, I visited New Orleans',
 'In 2023, I visited Paris']

In [20]:
context='\n'.join(content)

In [21]:
context

'In 2022, I visited New York\nIn 2021, I visited New Orleans\nIn 2023, I visited Paris'

In [22]:
from langchain_groq import ChatGroq
llm=ChatGroq(model_name = "gemma2-9b-it" , 
             groq_api_key = "GROQ API KEY" )

In [23]:
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000001E180C77CD0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001E180CAC8E0>, model_name='gemma2-9b-it', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [24]:
from langchain_core.prompts import PromptTemplate

prompt_template = PromptTemplate(input_variables=['input_query' , 'context']  , 
               template= 
               """Use the following context to answer the query and if u dont know the answer then say that:
                Context:
                {context}
                Query:
                {query}
                """
)

In [29]:
from langchain.chains import LLMChain
query = "where did i visit last year"
final_prompt=prompt_template.format(context=context , query=query)
llm_chain = LLMChain(llm = llm , prompt=prompt_template)
response=llm_chain.run({'context':context , 'query':query})
print(response)

  llm_chain = LLMChain(llm = llm , prompt=prompt_template)
  response=llm_chain.run({'context':context , 'query':query})


You visited Paris last year. 

