In [51]:
import os
from dotenv import load_dotenv 
from langchain_community.retrievers import PineconeHybridSearchRetriever
from pinecone import Pinecone,ServerlessSpec
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_openai.chat_models import ChatOpenAI
from pinecone_text.sparse import BM25Encoder
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever
from langchain_community.vectorstores.faiss import FAISS
from langchain.prompts import ChatPromptTemplate 
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
import nltk
# Load variables from a local .env file into the process environment; the
# Pinecone / OpenAI keys are read back later through os.getenv.
load_dotenv()
# Fetch the NLTK "punkt_tab" tokenizer data if it is not already installed.
# NOTE(review): presumably required by the BM25 tokenisation — confirm.
nltk.download("punkt_tab")

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\mehdi\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [2]:
# Name of the Pinecone index backing this demo.
index_database_name = "hybrid-rag-search-langchain-db"

# Credentials come from the environment (populated by load_dotenv above).
api_key = os.environ.get("OPENAI_API_KEY")
pinecone = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

# Create the serverless index only on first run so the cell can be re-executed.
existing_index_names = pinecone.list_indexes().names()
if index_database_name not in existing_index_names:
    # NOTE(review): dimension=384 does not look like the output size of the
    # OpenAI "text-embedding-3-small" model configured further down — confirm
    # which embedding model this index is meant to store before upserting.
    pinecone.create_index(
        name=index_database_name,
        dimension=384,
        metric="dotproduct",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [3]:
# Handle to the index created (or found) in the previous cell.
pinecone_index = pinecone.Index(name=index_database_name)
# Bare expression so the notebook displays the handle's repr.
pinecone_index

<pinecone.data.index.Index at 0x1dac15f6590>

In [6]:
# Dense embedding model used by the FAISS vector store below; the API key is
# looked up in the environment when this cell runs.
dense_embeddingsModel = OpenAIEmbeddings(
    api_key=os.getenv("OPENAI_API_KEY"),
    model="text-embedding-3-small",
)

In [15]:
# Toy corpus: one sentence per meal. Both the BM25 and the FAISS retrievers
# are built from these exact strings, so they are kept verbatim.
documents = [
    "In the morning , i eat eggs and cup of milk",          # breakfast
    "In the afternoon  , i eat chicken and the some potatos",  # lunch
    "In the evening , i eat a salade and fuits",            # dinner
]

In [20]:
# Sparse (keyword) retriever built in memory from the sample texts.
bm25_retriever = BM25Retriever.from_texts(documents)
# Return at most 3 documents per query.
bm25_retriever.k = 3
# Query through the Runnable interface: `get_relevant_documents` is deprecated
# on LangChain retrievers in favour of `invoke`, which this notebook already
# uses for the retrieval chain.
bm25_retriever.invoke('milk')

<bound method BaseModel.dict of BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x000001DAC5CF2200>, k=3)>

In [43]:
# Dense vector store: embed the sample texts with the OpenAI model and index
# them in an in-memory FAISS store.
faiss_vectorestore = FAISS.from_texts(documents, dense_embeddingsModel)
# Expose the store as a retriever limited to the top 3 hits.
# `search_kwargs` is the argument `as_retriever` recognises and it must be a
# dict; the previous `kwargs={'search_kwargs': 3}` was not a recognised option,
# so the intended limit was never applied.
faiss_retriever = faiss_vectorestore.as_retriever(search_kwargs={"k": 3})

In [44]:
# Sanity-check the dense retriever. `invoke` replaces the deprecated
# `get_relevant_documents` and returns the same list of Document objects.
faiss_retriever.invoke('fruits')

[Document(page_content='In the evening , i eat a salade and fuits'),
 Document(page_content='In the morning , i eat eggs and cup of milk'),
 Document(page_content='In the afternoon  , i eat chicken and the some potatos')]

In [50]:
# Hybrid retriever: combine the dense (FAISS) and sparse (BM25) retrievers,
# giving each the same weight. The weights are listed in the same order as
# the retrievers.
ensembleRetriever = EnsembleRetriever(
    weights=[0.5, 0.5],
    retrievers=[faiss_retriever, bm25_retriever],
)

In [49]:
# Prompt text: the retrieved documents are substituted for {context} and the
# user's question for {input}.
qa_prompt_template = """
    try to give answers of the following question based only on the provided context between <context> tags.
    Think step by step before generating the detailed answer.
    <context>
    {context}
    </context>
    Question : {input}
    """
prompt = ChatPromptTemplate.from_template(qa_prompt_template)

# Chat model that writes the final answer; the key is read from the environment.
llmModel = ChatOpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    model="gpt-4o",
)

# Chain that passes the retrieved documents into the prompt and calls the model.
document_chain = create_stuff_documents_chain(prompt=prompt, llm=llmModel)

In [54]:
# Put the hybrid retriever in front of the answer-generation chain.
retrievel_chain = create_retrieval_chain(
    retriever=ensembleRetriever,
    combine_docs_chain=document_chain,
)

# Ask a question end to end: retrieve the documents, then generate the answer.
user_query = {"input": "what i usually eat in the morning ?"}
retriever_response = retrievel_chain.invoke(user_query)

# Keep only the question/answer pair from the chain's response.
retriever_response_dict = dict(
    question=retriever_response["input"],
    answer=retriever_response["answer"],
)
# Bare expression so the notebook displays the generated answer.
retriever_response_dict["answer"]

'Let\'s break down the information provided in the context step by step to determine what you usually eat in the morning.\n\n1. The context mentions different meals for various times of the day.\n2. For the morning, it states: "In the morning, I eat eggs and cup of milk."\n3. This clearly specifies the items you consume in the morning.\n\nTherefore, based on the provided context, you usually eat **eggs** and a **cup of milk** in the morning.'