In [1]:
%pwd

'/media/kirti/Dev/GenAI/E2EMedicalChatBotWithRAG/notebooks'

In [2]:
%cd ..

/media/kirti/Dev/GenAI/E2EMedicalChatBotWithRAG


In [3]:
from E2EMedicalChatBotWithRAG.models.llm_model import LLMAssistant
llm_class = LLMAssistant()
llm = llm_class.get_model()

[2025-09-23 16:56:39,462|(INFO)| File: helper | Message: Loaded environment variable: GROQ_API_KEY]


In [4]:
from langchain_core.prompts import ChatPromptTemplate

In [5]:
from src.E2EMedicalChatBotWithRAG.vector_database.pinecone_db import PineconeDB
sync_retriever = PineconeDB().get_retriever()

[2025-09-23 16:56:46,226|(INFO)| File: helper | Message: Loaded environment variable: PINECONE_API_KEY]



For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  from langchain_pinecone.vectorstores import Pinecone, PineconeVectorStore


[2025-09-23 16:56:54,572|(INFO)| File: SentenceTransformer | Message: Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2]
[2025-09-23 16:57:07,397|(INFO)| File: embedding_model | Message: Successfully loaded embedding model: sentence-transformers/all-MiniLM-L6-v2]


In [6]:
from pinecone.pinecone_asyncio import PineconeAsyncio 
from src.E2EMedicalChatBotWithRAG.utils.helper import load_env_variable

In [7]:
pinecone_api_key = load_env_variable("PINECONE_API_KEY")

[2025-09-23 16:57:16,286|(INFO)| File: helper | Message: Loaded environment variable: PINECONE_API_KEY]


In [8]:
pc = PineconeAsyncio(api_key=pinecone_api_key)

In [15]:
from pinecone import PineconeAsyncio
async def get_list_of_indexes():
    async with PineconeAsyncio(api_key=pinecone_api_key) as pc:
        # Do async things
        index_list = await pc.list_indexes()
        return index_list


In [None]:
list_of_indexes = await get_list_of_indexes()

In [14]:
list_of_indexes

[
    {
        "name": "medical-chatbot",
        "metric": "cosine",
        "host": "medical-chatbot-60n3d5q.svc.aped-4627-b74a.pinecone.io",
        "spec": {
            "serverless": {
                "cloud": "aws",
                "region": "us-east-1"
            }
        },
        "status": {
            "ready": true,
            "state": "Ready"
        },
        "vector_type": "dense",
        "dimension": 384,
        "deletion_protection": "disabled",
        "tags": null
    }
]

In [17]:
async def get_index(host_name, pinecone_api_key):
    """
    You can find host name by calling get_list_of_indexes() function
    """
    async with PineconeAsyncio(api_key=pinecone_api_key) as pc:
        index = pc.IndexAsyncio(host=host_name)
    return index

In [18]:
async_index = await get_index(host_name="medical-chatbot-60n3d5q.svc.aped-4627-b74a.pinecone.io", pinecone_api_key=pinecone_api_key)

In [19]:
from src.E2EMedicalChatBotWithRAG.models.embedding_model import EmbeddingModel
embedding_model = EmbeddingModel()._get_model()

[2025-09-23 17:02:48,689|(INFO)| File: SentenceTransformer | Message: Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2]
[2025-09-23 17:02:55,747|(INFO)| File: embedding_model | Message: Successfully loaded embedding model: sentence-transformers/all-MiniLM-L6-v2]


In [20]:
from langchain.schema import Document

In [39]:
class AsyncPineconeRetriever:
    def __init__(self, index, embedding_model, k=3):
        self.index = index
        self.embedding_model = embedding_model
        self.k = k

    async def get_relevant_docs(self, query_text):
        # 1. Get embedding of the query
        query_vector = self.embedding_model.embed_query(query_text)

        # 2. Query Pinecone
        response = await self.index.query(vector=query_vector,
                                         top_k=self.k,
                                        include_metadata=True)
        docs = []
        for match in response['matches']:
            docs.append(Document(
                    # id=match["id"],
                    page_content=match['metadata']['text'],
                    metadata={"source": match['metadata']['source'],
                              "similarity_score": match['score']}
                ))
        return docs

In [43]:
# retriever = AsyncPineconeRetriever(index=async_index, embedding_model=embedding_model, k=3)
# docs = await retriever.get_relevant_docs("what is acne")
# docs

In [44]:
from langchain.callbacks.manager import AsyncCallbackManagerForRetrieverRun, CallbackManagerForRetrieverRun
from langchain.schema import BaseRetriever, Document
from typing import Any, Optional
from pydantic import PrivateAttr

In [45]:
class LangChainAsyncRetriever(BaseRetriever):
    # Make async_retriever private to avoid Pydantic validation issues
    _async_retriever: Any = PrivateAttr()
    
    k: int = 3
    search_kwargs: dict = {}
    tags: Optional[list[str]] = None

    def __init__(self, async_retriever: AsyncPineconeRetriever, k: int = 3):
        super().__init__()
        self._async_retriever = async_retriever
        self.k = k
        self.search_kwargs = {"k": k}
        self.tags = ["PineconeVectorStore", "HuggingFaceEmbeddings"]

    # We can skip implementing the sync version if you only plan async usage
    def _get_relevant_documents(self, query: str, *, run_manager: CallbackManagerForRetrieverRun) -> list[Document]:
        raise NotImplementedError("Use async version only")

    async def _aget_relevant_documents(self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun) -> list[Document]:
        docs = await self._async_retriever.get_relevant_docs(query)
        return docs

In [47]:
# Create Pinecone async wrapper
pinecone_retriever = AsyncPineconeRetriever(index=async_index, embedding_model=embedding_model)

# Wrap it in LangChain async retriever
retriever = LangChainAsyncRetriever(async_retriever=pinecone_retriever)


In [48]:
async_response = await retriever.ainvoke("who build this project?")

In [49]:
async_response

[Document(metadata={'source': 'kirti pogra', 'similarity_score': 0.363729477}, page_content='This project is built by Kirti Pogra using LangChain, Pinecone and Groq. The code is available on GitHub. This project is only for educational purposes.'),
 Document(metadata={'source': 'data/Medical_book.pdf', 'similarity_score': 0.267733604}, page_content='ty for errors, omissions or discrepancies. The Gale Group accepts no\npayment for listing, and inclusion in the publication of any organiza-\ntion, agency, institution, publication, service, or individual does not\nimply endorsement of the editor or publisher. Errors brought to the\nattention of the publisher and verified to the satisfaction of the publish-\ner will be corrected in future editions.\nThis book is printed on recycled paper that meets Environmental Pro-\ntection Agency standards.\nThe paper used in this publication meets the minimum requirements of\nAmerican National Standard for Information Sciences-Permanence\nPaper for Prin

In [None]:
# For synchronous calls, we already have sync_retriever from PineconeDB class
sync_response = sync_retriever.invoke("what is acne")

In [52]:
sync_response

[Document(id='05b4dceb-33ab-4aca-8e0d-86398d62862c', metadata={'source': 'data/Medical_book.pdf'}, page_content='The goal of treating moderate acne is to decrease\ninflammation and prevent new comedone formation. One\neffective treatment is topical tretinoin along with a topical\nGALE ENCYCLOPEDIA OF MEDICINE 2 25\nAcne\nAcne vulgaris affecting a woman’s face. Acne is the general\nname given to a skin disorder in which the sebaceous\nglands become inflamed. (Photograph by Biophoto Associ-\nates, Photo Researchers, Inc. Reproduced by permission.)\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 25'),
 Document(id='c48a9784-043a-49c8-8492-5ea87845ea8d', metadata={'source': 'data/Medical_book.pdf'}, page_content='Description\nAcne vulgaris, the medical term for common acne, is\nthe most common skin disease. It affects nearly 17 million\npeople in the United States. While acne can arise at any\nage, it usually begins at puberty and worsens during ado-\nlescence. Nearly 85% of people develop

In [53]:
system_prompt = """You are a helpful AI medical assistant that helps people find information.
You are given the following extracted parts of a long document and a question. Provide a conversational answer
based on the context provided.
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
Always include relevant sources in your answer.
Make sure the answer is in detail and more than 100 words.

Context: {context}

If the question is out of context, politely inform them that you are tuned to only answer questions that are related to the context.
"""

In [54]:
prompt = ChatPromptTemplate.from_messages(
            [
                ("system", system_prompt),
                ("user", "{input}")
            ]     
)

In [55]:
from langchain.schema.runnable import RunnablePassthrough

async_chain = (
    {
        "context": retriever,          # Pinecone search → returns docs
        "input": RunnablePassthrough()  # pass the raw user query
    }
    | prompt
    | llm 
)


In [56]:
sync_chain = (
    {
        "context": sync_retriever,          # Pinecone search → returns docs
        "input": RunnablePassthrough()  # pass the raw user query
    }
    | prompt
    | llm  # ChatOpenAI(streaming=True) etc.
)

In [57]:
async def stream_answer(query):
    # Keep the client alive during streaming
    async for token in async_chain.astream(query):
        print(token.content, end="", flush=True)  # live streaming in notebook


In [58]:
# Example usage
await stream_answer("who build this project")


[2025-09-23 17:20:51,842|(INFO)| File: _client | Message: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"]
Based on the provided context, the project was built by Kirti Pogra using LangChain, Pinecone, and Groq. The code for this project is available on GitHub, and it is for educational purposes only.

In [59]:
for token in sync_chain.stream("who build this project?"):
    print(token.content, end='', flush=True)

[2025-09-23 17:21:02,003|(INFO)| File: _client | Message: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"]
Based on the provided context, it appears that the project was built by Kirti Pogra. This information is mentioned in the first document (id='602d3e94-ceaa-46db-847e-c6cbf92d42df') which states: "This project is built by Kirti Pogra using LangChain, Pinecone and Groq."

The project is also mentioned to be available on GitHub, but no further information is provided about the project itself. 

Source: Document(id='602d3e94-ceaa-46db-847e-c6cbf92d42df')