In [4]:
import os
from huggingface_hub import InferenceClient
from dotenv import load_dotenv

load_dotenv()

client = InferenceClient(
    provider="hf-inference",
    api_key=os.environ["HF_TOKEN"],
    model="sentence-transformers/all-MiniLM-L6-v2"
)



In [5]:
client

<InferenceClient(model='sentence-transformers/all-MiniLM-L6-v2', timeout=None)>

In [7]:
# Example: Get embedding for a sample query
embedding = client.embedding("This is a sample sentence.")
embedding

AttributeError: 'InferenceClient' object has no attribute 'embedding'

In [3]:
import requests

url = "https://kra09-embedding-model.hf.space/embed"   # adjust path to your actual route
payload = {"query": "Hello world"}

resp = requests.post(url, json=payload)   # or .get if your route is GET


In [5]:
print(resp.status_code)


200


In [10]:
embeddings = resp.json().get("embeddings","")

In [11]:
len(embeddings)

384

In [14]:
def embed_query(query):
    url = "https://kra09-embedding-model.hf.space/embed"   # adjust path to your actual route
    payload = {"query": query}
    resp = requests.post(url, json=payload)   # or .get if your route is GET
    if resp.status_code == 200:
        return resp.json().get("embeddings","")
    else:
        raise ValueError(f"Error {resp.status_code}: {resp.text}")

In [12]:
from __future__ import annotations

from typing import Any, Dict, List, Optional
from pydantic import PrivateAttr
from langchain.schema import BaseRetriever, Document
from langchain.callbacks.manager import (
    AsyncCallbackManagerForRetrieverRun,
    CallbackManagerForRetrieverRun,
)


In [15]:

class PineconeAsyncRetriever(BaseRetriever):
    """
    A production-ready async retriever for Pinecone + HuggingFace embeddings.

    Parameters
    ----------
    embedding_model : Any
        Must expose an `embed_query(text: str) -> List[float]` method.
    index : Any
        Pinecone index client with an async `query(...)` method.
    k : int, default 3
        Number of documents to retrieve.
    search_kwargs : dict, optional
        Extra parameters for Pinecone's query call.
    tags : list[str], optional
        Custom tags for observability/monitoring.
    """

    # _embedding_model: Any = PrivateAttr()
    _index: Any = PrivateAttr()

    k: int = 3
    tags: Optional[List[str]] = None

    def __init__(
        self,
        # embedding_model: Any,
        index: Any,
        k: int = 3,
        tags: Optional[List[str]] = None,
    ) -> None:
        super().__init__()
        # self._embedding_model = embedding_model
        self._index = index
        self.k = k
        self.tags = tags or ["PineconeVectorStore", "HuggingFaceEmbeddings"]

    # Disable sync usage to force async pattern
    def _get_relevant_documents(
        self,
        query: str,
        *,
        run_manager: CallbackManagerForRetrieverRun,
    ) -> List[Document]:
        raise NotImplementedError(
            "Synchronous retrieval is disabled. Use `aget_relevant_documents`."
        )

    async def _aget_relevant_documents(
        self,
        query: str,
        *,
        run_manager: AsyncCallbackManagerForRetrieverRun,
    ) -> List[Document]:
        """
        Retrieve top-k documents asynchronously from Pinecone.

        Returns
        -------
        list of langchain.schema.Document
        """
        # 1. Embed the query
        try:
            query_vector = embed_query(query)
        except Exception as e:
            raise RuntimeError(f"Embedding failed: {e}") from e

        # 2. Query Pinecone
        try:
            response = await self._index.query(
                vector=query_vector,
                top_k=self.k,
                include_metadata=True,
            )
        except Exception as e:
            raise RuntimeError(f"Pinecone query failed: {e}") from e

        # 3. Build Document list
        documents: List[Document] = []
        for match in response.get("matches", []):
            metadata = match.get("metadata", {})
            text = metadata.get("text")
            if not text:
                continue  # Skip if no text found
            documents.append(
                Document(
                    page_content=text,
                    metadata={
                        "source": metadata.get("source"),
                        "similarity_score": match.get("score"),
                    },
                )
            )

        return documents


In [1]:
from pinecone.pinecone_asyncio import PineconeAsyncio 
from E2EMedicalChatBotWithRAG.utils.helper import load_env_variable

In [2]:
pinecone_api_key = load_env_variable("PINECONE_API_KEY")
pc = PineconeAsyncio(api_key=pinecone_api_key)

[2025-09-26 15:11:47,753|(INFO)| File: helper | Message: Loaded environment variable: PINECONE_API_KEY]


In [18]:
host_name = 'medical-chatbot-60n3d5q.svc.aped-4627-b74a.pinecone.io'

In [19]:
index = pc.IndexAsyncio(host=host_name)

In [20]:
retriever = PineconeAsyncRetriever(index=index)

In [21]:
async_response = await retriever.ainvoke("what is acne")

In [22]:
async_response

[Document(metadata={'source': 'data/Medical_book.pdf', 'similarity_score': 0.663705885}, page_content='The goal of treating moderate acne is to decrease\ninflammation and prevent new comedone formation. One\neffective treatment is topical tretinoin along with a topical\nGALE ENCYCLOPEDIA OF MEDICINE 2 25\nAcne\nAcne vulgaris affecting a woman’s face. Acne is the general\nname given to a skin disorder in which the sebaceous\nglands become inflamed. (Photograph by Biophoto Associ-\nates, Photo Researchers, Inc. Reproduced by permission.)\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 25'),
 Document(metadata={'source': 'data/Medical_book.pdf', 'similarity_score': 0.636903822}, page_content='Description\nAcne vulgaris, the medical term for common acne, is\nthe most common skin disease. It affects nearly 17 million\npeople in the United States. While acne can arise at any\nage, it usually begins at puberty and worsens during ado-\nlescence. Nearly 85% of people develop acne at some time\n

In [1]:
%pwd

'/media/kirti/Dev/GenAI/E2EMedicalChatBotWithRAG/notebooks'

In [2]:
%cd ..

/media/kirti/Dev/GenAI/E2EMedicalChatBotWithRAG


In [3]:
from E2EMedicalChatBotWithRAG.vectorestores.async_pinecone_db import AsyncPineconeDB

[2025-09-26 15:13:22,499|(INFO)| File: helper | Message: Loaded environment variable: PINECONE_API_KEY]



For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  from langchain_pinecone.vectorstores import Pinecone, PineconeVectorStore


In [4]:
from pinecone.pinecone_asyncio import PineconeAsyncio
from E2EMedicalChatBotWithRAG.utils.helper import load_env_variable

pinecone_api_key = load_env_variable("PINECONE_API_KEY")
pc = PineconeAsyncio(api_key=pinecone_api_key)

[2025-09-26 15:14:02,591|(INFO)| File: helper | Message: Loaded environment variable: PINECONE_API_KEY]


In [5]:
retriever = await AsyncPineconeDB(client=pc).get_retriever()

In [6]:
retriever

PineconeAsyncRetriever(tags=['PineconeVectorStore', 'HuggingFaceEmbeddings'], search_kwargs={'k': 3})

In [7]:
response = await retriever.ainvoke("what is acne")

In [8]:
response

[Document(metadata={'source': 'data/Medical_book.pdf', 'similarity_score': 0.663705885}, page_content='The goal of treating moderate acne is to decrease\ninflammation and prevent new comedone formation. One\neffective treatment is topical tretinoin along with a topical\nGALE ENCYCLOPEDIA OF MEDICINE 2 25\nAcne\nAcne vulgaris affecting a woman’s face. Acne is the general\nname given to a skin disorder in which the sebaceous\nglands become inflamed. (Photograph by Biophoto Associ-\nates, Photo Researchers, Inc. Reproduced by permission.)\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 25'),
 Document(metadata={'source': 'data/Medical_book.pdf', 'similarity_score': 0.636903822}, page_content='Description\nAcne vulgaris, the medical term for common acne, is\nthe most common skin disease. It affects nearly 17 million\npeople in the United States. While acne can arise at any\nage, it usually begins at puberty and worsens during ado-\nlescence. Nearly 85% of people develop acne at some time\n

In [10]:
await pc.close()

In [1]:
%cd ..

/media/kirti/Dev/GenAI/E2EMedicalChatBotWithRAG


In [2]:
from E2EMedicalChatBotWithRAG.chains.rag_chain import RAGChain

[2025-09-26 15:34:08,700|(INFO)| File: helper | Message: Loaded environment variable: PINECONE_API_KEY]



For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  from langchain_pinecone.vectorstores import Pinecone, PineconeVectorStore


In [3]:
from pinecone.pinecone_asyncio import PineconeAsyncio
from E2EMedicalChatBotWithRAG.utils.helper import load_env_variable

pinecone_api_key = load_env_variable("PINECONE_API_KEY")
pc = PineconeAsyncio(api_key=pinecone_api_key)

[2025-09-26 15:34:21,815|(INFO)| File: helper | Message: Loaded environment variable: PINECONE_API_KEY]


In [4]:
chain = await RAGChain.make_async(client=pc)

[2025-09-26 15:34:23,296|(INFO)| File: helper | Message: Loaded environment variable: GROQ_API_KEY]


In [13]:
question = "who is the president of the USA?"

In [14]:
async for token in chain.ainvoke(question):
    print(token, end="", flush=True)

[2025-09-26 15:36:08,734|(INFO)| File: _client | Message: HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"]
I'm a medical assistant, and I'm here to help with medical-related questions. The presidency of the United States is a topic of general knowledge and politics, which is outside my area of expertise. I don't have information on the current or past presidents of the USA. If you have a medical-related question, I'd be happy to try and assist you.

In [15]:
await pc.close()