In [1]:
%pip install pydantic_settings langchain langchain-core langchain-google-genai langchain-qdrant fastembed langchain-community qdrant-client langgraph




In [2]:
from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    GOOGLE_API_KEY: str
    model_config = SettingsConfigDict(env_file=".env")

env = Settings()

# Create embeddings for doctor specialties/descriptions

In [27]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001", google_api_key=env.GOOGLE_API_KEY)

# Create Vector Data

In [4]:
# load doctors_final.json data
import json

with open("doctors_final.json", "r") as f:
    doctor_data = json.load(f)

print(doctor_data[0])

{'id': '5eae0017-40dd-4961-869f-79d9e45d87f2', 'name': 'Adventia Emilia Krysna Sipi Seda, M.M., M.Psi., Psikolog', 'specialization_name': 'Psikologi', 'specialization_name_en': 'Psychology', 'sub_specialization_name': 'Psikolog', 'sub_specialization_name_en': 'Psychologist', 'hospital_name': 'Siloam Hospitals Yogyakarta'}


# Set up Qdrant vector DB in memory 

In [18]:
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from qdrant_client.models import PointStruct
import uuid


collection_name = "doctors"
dimension = 768
distance = Distance.COSINE

In [11]:
if not client.collection_exists(collection_name=collection_name):
    client.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=dimension, distance=distance),
    )
    print(f"Collection '{collection_name}' created with dimension {dimension} and distance {distance}.")
else:
    print(f"Collection '{collection_name}' already exists.")


Collection 'doctors' already exists.


# doctors vectors

In [29]:
import time
import uuid
import json
from qdrant_client import QdrantClient, models
from qdrant_client.http.models import PointStruct
# Batching and Rate Limit Settings
BATCH_SIZE = 30  # Number of documents to embed in one API call and upsert
RETRY_ATTEMPTS = 5
INITIAL_RETRY_DELAY = 1  # Seconds for initial backoff

In [20]:
import time
import uuid
import json
import os
from dotenv import load_dotenv

from pydantic_settings import BaseSettings, SettingsConfigDict
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from qdrant_client.models import PointStruct
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_core.documents import Document # <--- THIS LINE IS ADDED/CORRECTED

# Load environment variables from .env file
load_dotenv()


True

In [None]:
total_docs = len(doctor_data)
print(f"Starting embedding and upsert process for {total_docs} documents...")
print(f"Type of 'embeddings' before loop: {type(embeddings)}")
# --- End debugging print statement ---

for i in range(0, total_docs, BATCH_SIZE):
    batch_data = doctor_data[i:i + BATCH_SIZE]

    # Prepare texts and metadata for LangChain's embed_documents
    langchain_docs = []
    original_metadata_in_batch = [] # To store original doc for Qdrant payload

    for doc in batch_data:
        # The `description` field might be missing, so use .get with a default empty string
        text_content = f"Doctor: {doc.get('name', '')}, Specialty: {doc.get('specialization_name', '')}, Sub-Specialty: {doc.get('sub_specialization_name', '')}, Hospital: {doc.get('hospital_name', '')}, Description: {doc.get('description', '')}"

        # LangChain's Document class is useful here for structuring
        langchain_docs.append(Document(page_content=text_content, metadata=doc))
        original_metadata_in_batch.append(doc)

    current_delay = INITIAL_RETRY_DELAY
    embeddings_list = [] # Initialize embeddings_list here to ensure it's always defined

    for attempt in range(RETRY_ATTEMPTS):
        try:
            # Use embed_documents for batching. This method handles internal API calls efficiently.
            print(f"Embedding batch {i//BATCH_SIZE + 1} ({len(langchain_docs)} documents) with LangChain...")

            # Pass only the page_content from the Document objects to embed_documents
            embeddings_list = embeddings.embed_documents([doc.page_content for doc in langchain_docs])

            break # Success, exit retry loop
        except Exception as e:
            print(f"Error embedding batch {i//BATCH_SIZE + 1}, attempt {attempt + 1}/{RETRY_ATTEMPTS}: {e}")
            # LangChain's embeddings often handle retries internally, but if you still see errors
            # (like ResourceExhausted), add an outer explicit delay for persistent rate limits.
            if "ResourceExhausted" in str(e) or "429" in str(e) or "quota" in str(e).lower():
                print(f"Rate limit or resource error. Waiting for {current_delay:.2f} seconds before retrying...")
                time.sleep(current_delay)
                current_delay *= 2 # Exponential backoff
            else:
                print(f"Non-retryable error: {e}")
                break # Exit retry loop for other errors
    else: # This 'else' block executes if the loop completes without a 'break' (i.e., all retries failed)
        print(f"Failed to embed batch {i//BATCH_SIZE + 1} after {RETRY_ATTEMPTS} attempts. Skipping this batch.")
        continue # Skip to the next batch

    if not embeddings_list: # If no embeddings were generated due to errors (e.g., all retries failed)
        print(f"No embeddings generated for batch {i//BATCH_SIZE + 1}. Skipping upsert.")
        continue

    # Prepare points for Qdrant upsert
    points_to_upsert = []
    for j, emb in enumerate(embeddings_list):
        original_doc = original_metadata_in_batch[j]
        text_content = langchain_docs[j].page_content # Get the exact text that was embedded

        points_to_upsert.append(
            PointStruct(
                id=original_doc.get("id") or str(uuid.uuid4()), # Use existing ID or generate new
                vector=emb,
                payload={
                    "page_content": text_content,
                    "metadata": original_doc # Store the full original document as metadata
                }
            )
        )

    # Upsert the batch to Qdrant
    current_delay = INITIAL_RETRY_DELAY # Reset delay for upsert retries
    for attempt in range(RETRY_ATTEMPTS):
        try:
            client.upsert(
                collection_name=collection_name,
                points=points_to_upsert,
                wait=True 
            )
            print(f"Successfully upserted batch {i//BATCH_SIZE + 1} ({len(points_to_upsert)} documents) to Qdrant.")
            break 
        except Exception as e:
            print(f"Error upserting batch {i//BATCH_SIZE + 1} to Qdrant, attempt {attempt + 1}/{RETRY_ATTEMPTS}: {e}")
            print(f"Waiting for {current_delay:.2f} seconds before retrying Qdrant upsert...")
            time.sleep(current_delay)
            current_delay *= 2 # Exponential backoff
    else:
        print(f"Failed to upsert batch {i//BATCH_SIZE + 1} to Qdrant after {RETRY_ATTEMPTS} attempts. Data for this batch might be missing.")

print("\nEmbedding and upsert process completed.")

# Optional: Verify the count of points in the collection
try:
    collection_info = client.get_collection(collection_name=collection_name)
    print(f"\nTotal points in Qdrant collection '{collection_name}': {collection_info.points_count}")
except Exception as e:
    print(f"Could not retrieve collection info: {e}")

Starting embedding and upsert process for 1000 documents...
Type of 'embeddings' before loop: <class 'langchain_google_genai.embeddings.GoogleGenerativeAIEmbeddings'>
Embedding batch 1 (30 documents) with LangChain...
Error embedding batch 1, attempt 1/5: Error embedding content: 429 Resource has been exhausted (e.g. check quota).
Rate limit or resource error. Waiting for 1.00 seconds before retrying...
Embedding batch 1 (30 documents) with LangChain...
Error embedding batch 1, attempt 2/5: Error embedding content: 429 Resource has been exhausted (e.g. check quota).
Rate limit or resource error. Waiting for 2.00 seconds before retrying...
Embedding batch 1 (30 documents) with LangChain...
Error embedding batch 1, attempt 3/5: Error embedding content: 429 Resource has been exhausted (e.g. check quota).
Rate limit or resource error. Waiting for 4.00 seconds before retrying...
Embedding batch 1 (30 documents) with LangChain...
Error embedding batch 1, attempt 4/5: Error embedding content:

In [26]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate

# 1. Set up the LLM
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    api_key=env.GOOGLE_API_KEY,
)

# 2. Define a function to recommend doctors using symptoms and LLM
from langchain_qdrant import QdrantVectorStore

def recommend_doctor_with_llm(symptom_query, top_k=5):
    # Vector search for relevant doctors
    vector_store = QdrantVectorStore(
        client=client,
        collection_name=collection_name,
        embedding=embeddings,
    )
    retriever = vector_store.as_retriever()
    results = retriever.invoke(symptom_query, k=top_k)
    context = "\n".join([r.page_content for r in results])

    # Use LLM to generate a recommendation
    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a helpful assistant that recommends doctors at Siloam Hospitals based on user symptoms and doctor specialties."),
        ("human", "User symptoms: {symptoms}\nDoctor list:\n{context}\n\nWhich doctor(s) would you recommend and why?")
    ])
    chain = prompt | llm
    response = chain.invoke({"symptoms": symptom_query, "context": context})
    return response.content

# 3. Example usage
user_symptom = "I have chest pain and shortness of breath"
recommendation = recommend_doctor_with_llm(user_symptom)
print(recommendation)

QdrantVectorStoreError: Existing Qdrant collection is configured for dense vectors with 3072 dimensions. Selected embeddings are 0-dimensional. If you want to recreate the collection, set `force_recreate` parameter to `True`.