In [68]:
%pip install pydantic_settings langchain langchain-core langchain-google-genai langchain-qdrant fastembed langchain-community qdrant-client langgraph

I0000 00:00:1753944123.081905   46508 fork_posix.cc:71] Other threads are currently calling into gRPC, skipping fork() handlers


Note: you may need to restart the kernel to use updated packages.


In [69]:
from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    GOOGLE_API_KEY: str
    model_config = SettingsConfigDict(env_file=".env")

env = Settings()

In [70]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings_2 = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001", google_api_key=env.GOOGLE_API_KEY)

In [71]:
from qdrant_client.http.models import Distance

collection_name = "mcu_packages"
dimension = 3072
distance = Distance.COSINE

# Create Vector Data

In [72]:
# load mcu.json data
import json

with open("doctors_final.json", "r") as f:
    mcu_data = json.load(f)

print(mcu_data[0])

{'id': '5eae0017-40dd-4961-869f-79d9e45d87f2', 'name': 'Adventia Emilia Krysna Sipi Seda, M.M., M.Psi., Psikolog', 'specialization_name': 'Psikologi', 'specialization_name_en': 'Psychology', 'sub_specialization_name': 'Psikolog', 'sub_specialization_name_en': 'Psychologist', 'hospital_name': 'Siloam Hospitals Yogyakarta'}


In [73]:
# Optionally, you can use FastEmbed for embeddings
# from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
# embeddings = FastEmbedEmbeddings(cache_dir="./embedding_cache", model_name="jinaai/jina-embeddings-v2-base-en")
# # https://qdrant.github.io/fastembed/examples/Supported_Models/#supported-text-embedding-models

In [74]:
from qdrant_client import QdrantClient

client = QdrantClient(":memory:") 

In [75]:
from qdrant_client.http.models import VectorParams

if(client.collection_exists(collection_name=collection_name) == False):
    client.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=dimension, distance=distance),
    )

In [76]:

import json
from collections import defaultdict

with open("doctors_final.json", "r") as f:
    mcu_data = json.load(f)


specialization_groups = defaultdict(list)
for doctor in mcu_data:
    spec_key = doctor['specialization_name']
    specialization_groups[spec_key].append(doctor)

print(f"Total specializations: {len(specialization_groups)}")
print(f"Total doctors: {len(mcu_data)}")


for spec, doctors in list(specialization_groups.items())[:5]:
    print(f"{spec}: {len(doctors)} doctors")

Total specializations: 36
Total doctors: 1000
Psikologi: 8 doctors
Kedokteran Umum: 126 doctors
Kardiologi (Jantung): 52 doctors
Bedah Umum: 35 doctors
Obsteri dan Ginekologi (Kandungan): 74 doctors


In [77]:
from qdrant_client.models import PointStruct
import uuid
import time
from tqdm import tqdm

def create_optimized_embeddings():
    """Create embeddings grouped by specialization to reduce API calls"""
    
    specialization_groups = defaultdict(list)
    for doctor in mcu_data:
        spec_key = doctor['specialization_name']
        specialization_groups[spec_key].append(doctor)
    
    total_specializations = len(specialization_groups)
    print(f"Processing {total_specializations} specializations instead of {len(mcu_data)} individual doctors")
    
    processed_count = 0
    
    with tqdm(total=total_specializations, desc="Processing specializations") as pbar:
        for specialization, doctors in specialization_groups.items():
            try:
                doctor_names = [doc['name'] for doc in doctors]
                hospitals = list(set([doc['hospital_name'] for doc in doctors]))
                sub_specs = list(set([doc['sub_specialization_name'] for doc in doctors if doc['sub_specialization_name']]))
                combined_text = f"""
                Specialization: {specialization}
                # all doctors in this specialization
                Doctors: {', '.join(doctor_names)}
                Sub-specializations: {', '.join(sub_specs)}
                Hospitals: {', '.join(hospitals)}
                Total doctors: {len(doctors)}
                """.strip()
                
                emb = embeddings_2.embed_query(combined_text)
                
                points = []
                for doctor in doctors:
                    doctor_text = f"Doctor: {doctor['name']}, Specialization: {specialization}, Hospital: {doctor['hospital_name']}"
                    
                    points.append(
                        PointStruct(
                            id=str(uuid.uuid4()),
                            vector=emb, 
                            payload={
                                "page_content": doctor_text,
                                "metadata": {
                                    "id": doctor['id'],
                                    "name": doctor['name'],
                                    "specialization_name": doctor['specialization_name'],
                                    "sub_specialization_name": doctor['sub_specialization_name'],
                                    "sub_specialization_name_en": doctor.get('sub_specialization_name_en', ''),
                                    "hospital_name": doctor['hospital_name'],
                                    "specialization_group": specialization,
                                    "doctors_in_group": len(doctors)
                                },
                            },
                        )
                    )
                
                client.upsert(collection_name=collection_name, points=points)
                processed_count += len(doctors)
                
                pbar.set_postfix({
                    'doctors_processed': processed_count,
                    'current_spec': specialization[:20] + '...' if len(specialization) > 20 else specialization
                })
                pbar.update(1)
                
                time.sleep(1) 
                
            except Exception as e:
                print(f"Error processing specialization {specialization}: {e}")
                print("Waiting 10 seconds before continuing...")
                time.sleep(10)
                continue
    return processed_count, total_specializations

create_optimized_embeddings()

Processing 36 specializations instead of 1000 individual doctors


Processing specializations: 100%|██████████| 36/36 [00:53<00:00,  1.47s/it, doctors_processed=1000, current_spec=Kedokteran Nuklir]     


(1000, 36)

# Create Tool

In [78]:
from langchain_qdrant import QdrantVectorStore
def get_retriever():

    vector_store = QdrantVectorStore(
        client=client,
        collection_name=collection_name,
        embedding=embeddings_2,
    )
    
    return vector_store.as_retriever()

print(get_retriever())

tags=['QdrantVectorStore', 'GoogleGenerativeAIEmbeddings'] vectorstore=<langchain_qdrant.qdrant.QdrantVectorStore object at 0x7fa1d878d2e0> search_kwargs={}


In [79]:
from langchain_core.tools import tool
from typing import Annotated, List

@tool
def search_doctor(query: Annotated[str, "search query must contain keywords related to doctors"]) -> List[str]:
    """Search for doctors by name or specialization."""
    retriever = get_retriever()
    results = retriever.invoke(query, k=10)
    print(results)
    return [result.page_content for result in results]

In [80]:
search_doctor("gula darah, diabetes, paket medical check up, siloam")

[Document(metadata={'id': 'faf8d251-671c-463b-8f82-588d60d26a51', 'name': 'dr. Ade Rahmatdianto', 'specialization_name': 'Kedokteran Umum', 'sub_specialization_name': 'Dokter Umum', 'sub_specialization_name_en': 'General Practitioner', 'hospital_name': 'Siloam Hospitals Surabaya', 'specialization_group': 'Kedokteran Umum', 'doctors_in_group': 126, '_id': 'ad224cfe-1cb5-4768-8897-19c922a02dd7', '_collection_name': 'mcu_packages'}, page_content='Doctor: dr. Ade Rahmatdianto, Specialization: Kedokteran Umum, Hospital: Siloam Hospitals Surabaya'), Document(metadata={'id': '13978ca9-fe9d-46a7-9a59-10ad8d6b5e48', 'name': 'dr. Cindy Mayury', 'specialization_name': 'Kedokteran Umum', 'sub_specialization_name': 'Dokter Umum', 'sub_specialization_name_en': 'General Practitioner', 'hospital_name': 'Siloam Sriwijaya Palembang', 'specialization_group': 'Kedokteran Umum', 'doctors_in_group': 126, '_id': '7938ad35-c3b5-42bb-9c1f-6c95c12b5082', '_collection_name': 'mcu_packages'}, page_content='Doctor

['Doctor: dr. Ade Rahmatdianto, Specialization: Kedokteran Umum, Hospital: Siloam Hospitals Surabaya',
 'Doctor: dr. Cindy Mayury, Specialization: Kedokteran Umum, Hospital: Siloam Sriwijaya Palembang',
 'Doctor: dr. Catherine, Specialization: Kedokteran Umum, Hospital: Siloam Hospitals Medan',
 'Doctor: dr. Cathleen Kenya, Specialization: Kedokteran Umum, Hospital: MRCCC Siloam Hospitals Semanggi',
 'Doctor: dr. Chow Joy Nathasa A , MH.Kes, Specialization: Kedokteran Umum, Hospital: Siloam Hospitals Putera Bahagia',
 'Doctor: dr. Christian Hendrikus Sorongan, Specialization: Kedokteran Umum, Hospital: Siloam Hospitals Manado',
 'Doctor: dr. Christina Valentin, Specialization: Kedokteran Umum, Hospital: Siloam Hospitals TB Simatupang',
 'Doctor: dr. Christin Doko Rehi, Specialization: Kedokteran Umum, Hospital: SILOAM CLINIC SOE',
 'Doctor: dr. Christine Margaretha, Specialization: Kedokteran Umum, Hospital: Siloam Hospitals Agora Cempaka Putih',
 'Doctor: dr. Christy Agustiyani, Speci

# Create Agent

In [81]:
# access the Google Gemini API
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate

llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    api_key=env.GOOGLE_API_KEY,
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant that provides information about Siloam hospitals."),
        ("human", "{question}"),
    ]
)

chain = prompt | llm

In [82]:
chain.invoke({"question": "Saya sakit kepala, ada dokter spesialis apa di Siloam?"})

AIMessage(content='Di Siloam Hospitals, terdapat beberapa dokter spesialis yang dapat membantu menangani sakit kepala, tergantung pada penyebab dan jenis sakit kepala yang Anda alami. Berikut adalah beberapa pilihan dokter spesialis yang tersedia:\n\n1.  **Dokter Spesialis Saraf (Neurologi):** Dokter spesialis saraf adalah pilihan yang tepat jika sakit kepala Anda sering terjadi, sangat parah, atau disertai dengan gejala neurologis lainnya seperti gangguan penglihatan, kelemahan, atau mati rasa. Mereka dapat membantu mendiagnosis penyebab sakit kepala seperti migrain, sakit kepala tegang, atau masalah saraf lainnya.\n2.  **Dokter Spesialis Penyakit Dalam:** Jika sakit kepala Anda terkait dengan kondisi medis umum seperti tekanan darah tinggi, infeksi, atau masalah metabolik, dokter spesialis penyakit dalam dapat membantu mengidentifikasi dan mengelola kondisi tersebut.\n3.  **Dokter Spesialis Rehabilitasi Medik:** Jika sakit kepala Anda disebabkan oleh masalah otot atau postur tubuh ya

# Workflow for agent to use tool

In [83]:
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
class State(TypedDict):
    question: str
    context: List[str]
    search: str
    answer: str

In [84]:
def get_context(state: State):
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", """
                You are an expert in Doctor selection.
                You will provide keywords about the the disease or condition based on the question.
                The keywords should be relevant to the disease or condition.
                Do not provide any other information.
                If the question already contains keywords, you can return them as is.
                Only return one keyword and in english.
            """),
            ("human", "{question}"),
        ]
    )
    chain = prompt | llm
    result = chain.invoke({"question": state["question"]})
    return {"search": result.content}

In [85]:
def retrieve(state: State):
    retrieved_docs = get_retriever().invoke(state["search"], k=10)
    return {"context": retrieved_docs}

In [86]:
def generate(state: State):
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", """
                You are an assistant that provides information about Doctor Selection at Siloam hospitals.
                You will generate a response based on the context provided.
                The response should be concise and relevant to the question asked.
                package list knowledge: 
                {context}
                If the context is empty, you can provide a general response about Doctor Selection.
                Please always include related packages in your response.
            """),
            ("human", "{question}"),
        ]
    )
    chain = prompt | llm
    result = chain.invoke({"question": state["question"], "context": state["context"]})
    return {"answer": result.content}

In [87]:
graph_builder = StateGraph(State).add_sequence([get_context, retrieve, generate])
graph_builder.add_edge(START, "get_context")
graph = graph_builder.compile()

In [88]:
response = graph.invoke({
	"question": "Saya sakit kepala, ada dokter spesialis apa di Siloam Hospitals Lippo Cikarang?",
	"context": [],
	"search": "",
	"answer": ""
})
print(response["answer"])

Saat ini tidak ada dokter spesialis Neurologi (Otak dan Sistem Saraf) yang tersedia di Siloam Hospitals Lippo Cikarang. Berikut ini dokter spesialis Neurologi (Otak dan Sistem Saraf) yang tersedia di Siloam Hospitals lainnya:
- Dr. dr. Pricilla Yani Gunawan, Sp.N, Subspes.E.N.K (K), F.Neuroimaging-Neurosonology - Siloam Hospitals Lippo Village
- Dr. dr. Vivien Puspitasari, Sp.N - Siloam Hospitals Lippo Village
- Dr. dr. Rocksy Fransisca V Situmeang, Sp.N - Siloam Hospitals Lippo Village
- dr. Evlyne Erlyana Suryawijaya, M.Biomed, SpN, F.NO-NOT - Siloam Hospitals Kelapa Dua
- dr. Feliana, SpN - Siloam Hospitals Semarang
- dr. Ekawati Dani, SpS - MRCCC Siloam Hospitals Semanggi
- Dr. dr. Rini Andriani, SpN, Subsp.N-Onk(K) - MRCCC Siloam Hospitals Semanggi
- dr. Fidiana, SpN (K) - Siloam Hospitals Surabaya
- dr. Enseline Nikijuluw, SpS - Siloam Hospitals Ambon
- dr. A. Zuljumadi Adma, SpN - Siloam Hospitals Buton
