# Demo for MedAgent - First answer generation with naive RAG pipeline

This is a manual testing playground for trying out basic workflows that are later implemented properly in the MedAgent repository.

This notebook runs a first test of answer generation with naive retrieval (essentially creating the second baseline for our system test). For each question, the most similar chunks from the guidelines are retrieved first and then passed to a generator together with the original question. For this setup, new feedback must be gathered, and the results must be analyzed and visualized.

In [1]:
# SETUP
import os
import requests
import sys
import tiktoken
from dotenv import load_dotenv
from typing import List

sys.path.append(os.path.abspath("../src"))
from general.data_model.guideline_metadata import GuidelineMetadata
from general.helper.mongodb_interactor import MongoDBInterface, CollectionName
from general.helper.embedder import OpenAIEmbedder
from general.helper.logging import logger
from scripts.Guideline.guideline_interaction import get_plain_text_from_pdf
from scripts.System.system_setup import load_system_json
from scripts.System.system_interaction import init_workflow, init_workflow_with_id, init_chat, pose_question

# Load environment variables from the local env file before MONGO_URL is read below.
load_dotenv(dotenv_path="../.local-env")
# Base URL of the backend API that all vector-store requests in this notebook are sent to.
BACKEND_API_URL = "http://host.docker.internal:5000/api"
# MongoDB connection string; the second argument is the fallback used when MONGO_URL is unset.
mongo_url = os.getenv("MONGO_URL", "mongodb://mongo:mongo@host.docker.internal:27017/")

# JSON configuration posted to the backend when the vector collection is initialised;
# its 'class_name' entry identifies the collection in later requests.
weaviate_db_config = load_system_json("./input/database_setups/weaviate_custom_vectorizer.json")
# Presumably the naive-RAG workflow definition -- not used in the visible part of the notebook.
naive_rag_azure_config = load_system_json("./input/system/naive_rag_azure.json")
# Directory handed to get_plain_text_from_pdf as its second argument.
text_output_dir = "output/guideline/plain_text/"
# Make sure the output directory exists; the trailing slash makes dirname() return the directory itself.
for file_or_dir in [text_output_dir]:
    os.makedirs(os.path.dirname(file_or_dir), exist_ok=True)

# MongoDB access object; the collections used in this notebook are registered up front.
dbi = MongoDBInterface(mongo_url)
dbi.register_collections(
    CollectionName.GUIDELINES,
    CollectionName.WORKFLOW_SYSTEMS
)

## Setup vector database
In the first Jupyter notebook, the guidelines were already downloaded and stored in MongoDB. To be usable for the naive RAG flow, their content now needs to be split into chunks and stored in a vector database (for now Weaviate, with a chunk size of 512 tokens).

In [2]:
# Fetch every stored guideline document from MongoDB and convert each one
# into a guideline metadata object via the MongoDB interface.
guideline_documents = [*dbi.get_collection(CollectionName.GUIDELINES).find()]
guidelines = list(map(dbi.document_to_guideline_metadata, guideline_documents))

In [10]:
# Drop the existing collection so it can be rebuilt from scratch.
# Comment out the next two statements if the existing collection must not be overwritten.
response = requests.delete(f"{BACKEND_API_URL}/knowledge/vector/retriever/delete/{weaviate_db_config['class_name']}")
logger.info(f"Result of deletion for {weaviate_db_config['class_name']}: {response}")

# (Re-)create the collection from the JSON configuration.
response = requests.post(f"{BACKEND_API_URL}/knowledge/vector/retriever/init", json=weaviate_db_config)
try:
    response.raise_for_status()
except requests.HTTPError:
    # The error body is not guaranteed to be JSON (or a JSON object); fall back
    # to the raw text so a decoding problem cannot mask the original HTTP error.
    try:
        payload = response.json()
    except ValueError:
        payload = None
    detail = str(payload.get("detail", "")) if isinstance(payload, dict) else response.text
    if "already exists" in detail:
        # An existing collection is fine -- it is simply reused.
        logger.info(f"Weaviate collection already exists: {detail}")
    else:
        logger.error(f"Failed to initialize Weaviate collection: {detail}")
        raise
else:
    logger.info(response)

[37m2025-04-16 11:53:30[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mResult of deletion for GuidelineChunksCustomVector: <Response [200]>[0m
[37m2025-04-16 11:53:30[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208m<Response [200]>[0m


In [4]:
#embedder = OpenAIEmbedder(
#    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
#    api_base=os.getenv("AZURE_OPENAI_API_BASE"),
#    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
#    deployment_name="text-embedding-3-small" # or later: text-embedding-3-small
#)

# Tokenizer used by chunk_text to count the tokens of a candidate chunk.
encoding = tiktoken.get_encoding("cl100k_base")

def chunk_text(text: str, max_tokens: int = 512) -> List[str]:
    """Split *text* into word-aligned chunks that stay below a token budget.

    Words (split on whitespace) are accumulated greedily. As soon as the
    accumulated words encode to ``max_tokens`` or more tokens, measured with
    the module-level ``encoding``, everything before the most recent word is
    emitted as one chunk and that word starts the next chunk.

    Args:
        text: Plain text to split. Words are re-joined with single spaces, so
            the original whitespace layout is not preserved.
        max_tokens: Token count that a multi-word chunk stays below.

    Returns:
        The chunks in document order; empty list for empty or
        whitespace-only input. No chunk is ever an empty string: a single
        word that reaches the budget on its own is emitted as its own
        (oversized) chunk.
    """
    def token_count(words: List[str]) -> int:
        return len(encoding.encode(" ".join(words)))

    chunks: List[str] = []
    current: List[str] = []

    for word in text.split():
        current.append(word)
        if token_count(current) < max_tokens:
            continue
        if len(current) == 1:
            # A lone word already at/over the budget: emit it directly instead
            # of emitting the (empty) text that precedes it.
            chunks.append(word)
            current = []
        else:
            chunks.append(" ".join(current[:-1]))
            current = [word]

    if current:
        chunks.append(" ".join(current))

    return chunks

In [36]:
def insert_for_guideline(guideline: GuidelineMetadata):
    """Chunk one guideline PDF and insert its chunks one request at a time.

    The plain text is obtained with ``get_plain_text_from_pdf``, split with
    ``chunk_text`` and every chunk is posted individually to the backend's
    ``/knowledge/vector/retriever/insert`` endpoint. A failing chunk is
    logged and collected; it does not abort the remaining chunks.

    Args:
        guideline: Metadata of the guideline to insert; its register number,
            page count and downloaded file path are read.

    Returns:
        None. The outcome is reported through the logger only.
    """
    register_number = guideline.awmf_register_number
    class_name = weaviate_db_config['class_name']

    logger.info(f"Processing guideline {register_number} ({guideline.download_information.page_count} pages)")
    text = get_plain_text_from_pdf(guideline.download_information.file_path, text_output_dir)
    chunks = chunk_text(text)
    if not chunks:
        logger.error(f"[{register_number}] Something went wrong with reading the text or chunking -> empty")
        # Nothing to insert; returning here avoids reporting a misleading success below.
        return

    logger.progress(f"Processing guideline {register_number} [PROGRESS]: ", 0, len(chunks))
    non_successful_chunks = []
    for i_c, chunk in enumerate(chunks):
        # No vector is sent along: the payload only carries the text and its metadata.
        insert_entity = {
            "text": chunk,
            "metadata": {
                "guideline_id": register_number,
                "chunk_index": i_c
            },
            "class_name": class_name
        }
        try:
            response = requests.post(
                f"{BACKEND_API_URL}/knowledge/vector/retriever/insert",
                json=insert_entity
            )
            response.raise_for_status()
        except Exception as chunk_error:
            # Deliberately best-effort: remember the failed chunk and keep going.
            logger.error(f"[{register_number}] Failed to process chunk {i_c}: {chunk_error}")
            non_successful_chunks.append({i_c: chunk})

        logger.progress(f"Processing guideline {register_number} [PROGRESS]:", i_c+1, len(chunks))

    if non_successful_chunks:
        logger.error(f"Problems with inserting these chunks: {non_successful_chunks}")
    else:
        logger.success(f"Successfully transferred whole guideline with {len(chunks)} chunks")

def insert_batch_for_guideline(guideline: GuidelineMetadata):
    """Chunk one guideline PDF and upload all of its chunks in a single batch.

    The plain text is obtained with ``get_plain_text_from_pdf``, split with
    ``chunk_text`` and all chunks are posted in one request to the backend's
    ``/knowledge/vector/retriever/insertBatch`` endpoint.

    Args:
        guideline: Metadata of the guideline to insert; its register number,
            page count and downloaded file path are read.

    Returns:
        A tuple ``(response_json, number_of_chunks)`` where ``response_json``
        is the decoded JSON body of the batch request.

    Raises:
        requests.HTTPError: If the backend answers the batch request with an
            error status (via ``raise_for_status``).
    """
    register_number = guideline.awmf_register_number
    class_name = weaviate_db_config['class_name']

    logger.info(f"Processing guideline {register_number} ({guideline.download_information.page_count} pages)")
    text = get_plain_text_from_pdf(guideline.download_information.file_path, text_output_dir)
    chunks = chunk_text(text)
    if not chunks:
        # Only logged: as before, the (empty) batch is still submitted below so
        # the caller keeps receiving the backend's response.
        logger.error(f"[{register_number}] Something went wrong with reading the text or chunking -> empty")
    logger.progress(f"Tranforming chunks {register_number} [PROGRESS]: ", 0, len(chunks))

    batch_entities = []
    for i_c, chunk in enumerate(chunks):
        # No vector is sent along: each entry only carries the text and its metadata.
        batch_entities.append({
            "text": chunk,
            "metadata": {
                "guideline_id": register_number,
                "chunk_index": i_c
            },
            "class_name": class_name
        })
        logger.progress(f"Tranforming chunks {register_number} [PROGRESS]: ", i_c+1, len(chunks))

    logger.info("Submitting batch upload")
    response = requests.post(
        f"{BACKEND_API_URL}/knowledge/vector/retriever/insertBatch",
        json={
            "class_name": class_name,
            "entries": batch_entities
        }
    )
    response.raise_for_status()
    result = response.json()
    logger.info(f"Response: {result}")
    return result, len(chunks)

In [41]:
# Checkpoint of guidelines that were already uploaded in earlier runs, listed
# in the order of their index in `guidelines`:
# (AWMF register number, number of pages, number of chunks).
_ALREADY_INSERTED = [
    ('007-106l', 152, 316),
    ('017-066l', 50, 85),
    ('007-064l', 102, 167),
    ('083-042l', 98, 104),
    ('083-043l', 164, 230),
    ('083-005l', 64, 107),
    ('032-052OLl', 425, 765),
    ('007-086l', 31, 36),
    ('017-025l', 26, 36),
    ('001-024l', 85, 133),
    ('083-022l', 47, 71),
    ('007-066l', 74, 110),
    ('075-004l', 54, 146),
    ('003-001l', 238, 418),
    ('032-054OLl', 558, 927),
    ('083-039l', 60, 71),
    ('083-054l', 111, 132),
    ('017-082OLl', 262, 454),
    ('013-077l', 58, 74),
    ('032-051OLl', 266, 466),
    ('007-101l', 43, 54),
    ('017-075l', 32, 47),
    ('083-015l', 74, 91),
    ('083-020l', 140, 266),
    ('017-068l', 61, 89),
    ('007-006l', 47, 72),
    ('083-053l', 118, 135),
    ('075-005l', 37, 68),
    ('083-025l', 66, 79),
    ('013-085l', 37, 37),
    ('083-026l', 29, 35),
    ('083-018l', 35, 46),
    ('017-076OLl', 147, 253),
    ('183-001l', 421, 532),
    ('007-065l', 146, 270),
    ('040-014l', 244, 252),
    ('049-016l', 82, 142),
    ('007-003l', 34, 45),
    ('007-007l', 53, 82),
    ('083-023l', 79, 101),
]

# Keyed by guideline index; the insertion loop skips every index present here.
# None of the recorded guidelines had missing chunks.
inserted_guidelines = {
    index: {
        'guideline_awmf_nr': awmf_nr,
        'number_pages': page_count,
        'number_chunks': chunk_count,
        'missing_chunks': [],
    }
    for index, (awmf_nr, page_count, chunk_count) in enumerate(_ALREADY_INSERTED)
}

In [None]:
# Upload every guideline whose index is not yet recorded in the
# inserted_guidelines checkpoint, and record the result afterwards.
for i, guideline in enumerate(guidelines):
    if i in inserted_guidelines:
        continue

    res, num_chunks = insert_batch_for_guideline(guideline)
    inserted_guidelines[i] = dict(
        guideline_awmf_nr=guideline.awmf_register_number,
        number_pages=guideline.download_information.page_count,
        number_chunks=num_chunks,
        missing_chunks=res["failed"],
    )
    # Print the running checkpoint so it can be copied back into the notebook
    # if the run is interrupted.
    print(inserted_guidelines)

[37m2025-04-16 15:53:25[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mProcessing guideline 063-001l (94 pages)[0m
[37m2025-04-16 15:53:39[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mSubmitting batch upload[0m
[37m2025-04-16 15:55:36[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mResponse: {'class': 'GuidelineChunksCustomVector', 'status': 'completed', 'success': 235, 'failed': []}[0m
[37m2025-04-16 15:55:36[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mProcessing guideline 067-009l (483 pages)[0m


{0: {'guideline_awmf_nr': '007-106l', 'number_pages': 152, 'number_chunks': 316, 'missing_chunks': []}, 1: {'guideline_awmf_nr': '017-066l', 'number_pages': 50, 'number_chunks': 85, 'missing_chunks': []}, 2: {'guideline_awmf_nr': '007-064l', 'number_pages': 102, 'number_chunks': 167, 'missing_chunks': []}, 3: {'guideline_awmf_nr': '083-042l', 'number_pages': 98, 'number_chunks': 104, 'missing_chunks': []}, 4: {'guideline_awmf_nr': '083-043l', 'number_pages': 164, 'number_chunks': 230, 'missing_chunks': []}, 5: {'guideline_awmf_nr': '083-005l', 'number_pages': 64, 'number_chunks': 107, 'missing_chunks': []}, 6: {'guideline_awmf_nr': '032-052OLl', 'number_pages': 425, 'number_chunks': 765, 'missing_chunks': []}, 7: {'guideline_awmf_nr': '007-086l', 'number_pages': 31, 'number_chunks': 36, 'missing_chunks': []}, 8: {'guideline_awmf_nr': '017-025l', 'number_pages': 26, 'number_chunks': 36, 'missing_chunks': []}, 9: {'guideline_awmf_nr': '001-024l', 'number_pages': 85, 'number_chunks': 133,

[37m2025-04-16 15:56:38[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mSubmitting batch upload[0m
[37m2025-04-16 16:04:51[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mResponse: {'class': 'GuidelineChunksCustomVector', 'status': 'completed', 'success': 788, 'failed': []}[0m
[37m2025-04-16 16:04:51[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mProcessing guideline 083-045l (45 pages)[0m


{0: {'guideline_awmf_nr': '007-106l', 'number_pages': 152, 'number_chunks': 316, 'missing_chunks': []}, 1: {'guideline_awmf_nr': '017-066l', 'number_pages': 50, 'number_chunks': 85, 'missing_chunks': []}, 2: {'guideline_awmf_nr': '007-064l', 'number_pages': 102, 'number_chunks': 167, 'missing_chunks': []}, 3: {'guideline_awmf_nr': '083-042l', 'number_pages': 98, 'number_chunks': 104, 'missing_chunks': []}, 4: {'guideline_awmf_nr': '083-043l', 'number_pages': 164, 'number_chunks': 230, 'missing_chunks': []}, 5: {'guideline_awmf_nr': '083-005l', 'number_pages': 64, 'number_chunks': 107, 'missing_chunks': []}, 6: {'guideline_awmf_nr': '032-052OLl', 'number_pages': 425, 'number_chunks': 765, 'missing_chunks': []}, 7: {'guideline_awmf_nr': '007-086l', 'number_pages': 31, 'number_chunks': 36, 'missing_chunks': []}, 8: {'guideline_awmf_nr': '017-025l', 'number_pages': 26, 'number_chunks': 36, 'missing_chunks': []}, 9: {'guideline_awmf_nr': '001-024l', 'number_pages': 85, 'number_chunks': 133,

[37m2025-04-16 16:04:58[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mSubmitting batch upload[0m
[37m2025-04-16 16:05:34[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mResponse: {'class': 'GuidelineChunksCustomVector', 'status': 'completed', 'success': 56, 'failed': []}[0m
[37m2025-04-16 16:05:34[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mProcessing guideline 025-005l (30 pages)[0m


{0: {'guideline_awmf_nr': '007-106l', 'number_pages': 152, 'number_chunks': 316, 'missing_chunks': []}, 1: {'guideline_awmf_nr': '017-066l', 'number_pages': 50, 'number_chunks': 85, 'missing_chunks': []}, 2: {'guideline_awmf_nr': '007-064l', 'number_pages': 102, 'number_chunks': 167, 'missing_chunks': []}, 3: {'guideline_awmf_nr': '083-042l', 'number_pages': 98, 'number_chunks': 104, 'missing_chunks': []}, 4: {'guideline_awmf_nr': '083-043l', 'number_pages': 164, 'number_chunks': 230, 'missing_chunks': []}, 5: {'guideline_awmf_nr': '083-005l', 'number_pages': 64, 'number_chunks': 107, 'missing_chunks': []}, 6: {'guideline_awmf_nr': '032-052OLl', 'number_pages': 425, 'number_chunks': 765, 'missing_chunks': []}, 7: {'guideline_awmf_nr': '007-086l', 'number_pages': 31, 'number_chunks': 36, 'missing_chunks': []}, 8: {'guideline_awmf_nr': '017-025l', 'number_pages': 26, 'number_chunks': 36, 'missing_chunks': []}, 9: {'guideline_awmf_nr': '001-024l', 'number_pages': 85, 'number_chunks': 133,

[37m2025-04-16 16:05:38[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mSubmitting batch upload[0m
[37m2025-04-16 16:06:18[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mResponse: {'class': 'GuidelineChunksCustomVector', 'status': 'completed', 'success': 49, 'failed': []}[0m
[37m2025-04-16 16:06:18[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mProcessing guideline 007-061l (162 pages)[0m


{0: {'guideline_awmf_nr': '007-106l', 'number_pages': 152, 'number_chunks': 316, 'missing_chunks': []}, 1: {'guideline_awmf_nr': '017-066l', 'number_pages': 50, 'number_chunks': 85, 'missing_chunks': []}, 2: {'guideline_awmf_nr': '007-064l', 'number_pages': 102, 'number_chunks': 167, 'missing_chunks': []}, 3: {'guideline_awmf_nr': '083-042l', 'number_pages': 98, 'number_chunks': 104, 'missing_chunks': []}, 4: {'guideline_awmf_nr': '083-043l', 'number_pages': 164, 'number_chunks': 230, 'missing_chunks': []}, 5: {'guideline_awmf_nr': '083-005l', 'number_pages': 64, 'number_chunks': 107, 'missing_chunks': []}, 6: {'guideline_awmf_nr': '032-052OLl', 'number_pages': 425, 'number_chunks': 765, 'missing_chunks': []}, 7: {'guideline_awmf_nr': '007-086l', 'number_pages': 31, 'number_chunks': 36, 'missing_chunks': []}, 8: {'guideline_awmf_nr': '017-025l', 'number_pages': 26, 'number_chunks': 36, 'missing_chunks': []}, 9: {'guideline_awmf_nr': '001-024l', 'number_pages': 85, 'number_chunks': 133,

[37m2025-04-16 16:06:38[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mSubmitting batch upload[0m
[37m2025-04-16 16:09:15[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mResponse: {'class': 'GuidelineChunksCustomVector', 'status': 'completed', 'success': 261, 'failed': []}[0m
[37m2025-04-16 16:09:15[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mProcessing guideline 083-048l (446 pages)[0m


{0: {'guideline_awmf_nr': '007-106l', 'number_pages': 152, 'number_chunks': 316, 'missing_chunks': []}, 1: {'guideline_awmf_nr': '017-066l', 'number_pages': 50, 'number_chunks': 85, 'missing_chunks': []}, 2: {'guideline_awmf_nr': '007-064l', 'number_pages': 102, 'number_chunks': 167, 'missing_chunks': []}, 3: {'guideline_awmf_nr': '083-042l', 'number_pages': 98, 'number_chunks': 104, 'missing_chunks': []}, 4: {'guideline_awmf_nr': '083-043l', 'number_pages': 164, 'number_chunks': 230, 'missing_chunks': []}, 5: {'guideline_awmf_nr': '083-005l', 'number_pages': 64, 'number_chunks': 107, 'missing_chunks': []}, 6: {'guideline_awmf_nr': '032-052OLl', 'number_pages': 425, 'number_chunks': 765, 'missing_chunks': []}, 7: {'guideline_awmf_nr': '007-086l', 'number_pages': 31, 'number_chunks': 36, 'missing_chunks': []}, 8: {'guideline_awmf_nr': '017-025l', 'number_pages': 26, 'number_chunks': 36, 'missing_chunks': []}, 9: {'guideline_awmf_nr': '001-024l', 'number_pages': 85, 'number_chunks': 133,

[37m2025-04-16 16:10:13[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mSubmitting batch upload[0m
[37m2025-04-16 16:17:46[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mResponse: {'class': 'GuidelineChunksCustomVector', 'status': 'completed', 'success': 660, 'failed': []}[0m
[37m2025-04-16 16:17:46[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mProcessing guideline 083-009l (66 pages)[0m


{0: {'guideline_awmf_nr': '007-106l', 'number_pages': 152, 'number_chunks': 316, 'missing_chunks': []}, 1: {'guideline_awmf_nr': '017-066l', 'number_pages': 50, 'number_chunks': 85, 'missing_chunks': []}, 2: {'guideline_awmf_nr': '007-064l', 'number_pages': 102, 'number_chunks': 167, 'missing_chunks': []}, 3: {'guideline_awmf_nr': '083-042l', 'number_pages': 98, 'number_chunks': 104, 'missing_chunks': []}, 4: {'guideline_awmf_nr': '083-043l', 'number_pages': 164, 'number_chunks': 230, 'missing_chunks': []}, 5: {'guideline_awmf_nr': '083-005l', 'number_pages': 64, 'number_chunks': 107, 'missing_chunks': []}, 6: {'guideline_awmf_nr': '032-052OLl', 'number_pages': 425, 'number_chunks': 765, 'missing_chunks': []}, 7: {'guideline_awmf_nr': '007-086l', 'number_pages': 31, 'number_chunks': 36, 'missing_chunks': []}, 8: {'guideline_awmf_nr': '017-025l', 'number_pages': 26, 'number_chunks': 36, 'missing_chunks': []}, 9: {'guideline_awmf_nr': '001-024l', 'number_pages': 85, 'number_chunks': 133,

[37m2025-04-16 16:17:54[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mSubmitting batch upload[0m
[37m2025-04-16 16:19:00[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mResponse: {'class': 'GuidelineChunksCustomVector', 'status': 'completed', 'success': 98, 'failed': []}[0m
[37m2025-04-16 16:19:00[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mProcessing guideline 032-044OLl (255 pages)[0m


{0: {'guideline_awmf_nr': '007-106l', 'number_pages': 152, 'number_chunks': 316, 'missing_chunks': []}, 1: {'guideline_awmf_nr': '017-066l', 'number_pages': 50, 'number_chunks': 85, 'missing_chunks': []}, 2: {'guideline_awmf_nr': '007-064l', 'number_pages': 102, 'number_chunks': 167, 'missing_chunks': []}, 3: {'guideline_awmf_nr': '083-042l', 'number_pages': 98, 'number_chunks': 104, 'missing_chunks': []}, 4: {'guideline_awmf_nr': '083-043l', 'number_pages': 164, 'number_chunks': 230, 'missing_chunks': []}, 5: {'guideline_awmf_nr': '083-005l', 'number_pages': 64, 'number_chunks': 107, 'missing_chunks': []}, 6: {'guideline_awmf_nr': '032-052OLl', 'number_pages': 425, 'number_chunks': 765, 'missing_chunks': []}, 7: {'guideline_awmf_nr': '007-086l', 'number_pages': 31, 'number_chunks': 36, 'missing_chunks': []}, 8: {'guideline_awmf_nr': '017-025l', 'number_pages': 26, 'number_chunks': 36, 'missing_chunks': []}, 9: {'guideline_awmf_nr': '001-024l', 'number_pages': 85, 'number_chunks': 133,

[37m2025-04-16 16:19:35[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mSubmitting batch upload[0m
[37m2025-04-16 16:23:28[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mResponse: {'class': 'GuidelineChunksCustomVector', 'status': 'completed', 'success': 421, 'failed': []}[0m
[37m2025-04-16 16:23:28[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mProcessing guideline 017-049 (95 pages)[0m


{0: {'guideline_awmf_nr': '007-106l', 'number_pages': 152, 'number_chunks': 316, 'missing_chunks': []}, 1: {'guideline_awmf_nr': '017-066l', 'number_pages': 50, 'number_chunks': 85, 'missing_chunks': []}, 2: {'guideline_awmf_nr': '007-064l', 'number_pages': 102, 'number_chunks': 167, 'missing_chunks': []}, 3: {'guideline_awmf_nr': '083-042l', 'number_pages': 98, 'number_chunks': 104, 'missing_chunks': []}, 4: {'guideline_awmf_nr': '083-043l', 'number_pages': 164, 'number_chunks': 230, 'missing_chunks': []}, 5: {'guideline_awmf_nr': '083-005l', 'number_pages': 64, 'number_chunks': 107, 'missing_chunks': []}, 6: {'guideline_awmf_nr': '032-052OLl', 'number_pages': 425, 'number_chunks': 765, 'missing_chunks': []}, 7: {'guideline_awmf_nr': '007-086l', 'number_pages': 31, 'number_chunks': 36, 'missing_chunks': []}, 8: {'guideline_awmf_nr': '017-025l', 'number_pages': 26, 'number_chunks': 36, 'missing_chunks': []}, 9: {'guideline_awmf_nr': '001-024l', 'number_pages': 85, 'number_chunks': 133,

[37m2025-04-16 16:23:42[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mSubmitting batch upload[0m
[37m2025-04-16 16:25:06[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mResponse: {'class': 'GuidelineChunksCustomVector', 'status': 'completed', 'success': 145, 'failed': []}[0m
[37m2025-04-16 16:25:06[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mProcessing guideline 083-035l (56 pages)[0m


{0: {'guideline_awmf_nr': '007-106l', 'number_pages': 152, 'number_chunks': 316, 'missing_chunks': []}, 1: {'guideline_awmf_nr': '017-066l', 'number_pages': 50, 'number_chunks': 85, 'missing_chunks': []}, 2: {'guideline_awmf_nr': '007-064l', 'number_pages': 102, 'number_chunks': 167, 'missing_chunks': []}, 3: {'guideline_awmf_nr': '083-042l', 'number_pages': 98, 'number_chunks': 104, 'missing_chunks': []}, 4: {'guideline_awmf_nr': '083-043l', 'number_pages': 164, 'number_chunks': 230, 'missing_chunks': []}, 5: {'guideline_awmf_nr': '083-005l', 'number_pages': 64, 'number_chunks': 107, 'missing_chunks': []}, 6: {'guideline_awmf_nr': '032-052OLl', 'number_pages': 425, 'number_chunks': 765, 'missing_chunks': []}, 7: {'guideline_awmf_nr': '007-086l', 'number_pages': 31, 'number_chunks': 36, 'missing_chunks': []}, 8: {'guideline_awmf_nr': '017-025l', 'number_pages': 26, 'number_chunks': 36, 'missing_chunks': []}, 9: {'guideline_awmf_nr': '001-024l', 'number_pages': 85, 'number_chunks': 133,

[37m2025-04-16 16:25:14[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mSubmitting batch upload[0m
[37m2025-04-16 16:25:55[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mResponse: {'class': 'GuidelineChunksCustomVector', 'status': 'completed', 'success': 78, 'failed': []}[0m
[37m2025-04-16 16:25:55[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mProcessing guideline 032-023l (44 pages)[0m


{0: {'guideline_awmf_nr': '007-106l', 'number_pages': 152, 'number_chunks': 316, 'missing_chunks': []}, 1: {'guideline_awmf_nr': '017-066l', 'number_pages': 50, 'number_chunks': 85, 'missing_chunks': []}, 2: {'guideline_awmf_nr': '007-064l', 'number_pages': 102, 'number_chunks': 167, 'missing_chunks': []}, 3: {'guideline_awmf_nr': '083-042l', 'number_pages': 98, 'number_chunks': 104, 'missing_chunks': []}, 4: {'guideline_awmf_nr': '083-043l', 'number_pages': 164, 'number_chunks': 230, 'missing_chunks': []}, 5: {'guideline_awmf_nr': '083-005l', 'number_pages': 64, 'number_chunks': 107, 'missing_chunks': []}, 6: {'guideline_awmf_nr': '032-052OLl', 'number_pages': 425, 'number_chunks': 765, 'missing_chunks': []}, 7: {'guideline_awmf_nr': '007-086l', 'number_pages': 31, 'number_chunks': 36, 'missing_chunks': []}, 8: {'guideline_awmf_nr': '017-025l', 'number_pages': 26, 'number_chunks': 36, 'missing_chunks': []}, 9: {'guideline_awmf_nr': '001-024l', 'number_pages': 85, 'number_chunks': 133,

[37m2025-04-16 16:26:02[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mSubmitting batch upload[0m
[37m2025-04-16 16:26:30[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mResponse: {'class': 'GuidelineChunksCustomVector', 'status': 'completed', 'success': 59, 'failed': []}[0m
[37m2025-04-16 16:26:30[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mProcessing guideline 076-001l (411 pages)[0m


{0: {'guideline_awmf_nr': '007-106l', 'number_pages': 152, 'number_chunks': 316, 'missing_chunks': []}, 1: {'guideline_awmf_nr': '017-066l', 'number_pages': 50, 'number_chunks': 85, 'missing_chunks': []}, 2: {'guideline_awmf_nr': '007-064l', 'number_pages': 102, 'number_chunks': 167, 'missing_chunks': []}, 3: {'guideline_awmf_nr': '083-042l', 'number_pages': 98, 'number_chunks': 104, 'missing_chunks': []}, 4: {'guideline_awmf_nr': '083-043l', 'number_pages': 164, 'number_chunks': 230, 'missing_chunks': []}, 5: {'guideline_awmf_nr': '083-005l', 'number_pages': 64, 'number_chunks': 107, 'missing_chunks': []}, 6: {'guideline_awmf_nr': '032-052OLl', 'number_pages': 425, 'number_chunks': 765, 'missing_chunks': []}, 7: {'guideline_awmf_nr': '007-086l', 'number_pages': 31, 'number_chunks': 36, 'missing_chunks': []}, 8: {'guideline_awmf_nr': '017-025l', 'number_pages': 26, 'number_chunks': 36, 'missing_chunks': []}, 9: {'guideline_awmf_nr': '001-024l', 'number_pages': 85, 'number_chunks': 133,

[37m2025-04-16 16:27:41[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mSubmitting batch upload[0m
[37m2025-04-16 16:33:55[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mResponse: {'class': 'GuidelineChunksCustomVector', 'status': 'completed', 'success': 732, 'failed': []}[0m
[37m2025-04-16 16:33:55[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mProcessing guideline 076-006l (253 pages)[0m


{0: {'guideline_awmf_nr': '007-106l', 'number_pages': 152, 'number_chunks': 316, 'missing_chunks': []}, 1: {'guideline_awmf_nr': '017-066l', 'number_pages': 50, 'number_chunks': 85, 'missing_chunks': []}, 2: {'guideline_awmf_nr': '007-064l', 'number_pages': 102, 'number_chunks': 167, 'missing_chunks': []}, 3: {'guideline_awmf_nr': '083-042l', 'number_pages': 98, 'number_chunks': 104, 'missing_chunks': []}, 4: {'guideline_awmf_nr': '083-043l', 'number_pages': 164, 'number_chunks': 230, 'missing_chunks': []}, 5: {'guideline_awmf_nr': '083-005l', 'number_pages': 64, 'number_chunks': 107, 'missing_chunks': []}, 6: {'guideline_awmf_nr': '032-052OLl', 'number_pages': 425, 'number_chunks': 765, 'missing_chunks': []}, 7: {'guideline_awmf_nr': '007-086l', 'number_pages': 31, 'number_chunks': 36, 'missing_chunks': []}, 8: {'guideline_awmf_nr': '017-025l', 'number_pages': 26, 'number_chunks': 36, 'missing_chunks': []}, 9: {'guideline_awmf_nr': '001-024l', 'number_pages': 85, 'number_chunks': 133,

[37m2025-04-16 16:34:47[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mSubmitting batch upload[0m
[37m2025-04-16 16:38:19[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mResponse: {'class': 'GuidelineChunksCustomVector', 'status': 'completed', 'success': 349, 'failed': []}[0m
[37m2025-04-16 16:38:19[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mProcessing guideline 039-093l (34 pages)[0m


{0: {'guideline_awmf_nr': '007-106l', 'number_pages': 152, 'number_chunks': 316, 'missing_chunks': []}, 1: {'guideline_awmf_nr': '017-066l', 'number_pages': 50, 'number_chunks': 85, 'missing_chunks': []}, 2: {'guideline_awmf_nr': '007-064l', 'number_pages': 102, 'number_chunks': 167, 'missing_chunks': []}, 3: {'guideline_awmf_nr': '083-042l', 'number_pages': 98, 'number_chunks': 104, 'missing_chunks': []}, 4: {'guideline_awmf_nr': '083-043l', 'number_pages': 164, 'number_chunks': 230, 'missing_chunks': []}, 5: {'guideline_awmf_nr': '083-005l', 'number_pages': 64, 'number_chunks': 107, 'missing_chunks': []}, 6: {'guideline_awmf_nr': '032-052OLl', 'number_pages': 425, 'number_chunks': 765, 'missing_chunks': []}, 7: {'guideline_awmf_nr': '007-086l', 'number_pages': 31, 'number_chunks': 36, 'missing_chunks': []}, 8: {'guideline_awmf_nr': '017-025l', 'number_pages': 26, 'number_chunks': 36, 'missing_chunks': []}, 9: {'guideline_awmf_nr': '001-024l', 'number_pages': 85, 'number_chunks': 133,

[37m2025-04-16 16:38:22[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mSubmitting batch upload[0m
[37m2025-04-16 16:38:55[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mResponse: {'class': 'GuidelineChunksCustomVector', 'status': 'completed', 'success': 26, 'failed': []}[0m
[37m2025-04-16 16:38:55[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mProcessing guideline 083-033l (62 pages)[0m


{0: {'guideline_awmf_nr': '007-106l', 'number_pages': 152, 'number_chunks': 316, 'missing_chunks': []}, 1: {'guideline_awmf_nr': '017-066l', 'number_pages': 50, 'number_chunks': 85, 'missing_chunks': []}, 2: {'guideline_awmf_nr': '007-064l', 'number_pages': 102, 'number_chunks': 167, 'missing_chunks': []}, 3: {'guideline_awmf_nr': '083-042l', 'number_pages': 98, 'number_chunks': 104, 'missing_chunks': []}, 4: {'guideline_awmf_nr': '083-043l', 'number_pages': 164, 'number_chunks': 230, 'missing_chunks': []}, 5: {'guideline_awmf_nr': '083-005l', 'number_pages': 64, 'number_chunks': 107, 'missing_chunks': []}, 6: {'guideline_awmf_nr': '032-052OLl', 'number_pages': 425, 'number_chunks': 765, 'missing_chunks': []}, 7: {'guideline_awmf_nr': '007-086l', 'number_pages': 31, 'number_chunks': 36, 'missing_chunks': []}, 8: {'guideline_awmf_nr': '017-025l', 'number_pages': 26, 'number_chunks': 36, 'missing_chunks': []}, 9: {'guideline_awmf_nr': '001-024l', 'number_pages': 85, 'number_chunks': 133,

[37m2025-04-16 16:39:06[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mSubmitting batch upload[0m
[37m2025-04-16 16:39:53[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mResponse: {'class': 'GuidelineChunksCustomVector', 'status': 'completed', 'success': 74, 'failed': []}[0m
[37m2025-04-16 16:39:53[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mProcessing guideline 007-096l (32 pages)[0m


{0: {'guideline_awmf_nr': '007-106l', 'number_pages': 152, 'number_chunks': 316, 'missing_chunks': []}, 1: {'guideline_awmf_nr': '017-066l', 'number_pages': 50, 'number_chunks': 85, 'missing_chunks': []}, 2: {'guideline_awmf_nr': '007-064l', 'number_pages': 102, 'number_chunks': 167, 'missing_chunks': []}, 3: {'guideline_awmf_nr': '083-042l', 'number_pages': 98, 'number_chunks': 104, 'missing_chunks': []}, 4: {'guideline_awmf_nr': '083-043l', 'number_pages': 164, 'number_chunks': 230, 'missing_chunks': []}, 5: {'guideline_awmf_nr': '083-005l', 'number_pages': 64, 'number_chunks': 107, 'missing_chunks': []}, 6: {'guideline_awmf_nr': '032-052OLl', 'number_pages': 425, 'number_chunks': 765, 'missing_chunks': []}, 7: {'guideline_awmf_nr': '007-086l', 'number_pages': 31, 'number_chunks': 36, 'missing_chunks': []}, 8: {'guideline_awmf_nr': '017-025l', 'number_pages': 26, 'number_chunks': 36, 'missing_chunks': []}, 9: {'guideline_awmf_nr': '001-024l', 'number_pages': 85, 'number_chunks': 133,

[37m2025-04-16 16:39:58[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mSubmitting batch upload[0m
[37m2025-04-16 16:40:34[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mResponse: {'class': 'GuidelineChunksCustomVector', 'status': 'completed', 'success': 57, 'failed': []}[0m
[37m2025-04-16 16:40:34[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mProcessing guideline 007-089l (96 pages)[0m


{0: {'guideline_awmf_nr': '007-106l', 'number_pages': 152, 'number_chunks': 316, 'missing_chunks': []}, 1: {'guideline_awmf_nr': '017-066l', 'number_pages': 50, 'number_chunks': 85, 'missing_chunks': []}, 2: {'guideline_awmf_nr': '007-064l', 'number_pages': 102, 'number_chunks': 167, 'missing_chunks': []}, 3: {'guideline_awmf_nr': '083-042l', 'number_pages': 98, 'number_chunks': 104, 'missing_chunks': []}, 4: {'guideline_awmf_nr': '083-043l', 'number_pages': 164, 'number_chunks': 230, 'missing_chunks': []}, 5: {'guideline_awmf_nr': '083-005l', 'number_pages': 64, 'number_chunks': 107, 'missing_chunks': []}, 6: {'guideline_awmf_nr': '032-052OLl', 'number_pages': 425, 'number_chunks': 765, 'missing_chunks': []}, 7: {'guideline_awmf_nr': '007-086l', 'number_pages': 31, 'number_chunks': 36, 'missing_chunks': []}, 8: {'guideline_awmf_nr': '017-025l', 'number_pages': 26, 'number_chunks': 36, 'missing_chunks': []}, 9: {'guideline_awmf_nr': '001-024l', 'number_pages': 85, 'number_chunks': 133,

[37m2025-04-16 16:40:47[0m [37m[[0m[1m[38;5;208mINFO[0m[37m][0m [38;5;208mSubmitting batch upload[0m


## Test out a question

In [4]:
# Smoke test for the naive RAG (Azure) workflow: resolve the workflow, open a chat,
# pose one stored question and print the answer together with its latency.

# Look up the workflow by its configured name. When no entry exists yet it is initialised
# from the config; otherwise it is re-initialised under the workflow id already stored.
naive_rag_azure_wf = dbi.get_entry(CollectionName.WORKFLOW_SYSTEMS, "name", naive_rag_azure_config["name"])
if naive_rag_azure_wf is None:
    naive_rag_azure_wf_id = init_workflow(BACKEND_API_URL, naive_rag_azure_config)
else:
    existing_wf_id = dbi.document_to_workflow_system(naive_rag_azure_wf).workflow_id
    naive_rag_azure_wf_id = init_workflow_with_id(BACKEND_API_URL, naive_rag_azure_config, existing_wf_id)

naive_rag_azure_chat = init_chat(BACKEND_API_URL, naive_rag_azure_wf_id)

# find_one() yields None when the collection holds no documents (assuming get_collection
# returns a pymongo collection - TODO confirm). Fail with a clear message here instead of
# an opaque AttributeError on `.get`.
question_doc = dbi.get_collection(CollectionName.QUESTIONS).find_one()
if question_doc is None:
    raise ValueError("No question document found in the QUESTIONS collection; load the questions first.")
question = question_doc.get("question")
answer, response_latency = pose_question(BACKEND_API_URL, naive_rag_azure_chat, question)

print(f"### QUESTION: ###\n{question}")
print("--------------------------------------------------")
print(f"### ANSWER in {response_latency:.2f} seconds: ###\n{answer}")

### QUESTION: ###
Wann ist die dreidimensionale Bildgebung bei der Entfernung von Weisheitszähnen indiziert?
--------------------------------------------------
### ANSWER in 3.72 seconds: ###
Die dreidimensionale Bildgebung, wie beispielsweise die digitale Volumentomographie (DVT), ist bei der Entfernung von Weisheitszähnen in folgenden Fällen indiziert:

1. **Komplexe anatomische Verhältnisse**: Wenn die Weisheitszähne in einer Position liegen, die nahe an wichtigen anatomischen Strukturen wie Nerven oder Kieferhöhlen sind. Dies ermöglicht eine genauere Planung der chirurgischen Intervention.

2. **Vorhandensein von Zysten oder Tumoren**: Bei Verdacht auf zystische oder tumoröse Veränderungen im Bereich der Weisheitszähne sollte eine dreidimensionale Bildgebung in Betracht gezogen werden, um die Ausdehnung und die Beziehung zu umgebenden Strukturen besser zu verstehen.

3. **Unklare Röntgenbefunde**: Wenn die zweidimensionalen Röntgenaufnahmen (z. B. Panoramaschichtaufnahmen) keine au