In [1]:
import os
from pathlib import Path
from tempfile import mkdtemp
from warnings import filterwarnings
from dotenv import load_dotenv
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.llms.openai import OpenAI
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.core.node_parser import MarkdownNodeParser, HierarchicalNodeParser, SemanticSplitterNodeParser
from llama_index.readers.docling import DoclingReader
from llama_index.vector_stores.milvus import MilvusVectorStore
from llama_index.core.chat_engine import CondenseQuestionChatEngine
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.readers.docling import DoclingReader
from llama_index.core.postprocessor import SentenceTransformerRerank


# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Fail fast with a readable message when the key is absent.
# The previous `os.environ[...] = os.getenv(...)` round-trip did nothing when the
# key was set and raised an opaque TypeError (environ values must be str) when it
# was missing.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it or add it to a .env file before running this notebook."
    )

# Turn off parallelism in the tokenizers library for this process.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Embedding model: BAAI/bge-small-en-v1.5, a sentence/text embedding model for
# English text published by the Beijing Academy of Artificial Intelligence (BAAI).
# NOTE(review): an earlier note in this cell described the model as E5/MiniLM-based
# with ~60 million parameters. The public model card appears to describe a
# BERT-style encoder of roughly 33M parameters — confirm before relying on
# either figure.
EMBED_MODEL = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Embed a throwaway string once to discover the vector size; the Milvus vector
# store is later created with this dimension.
embed_dim = len(EMBED_MODEL.get_text_embedding("Burger"))
print(embed_dim)

384


In [3]:
# arXiv PDF URL that is passed to the document reader's load_data() call.
SOURCE = "https://arxiv.org/pdf/2408.09869"

In [4]:
# Load SOURCE through the Docling reader.
reader = DoclingReader()
document = reader.load_data(SOURCE)

# Chunking stages, handed to the index in this order: Markdown-aware parsing,
# then embedding-driven semantic splitting.
node_parser_mk = MarkdownNodeParser()
node_parser_semantic = SemanticSplitterNodeParser(
    buffer_size=1,
    breakpoint_percentile_threshold=95,
    embed_model=EMBED_MODEL,
)

# Milvus database file placed inside a freshly created temporary directory.
MILVUS_URI = str(Path(mkdtemp()) / "docling_ahtsham.db")
vector_store = MilvusVectorStore(
    uri=MILVUS_URI,
    dim=embed_dim,
    overwrite=True,
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the vector index over the loaded document(s) using the stages above.
index = VectorStoreIndex.from_documents(
    documents=document,
    transformations=[node_parser_mk, node_parser_semantic],
    storage_context=storage_context,
    embed_model=EMBED_MODEL,
)


  from pkg_resources import DistributionNotFound, get_distribution
2025-07-11 14:42:29,444 [DEBUG][_create_connection]: Created new connection using: a8c5402f58dc45e6815f29232476320e (async_milvus_client.py:599)


In [5]:
# Candidate OpenAI model names; the first entry is the one used for `llm`.
# NOTE(review): text-davinci-003 is a legacy completions model — confirm it is
# still offered before switching to it.
models = [
    "gpt-3.5-turbo",
    "text-davinci-003",
]

# LLM client that the query engine uses to write answers.
llm = OpenAI(
    model=models[0],
    temperature=0,
    max_tokens=1024,
    frequency_penalty=0,
)



In [10]:


# Alternative query tried earlier:
# QUERY = 'How do you ensure software compliance (licensing) in an organization '
QUERY = "when was the last olympic held?"

# Retrieve the 10 most similar chunks from the index and let the LLM answer.
query_engine = index.as_query_engine(similarity_top_k=10, llm=llm)
result = query_engine.query(QUERY)

# `response` is declared optional on the response object, so fall back to an
# empty string instead of letting `.strip()` raise AttributeError on None.
answer = (result.response or "").strip()
print(f"Q: {QUERY}\nA: {answer}")

# NOTE(review): this question is unrelated to the indexed paper, yet the recorded
# output is a confident answer. Inspect `result.source_nodes` (text and metadata)
# to check whether the retrieved context supports it.

Q: when was the last olympic held?
A: The last Olympic Games were held in 2021 in Tokyo, Japan.
