# 1. Set up asyncio

In [2]:
import nest_asyncio

# Patch asyncio so event loops can be nested. NOTE(review): presumably needed
# because the notebook kernel already runs its own loop and later calls start
# another one; confirm against the libraries used below.
nest_asyncio.apply()

# 2. Set up the Qdrant vector database

In [3]:
import qdrant_client

# Name of the Qdrant collection this notebook reads from and writes to.
collection_name = "chat_with_docs"

# Client for a Qdrant server addressed at localhost:6333.
qdrant_endpoint = {"host": "localhost", "port": 6333}
client = qdrant_client.QdrantClient(**qdrant_endpoint)



# 3. Read the documents

In [4]:
from llama_index.core import SimpleDirectoryReader

# Folder that holds the source documents.
input_dir_path = "./docs"

# Search the folder and its sub-folders, keeping only files ending in .pdf.
reader_options = dict(
    input_dir=input_dir_path,
    required_exts=[".pdf"],
    recursive=True,
)
loader = SimpleDirectoryReader(**reader_options)

# Parse the matched files into Document objects.
docs = loader.load_data()

In [5]:
# Preview only the first two documents; echoing the whole list floods the
# cell output with the text and metadata of every loaded document.
docs[:2]

[Document(id_='376789a4-ab26-45de-94c5-8b3e2f3e6f03', embedding=None, metadata={'page_label': '1', 'file_name': 'Rational AI - Full Deck.pdf', 'file_path': '/Users/fc/experiments/rag-project/docs/Rational AI - Full Deck.pdf', 'file_type': 'application/pdf', 'file_size': 7979133, 'creation_date': '2025-06-13', 'last_modified_date': '2025-06-10'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='Your Data,Your AI\nEnsuring a safe LLM adoption with Rational AI', path=None, url=None, mimetype=None), image_resource=None, audio_resource=None, video_resource=None, text_template='{metadata_str}\n\n{content}'),
 Document(id_='56ad345a-d3a5-488f-93a6

In [6]:
# Sanity check: container type and number of Document objects that were loaded.
type(docs), len(docs)

(list, 80)

# 4. Define a function to index data

In [7]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import VectorStoreIndex, ServiceContext, StorageContext

def create_index(documents, vector_client=None, collection=None):
    """Index `documents` into a Qdrant-backed vector store.

    Args:
        documents: Documents to index, e.g. the list returned by
            `SimpleDirectoryReader.load_data()`.
        vector_client: Qdrant client to write through. Defaults to the
            notebook-level `client`.
        collection: Name of the target Qdrant collection. Defaults to the
            notebook-level `collection_name`.

    Returns:
        The index returned by `VectorStoreIndex.from_documents`.
    """
    # Fall back to the notebook globals so existing `create_index(docs)`
    # calls keep working unchanged.
    if vector_client is None:
        vector_client = client
    if collection is None:
        collection = collection_name

    vector_store = QdrantVectorStore(client=vector_client,
                                     collection_name=collection)

    # Route index storage to Qdrant rather than the default in-memory store.
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    # NOTE(review): each call appears to insert the documents again, so
    # re-running the notebook against the same collection may accumulate
    # duplicate vectors -- confirm, and clear the collection first if so.
    return VectorStoreIndex.from_documents(documents,
                                           storage_context=storage_context)

# 5. Load the embedding model and index data

In [8]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

# Embedding model loaded from Hugging Face.
# NOTE(review): trust_remote_code=True permits running code shipped with the
# model repository -- confirm this model actually needs it.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-large-en-v1.5",
    trust_remote_code=True,
)

# Register the model globally before building the index.
Settings.embed_model = embed_model

# Build the Qdrant-backed index from the documents loaded earlier.
index = create_index(documents=docs)

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
# Sanity check: confirm which index class `create_index` returned.
type(index)

llama_index.core.indices.vector_store.base.VectorStoreIndex

# 6. Load the LLM

In [10]:
from llama_index.llms.ollama import Ollama

# Ollama-served model; request_timeout is raised to 120.0 (presumably seconds).
llm = Ollama(
    model="llama3.2:1b",
    request_timeout=120.0,
)

# Register it as the LLM on the global Settings object.
Settings.llm = llm

In [11]:
# Inspect the class of the global Settings object configured above.
type(Settings)

llama_index.core.settings._Settings

# 7. Define the prompt template

In [12]:
from llama_index.core import PromptTemplate

# Custom question-answering prompt with two placeholders, {context_str} and
# {query_str}. The text starts at column 0 on purpose: indentation inside a
# triple-quoted string becomes part of the prompt sent to the model.
template = """Context information is below:
---------------------
{context_str}
---------------------
Given the context information above I want you to think
step by step to answer the query in a crisp manner,
in case you don't know the answer say 'I don't know!'

Query: {query_str}

Answer:"""

qa_prompt_tmpl = PromptTemplate(template)

# 8. Reranking

Here, we use a cross-encoder to re-rank the document chunks. Also, we limit the output to the top 3 most relevant chunks based on the model's scoring.

In [13]:
from llama_index.core.postprocessor import SentenceTransformerRerank

# Cross-encoder reranker configured to keep the top 3 chunks.
rerank_config = {
    "model": "cross-encoder/ms-marco-MiniLM-L-2-v2",
    "top_n": 3,
}
rerank = SentenceTransformerRerank(**rerank_config)

In [14]:
# Inspect the reranker's configuration; the repr lists fields such as
# `model`, `top_n` and `device`.
rerank

SentenceTransformerRerank(callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x344c0c370>, model='cross-encoder/ms-marco-MiniLM-L-2-v2', top_n=3, device='mps', keep_retrieval_score=False, trust_remote_code=False)

# 9. Query the document

In [28]:
# Retrieve the 10 most similar chunks, then pass them through the `rerank`
# postprocessor.
query_engine = index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[rerank],
)

# Replace the response synthesizer's QA prompt with the custom template.
prompt_overrides = {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
query_engine.update_prompts(prompt_overrides)

# Other questions tried earlier:
#   "What exactly is DSPy?"
#   "How is DSPy pronounced?"
#   "What is the github repo for docling?"
#   "Which is the RationalAI product that addresses customer care?"
question = "What is the point with the Rational control room?"
response = query_engine.query(question)

In [29]:
from IPython.display import Markdown, display

# Render the answer text as Markdown instead of a plain string.
answer_markdown = Markdown(str(response))
display(answer_markdown)

The Rational AI Control Room appears to be an internal system used for monitoring and managing various aspects of an organization's operations. Specifically, it likely provides live sentiment analysis, critical message notifications, and proactive suggestions to improve operations.

In [30]:
# Show the metadata attached to the response: per the output, a dict keyed by
# what appear to be node ids, each holding page label and file details.
response.metadata

{'bccc7cf3-0be6-49ed-ab45-674cbb02d684': {'page_label': '28',
  'file_name': 'Rational AI - Full Deck.pdf',
  'file_path': '/Users/fc/experiments/rag-project/docs/Rational AI - Full Deck.pdf',
  'file_type': 'application/pdf',
  'file_size': 7979133,
  'creation_date': '2025-06-13',
  'last_modified_date': '2025-06-10'},
 '25635acd-5add-4b0f-951c-c8e34cfda90c': {'page_label': '27',
  'file_name': 'Rational AI - Full Deck.pdf',
  'file_path': '/Users/fc/experiments/rag-project/docs/Rational AI - Full Deck.pdf',
  'file_type': 'application/pdf',
  'file_size': 7979133,
  'creation_date': '2025-06-13',
  'last_modified_date': '2025-06-10'},
 'c94bf0aa-aee2-4e94-a54f-dd80b83e5d70': {'page_label': '20',
  'file_name': 'Rational AI - Full Deck.pdf',
  'file_path': '/Users/fc/experiments/rag-project/docs/Rational AI - Full Deck.pdf',
  'file_type': 'application/pdf',
  'file_size': 7979133,
  'creation_date': '2025-06-13',
  'last_modified_date': '2025-06-10'}}