In [None]:
import nest_asyncio
nest_asyncio.apply()

import qdrant_client
from qdrant_client.models import VectorParams, Distance
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, ServiceContext, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

# Define collection name
collection_name = "chat_with_docs"

# Connect to Qdrant
try:
    client = qdrant_client.QdrantClient(
        host="localhost", 
        port=6333
    )
    # Test connection by getting collection info
    try:
        client.get_collection(collection_name)
        print(f"Successfully connected to Qdrant and found collection '{collection_name}'")
    except Exception as e:
        print(f"Collection '{collection_name}' not found. Creating it now...")
        # Get the dimension from your embedding model
        embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5",
                                          trust_remote_code=True)
        embedding_dimension = 1024  # bge-large-en-v1.5 uses 1024 dimensions
        
        # Create the collection with proper format using VectorParams
        client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(
                size=embedding_dimension,
                distance=Distance.COSINE
            )
        )
        print(f"Created collection '{collection_name}'")
except Exception as e:
    print(f"Error connecting to Qdrant: {e}")
    print("\nPossible solutions:")
    print("1. Make sure Qdrant is running locally with: docker run -p 6333:6333 qdrant/qdrant")
    print("2. Check if port 6333 is not blocked by firewall")
    print("3. Verify no other application is using port 6333")
    raise

# Load documents
input_dir_path = './docs'
loader = SimpleDirectoryReader(
    input_dir=input_dir_path,
    required_exts=[".pdf"],
    recursive=True
)
docs = loader.load_data()
print(f"Loaded {len(docs)} documents")

# Set up embedding model
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5",
                                  trust_remote_code=True)
Settings.embed_model = embed_model

# Create index function
def create_index(documents):
    print("Creating vector store...")
    vector_store = QdrantVectorStore(client=client,
                                    collection_name=collection_name)
    
    print("Setting up storage context...")
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    
    print("Building index from documents (this may take a while)...")
    index = VectorStoreIndex.from_documents(documents,
                                           storage_context=storage_context)
    
    print("Index creation complete!")
    return index

# Create the index
try:
    index = create_index(docs)
    print("Successfully created index!")
except Exception as e:
    print(f"Error creating index: {e}")
    raise

Collection 'chat_with_docs' not found. Creating it now...
Error connecting to Qdrant: timed out

Possible solutions:
1. Make sure Qdrant is running locally with: docker run -p 6333:6333 qdrant/qdrant
2. Check if port 6333 is not blocked by firewall
3. Verify no other application is using port 6333


ResponseHandlingException: timed out

In [None]:
from llama_index.llms.ollama import Ollama
from llama_index.core import Settings

llm = Ollama(model="llama3.2:1b", request_timeout=120.0)

Settings.llm = llm

In [3]:
from llama_index.core import PromptTemplate

template = """Context information is below:
              ---------------------
              {context_str}
              ---------------------
              Given the context information above I want you to think
              step by step to answer the query in a crisp manner,
              incase you don't know the answer say 'I don't know!'
            
              Query: {query_str}
        
              Answer:"""

qa_prompt_tmpl = PromptTemplate(template)

In [None]:
from llama_index.core.postprocessor import SentenceTransformerRerank

rerank = SentenceTransformerRerank(
    model="cross-encoder/ms-marco-MiniLM-L-2-v2", 
    top_n=3
)

In [None]:
import time
query_engine = index.as_query_engine(similarity_top_k=10,
                                     node_postprocessors=[rerank])

query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
)

total_time_s = time.time()

response = query_engine.query("What exactly is DSPy?")

total_time_e = time.time()
total_time = total_time_e - total_time_s

print(f"time taken for fill response is {total_time}")



In [None]:
from IPython.display import Markdown, display

display(Markdown(str(response)))

In [None]:
import time
from llama_index.core import QueryBundle

query = "What exactly is DSPy?"
query_bundle = QueryBundle(query_str=query)

# Measure total query time
start_time = time.time()

# Measure similarity search time
sim_start_time = time.time()
retrieved_docs = query_engine.retriever.retrieve(query) 
sim_time = time.time() - sim_start_time
print(f"Time taken for similarity search: {sim_time:.4f} seconds")

# Measure reranking time
rerank_start_time = time.time()
reranked_docs = rerank.postprocess_nodes(retrieved_docs, query_bundle=query_bundle)
rerank_time = time.time() - rerank_start_time
print(f"Time taken for reranking: {rerank_time:.4f} seconds")

# Measure LLM response time
llm_start_time = time.time()
response = query_engine.synthesize(query_bundle, nodes=reranked_docs)
llm_time = time.time() - llm_start_time
print(f"Time taken for LLM response: {llm_time:.4f} seconds")

# Total time
total_time = time.time() - start_time
print(f"Total query time: {total_time:.4f} seconds")

# Print the final response
print("\nResponse from LLM:")
print(response)
