In [None]:
%%capture
!pip install llama-index llama-index-embeddings-openai qdrant-client llama-index-vector-stores-qdrant

In [None]:
import os
from getpass import getpass

In [None]:
# Ask for the OpenAI key only when it is not already present in the
# environment, so re-running this cell (or running the notebook somewhere the
# key is already exported) neither prompts again nor overwrites the value.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass("Enter your OpenAI API key: ")

# Querying

- 📊 Now that you've loaded your data and built an index, it's time to focus on the core of an LLM application: querying.

- 🤖 Querying at its simplest involves making a prompt call to an LLM - this could be asking a question, requesting a summary, or giving more complex instructions.

- 🔗 For more advanced uses, querying can include repeated or chained prompt calls to an LLM, or even a reasoning loop across multiple components.

Let's first instantiate the `qdrant` vector store.

In [None]:
import qdrant_client
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import StorageContext

# Embedding model that is handed to the index loaded below.
embed_model = OpenAIEmbedding(model="text-embedding-3-small")

# Qdrant client pointed at the on-disk storage directory.
client = qdrant_client.QdrantClient(path="persisted_storage/qdrant")

# Vector store over the "it_can_be_done" collection, reusing the client above.
# NOTE(review): `path` and `prefer_grpc` are supplied in addition to an
# explicit `client`; they may be redundant — confirm against the
# QdrantVectorStore signature before removing them.
vector_store = QdrantVectorStore(
    collection_name="it_can_be_done",
    client=client,
    path="persisted_storage/qdrant/",
    prefer_grpc=True,
)

# Storage context carrying the Qdrant vector store and the persist directory.
storage_context = StorageContext.from_defaults(
    persist_dir="persisted_storage/qdrant",
    vector_store=vector_store,
)

# Load the index from the vectors already stored in the vector store.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    storage_context=storage_context,
    embed_model=embed_model,
)

# 🧐 The `QueryEngine`

The `QueryEngine` is the foundation of all querying. Here's what happens under the hood:

- 📚 **Retrieval**: Find and return the most relevant documents from the `Index` using strategies like "top-k" semantic retrieval.

- 🔧 **Postprocessing**: Optionally rerank, transform, or filter retrieved Nodes, often based on specific metadata like keywords.

- 🔄 **Response Synthesis**: Combine the query, relevant data, and prompt to generate a response from your LLM.


In [None]:
# Query engine built straight from the index with its default settings.
query_engine = index.as_query_engine()

# Ask a single question and print the returned response.
response = query_engine.query("What do the Sikh Stoics believe?")
print(response)

# Customizing Querying

- 🔧 **Customizing Retrieval**: Use LlamaIndex's low-level composition API to adjust the `similarity_top_k` value for more granular control over query results.

- 📈 **Adding Post-Processing**: Implement a step to ensure only nodes meeting a minimum similarity score are included, balancing between data richness and relevance.

- 🎚️ **SimilarityPostprocessor**: Set a similarity score threshold to ensure high relevance; this postprocessor is compatible only with embedding-based retrievers.

In [None]:
from llama_index.core import get_response_synthesizer
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor

# Retriever: request the top 10 most similar nodes from the index.
retriever = VectorIndexRetriever(similarity_top_k=10, index=index)

# Post-processor: apply a similarity cutoff of 0.84 to the retrieved nodes.
similarity_processor = SimilarityPostprocessor(similarity_cutoff=0.84)

# Response synthesizer created with its default settings.
# (The variable keeps its original spelling so that any other cell that
# refers to it by name continues to work.)
response_synthsizer = get_response_synthesizer()

# Query engine wiring the retriever, post-processor and synthesizer together.
# This rebinds `query_engine`, replacing the default engine created earlier.
query_engine = RetrieverQueryEngine(
    node_postprocessors=[similarity_processor],
    response_synthesizer=response_synthsizer,
    retriever=retriever,
)


In [None]:
# Run a thematic question through the current query engine; the bare
# expression lets the notebook display the returned response.
query_engine.query(
    "How do the authors express the theme of resilience in the face of challenges?"
)

In [None]:
# Run a comparative question through the current query engine; the bare
# expression lets the notebook display the returned response.
query_engine.query(
    "Compare the portrayal of internal versus external battles in the narratives and poems"
)