## Handling asynchronous operations

In [2]:
import nest_asyncio
# Patch asyncio so an already-running event loop can be re-entered.
# NOTE(review): presumably needed because the notebook kernel already runs a
# loop and later cells trigger async calls — confirm it is still required.
nest_asyncio.apply()

## Set up Qdrant vector database

In [3]:
import qdrant_client
# Qdrant collection that the vector store reads from and writes to.
collection_name = "chat_with_docs"

# Client for the Qdrant server at localhost:6333.
client = qdrant_client.QdrantClient(host="localhost", port=6333)

## Read the documents

In [4]:
from llama_index.core import SimpleDirectoryReader

# Directory scanned for source documents.
input_dir = "./docs"

# Restrict the reader to PDF files and descend into sub-directories.
loader = SimpleDirectoryReader(
    recursive=True,
    required_exts=[".pdf"],
    input_dir=input_dir,
)

# Loaded documents, consumed by the indexing step.
docs = loader.load_data()

## A function to index data

In [5]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import VectorStoreIndex, ServiceContext, StorageContext

def create_index(documents, qdrant=None, collection=None):
    """Build a vector index over ``documents`` backed by a Qdrant collection.

    Args:
        documents: Documents to index; passed unchanged to
            ``VectorStoreIndex.from_documents``.
        qdrant: Qdrant client handed to the vector store. When ``None``
            (the default) the module-level ``client`` is used, so existing
            one-argument calls behave as before.
        collection: Name of the Qdrant collection. When ``None`` (the
            default) the module-level ``collection_name`` is used.

    Returns:
        The index returned by ``VectorStoreIndex.from_documents``.
    """
    # Resolve the fallbacks at call time so that re-running the setup cell
    # (and rebinding the globals) is picked up without redefining this function.
    if qdrant is None:
        qdrant = client
    if collection is None:
        collection = collection_name

    vector_store = QdrantVectorStore(client=qdrant,
                                     collection_name=collection)

    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    # NOTE(review): presumably every call inserts the documents again, so
    # re-running against an already populated collection may duplicate
    # entries — confirm before using this on a persistent Qdrant instance.
    return VectorStoreIndex.from_documents(documents,
                                           storage_context=storage_context)

## Load the embedding model and index data

In [6]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

# NOTE(review): trust_remote_code=True permits code shipped with the model
# repository to run on load — confirm this model actually needs it.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-large-en-v1.5",
    trust_remote_code=True,
)

# Register the embedding model on the global Settings before indexing runs.
Settings.embed_model = embed_model

# Index the loaded documents.
index = create_index(docs)

  from .autonotebook import tqdm as notebook_tqdm


## Load the LLM

In [7]:
from llama_index.llms.openai import OpenAI
from dotenv import load_dotenv
import os

# Load variables from a .env file, if one is found, into the environment.
load_dotenv()

# Evaluates to None when OPENAI_API_KEY is not set.
openai_api_key = os.environ.get("OPENAI_API_KEY")

In [8]:
# NOTE(review): confirm that `request_timeout` is a keyword this OpenAI class
# honours (its documented timeout option may be named `timeout`).
llm = OpenAI(
    model="gpt-4o-mini",
    api_key=openai_api_key,
    request_timeout=120,
)

# Make this LLM the default on the global Settings.
Settings.llm = llm

## Define the prompt template

In [9]:
from llama_index.core import PromptTemplate

# Question-answering prompt with two placeholders: {context_str} and
# {query_str}.
# The indentation and the whitespace-only lines inside the triple-quoted
# string are part of the prompt text itself.
# NOTE(review): "incase" below is a typo for "in case"; it is runtime text,
# so it is left unchanged here — fix it deliberately if desired.
template = """Context information is below:
              ---------------------
              {context_str}
              ---------------------
              Given the context information above I want you to think
              step by step to answer the query in a crisp manner,
              incase you don't know the answer say 'I don't know!'
            
              Query: {query_str}
        
              Answer:"""

# Wrap the raw string so it can be installed on the query engine.
qa_prompt_tmpl = PromptTemplate(template)

## Re-ranking

In [10]:
from llama_index.core.postprocessor import SentenceTransformerRerank

# Cross-encoder re-ranker configured with top_n=3; used as a node
# post-processor by the query engine.
rerank = SentenceTransformerRerank(
    top_n=3,
    model="cross-encoder/ms-marco-MiniLM-L-2-v2",
)

## Query the document

In [11]:
# Query engine with similarity_top_k=10 whose results pass through the
# re-ranker.
query_engine = index.as_query_engine(
    node_postprocessors=[rerank],
    similarity_top_k=10,
)

# Install the custom QA prompt under the response synthesizer's key.
custom_prompts = {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
query_engine.update_prompts(custom_prompts)

In [12]:
# Run a sample question through the query engine; `response` is rendered in
# the next cell.
response = query_engine.query("What exactly is DSPy?")

## Display the output

In [13]:
from IPython.display import display, Markdown
# Convert the response to a string and render it as Markdown in the output.
display(Markdown(str(response)))

DSPy is a programming model designed to abstract and optimize the process of prompting language models (LMs) for natural language processing tasks. It utilizes natural language signatures, which are typed declarations that specify the input and output fields of a function, allowing users to define what a text transformation should accomplish without detailing how to prompt the LM. DSPy includes modules like Predict, ChainOfThought, and others that translate various prompting techniques into reusable functions. This framework enables the creation of self-improving, multi-stage NLP systems using smaller and more efficient LMs, significantly enhancing performance compared to traditional hand-crafted prompts.