In [None]:
%%capture
!pip install llama-index==0.10.37 llama-index-embeddings-openai==0.1.9 qdrant-client==1.9.1 llama-index-vector-stores-qdrant==0.2.8 llama-index-llms-cohere==0.2.0

In [None]:
import os
from dotenv import load_dotenv
from getpass import getpass

import nest_asyncio

# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Patch asyncio so nested event loops are allowed inside the notebook's running loop.
nest_asyncio.apply()

In [None]:
# Prefer the CO_API_KEY environment variable; fall back to an interactive prompt when it
# is unset or empty. `.get` is required here: `os.environ[...]` raises KeyError for a
# missing variable, which would make the getpass fallback unreachable.
CO_API_KEY = os.environ.get("CO_API_KEY") or getpass("Enter your Cohere API key: ")

In [None]:
# Prefer the OPENAI_API_KEY environment variable; fall back to an interactive prompt when
# it is unset or empty. `.get` is required here: `os.environ[...]` raises KeyError for a
# missing variable, which would make the getpass fallback unreachable.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or getpass("Enter your OpenAI API key: ")

In [None]:
# Prefer the QDRANT_URL environment variable; fall back to an interactive prompt when it
# is unset or empty. `.get` is required here: `os.environ[...]` raises KeyError for a
# missing variable, which would make the getpass fallback unreachable.
QDRANT_URL = os.environ.get("QDRANT_URL") or getpass("Enter your Qdrant URL:")

In [None]:
# Prefer the QDRANT_API_KEY environment variable; fall back to an interactive prompt when
# it is unset or empty. `.get` is required here: `os.environ[...]` raises KeyError for a
# missing variable, which would make the getpass fallback unreachable.
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY") or getpass("Enter your Qdrant API Key:")

# Query Pipelines

<img src="https://docs.llamaindex.ai/en/stable/_static/query/pipeline_rag_example.png">

Source: [LlamaIndex Docs](https://docs.llamaindex.ai/en/stable/module_guides/querying/pipeline/)

LlamaIndex offers a query API for chaining modules to manage data workflows easily. It revolves around the QueryPipeline, where you link various modules like LLMs, prompts, and retrievers in a sequence or DAG for end-to-end execution.

You can streamline workflows efficiently using QueryPipeline, reducing code complexity and enhancing readability. Additionally, a declarative interface ensures easy serialization of pipeline components for portability and deployment across systems in the future.

In [None]:
from llama_index.core.settings import Settings
from llama_index.llms.cohere import Cohere
from llama_index.embeddings.openai import OpenAIEmbedding

# Register the Cohere chat model on the shared Settings object.
Settings.llm = Cohere(
    model="command-r-plus",
    api_key=CO_API_KEY,
)

# Register the OpenAI embedding model on the shared Settings object.
Settings.embed_model = OpenAIEmbedding(
    model_name="text-embedding-3-small",
    api_key=OPENAI_API_KEY,
)

In [None]:
from qdrant_client import QdrantClient
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore

# Connect to the Qdrant instance using the URL and API key collected earlier.
client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)

# Expose the "it_can_be_done" collection as a LlamaIndex vector store.
# NOTE(review): presumably the collection is already populated — confirm before running.
vector_store = QdrantVectorStore(
    collection_name="it_can_be_done",
    client=client,
)

# Build the index on top of that vector store, using the configured embedding model.
index = VectorStoreIndex.from_vector_store(
    embed_model=Settings.embed_model,
    vector_store=vector_store,
)

# A RAG Pipeline with PromptTemplate

I'm going to kick it off with a slightly complex workflow where the input is passed through two prompts, with retrieval happening in between.

1. Retrieve context about the given topic.

2. Rephrase the context

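Each prompt only takes in one input, so `QueryPipeline` will automatically chain each module's output into the next module: the first prompt into the retriever, the retrieved context into the second prompt, and that prompt into the LLM.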

You'll see how to define links more explicitly in the next section.

In [None]:
from llama_index.core.query_pipeline import QueryPipeline
from llama_index.core import PromptTemplate

# First prompt: turn the raw topic into the text handed to the retriever.
prompt_str1 = "Retrieve context about the following topic: {topic}"
prompt_tmpl1 = PromptTemplate(prompt_str1)

# Second prompt: ask the LLM to restate whatever the retriever returned.
prompt_str2 = """Synthesize the context provided into an answer using modern slang, while still quoting the sources.

Context:

{query_str}

Synthesized response:
"""

prompt_tmpl2 = PromptTemplate(prompt_str2)

retriever = index.as_retriever(similarity_top_k=5)

# `chain` links the modules sequentially: each module's output feeds the next one.
p = QueryPipeline(
    chain=[
        prompt_tmpl1,
        retriever,
        prompt_tmpl2,
        Settings.llm,
    ],
    verbose=True,
)

In [None]:
# Run the chained pipeline end to end; `topic` fills the {topic} slot of the first prompt.
response = p.run(
    topic="Working hard to achieve your goals even when you doubt yourself and your chances of success"
)

In [None]:
# Show the pipeline's final output as plain text.
print(response)

You can debug the pipeline by viewing the intermediate inputs and outputs of each module.

In [None]:
# Same topic as the previous run, but also capture each module's intermediate data.
output, intermediates = p.run_with_intermediates(
    topic="Working hard to achieve your goals even when you doubt yourself and your chances of success"
)

In [None]:
# Inspect the attributes stored on the final output object.
vars(output)

In [None]:
# Display everything captured by run_with_intermediates.
intermediates

In [None]:
# Modules added through `chain=[...]` get freshly generated UUID keys on every run, so a
# key copied from an earlier session raises KeyError after a kernel restart. Look the key
# up dynamically instead; here we inspect the first module recorded in `intermediates`.
module_key = next(iter(intermediates))
intermediates[module_key]

### Another RAG Pipeline

Here we set up a RAG pipeline without the query rewriting step.

Here we need a way to link the input query to both the retriever and summarizer. 

We can do this by defining a special `InputComponent`, allowing us to link the inputs to multiple downstream modules.

In [None]:
from llama_index.core.response_synthesizers import TreeSummarize
from llama_index.core.query_pipeline import InputComponent
from llama_index.llms.openai import OpenAI

# Entry point of the pipeline: lets one input be linked to several downstream modules.
# NOTE(review): this name shadows the built-in `input()`; it is kept because the
# pipeline-construction cell refers to it by this name.
input = InputComponent()

retriever = index.as_retriever(similarity_top_k=5)

# Pass the key explicitly, as is done for the embedding model, so that a key entered at
# the getpass prompt (and therefore absent from the environment) is still used.
llm = OpenAI(model="gpt-4o", api_key=OPENAI_API_KEY)

tree_summarizer = TreeSummarize(llm=llm)

In [None]:
# Start from an empty pipeline and wire the modules together with explicit links.
p = QueryPipeline(verbose=True, show_progress=True)

p.add_modules(
    dict(
        input=input,
        retriever=retriever,
        tree_summarizer=tree_summarizer,
    )
)

# The pipeline input fans out to both the retriever and the summarizer ...
p.add_link("input", "retriever")
p.add_link("input", "tree_summarizer", dest_key="query_str")
# ... and the retriever's output is passed to the summarizer under the `nodes` key.
p.add_link("retriever", "tree_summarizer", dest_key="nodes")

In [None]:
# Run the pipeline; the `input` keyword carries the query text.
response = p.run(
    input="Working hard to achieve your goals even when you doubt yourself and your chances of success"
)

In [None]:
# print() already converts its argument with str(), so the explicit call is not needed.
print(response)

In [None]:
# Inspect the attributes stored on the response object.
vars(response)