# RAG with LlamaIndex

In [153]:
import llama_index
from llama_index.core import SimpleDirectoryReader
from llama_index.core import Document
from llama_index.core.node_parser import SentenceSplitter  # from text to chunks
from llama_index.embeddings.huggingface import HuggingFaceEmbedding  # from chunks to vectors
from llama_index.core.ingestion import IngestionPipeline
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import VectorStoreIndex
from llama_index.llms.huggingface_api import  HuggingFaceInferenceAPI
from llama_index.llms.ollama import Ollama
from llama_index.core.evaluation import  FaithfulnessEvaluator
from dotenv import load_dotenv
import os
load_dotenv()

True

In [154]:
# GLOBALS
# Credentials are looked up in the process environment; each is None when unset.
HF_TOKEN = os.environ.get('HF_TOKEN')
PHOENIX_API_KEY = os.environ.get('PHOENIX_API_KEY')

# Hugging Face model identifiers.
model_name = 'BAAI/bge-small-en-v1.5'  # passed to HuggingFaceEmbedding
big_model_name = 'Qwen/Qwen2.5-Coder-32B-Instruct'  # only used by the commented-out HuggingFaceInferenceAPI option

In [155]:
# Send LlamaIndex traces to the Arize Phoenix endpoint below.
# The API key is exported through the OTLP headers environment variable before
# the global handler is installed (same order as before).
if PHOENIX_API_KEY:
    os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"api_key={PHOENIX_API_KEY}"
    llama_index.core.set_global_handler(
        "arize_phoenix",
        endpoint="https://llamatrace.com/v1/traces"
    )
else:
    # Without this guard a missing key would be formatted into the literal
    # header "api_key=None" and every trace export would be sent with it.
    print("PHOENIX_API_KEY is not set; skipping Arize Phoenix tracing.")

In [156]:
# Read the contents of the local `papers` directory into `docs`.
reader = SimpleDirectoryReader(
    input_dir='papers',
)
docs = reader.load_data()

In [157]:
# Quick inspection of what the reader returned (the recorded output is `list`).
type(docs)

list

In [158]:
# Two-stage ingestion: split text into chunks, then turn each chunk into a vector.
# No vector store is attached to this pipeline.
splitter = SentenceSplitter(chunk_overlap=0)
embedder = HuggingFaceEmbedding(model_name=model_name)
pipeline = IngestionPipeline(transformations=[splitter, embedder])

In [159]:
# Run the ingestion pipeline asynchronously over the loaded documents and keep
# the result in `nodes`. Top-level `await` relies on the notebook kernel's
# async support.
nodes = await pipeline.arun(documents=docs)

In [160]:
# Quick inspection of the pipeline result (the recorded output is `list`).
type(nodes)

list

In [161]:
# On-disk Chroma client and the 'alfred' collection (created on first use),
# wrapped as a LlamaIndex vector store.
db = chromadb.PersistentClient(path='./alfred_chroma_db')
chroma_collection = db.get_or_create_collection('alfred')
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Split-then-embed pipeline with the Chroma vector store attached.
ingest_steps = [
    SentenceSplitter(chunk_overlap=0),
    HuggingFaceEmbedding(model_name=model_name),
]
pipeline = IngestionPipeline(
    transformations=ingest_steps,
    vector_store=vector_store,
)

In [162]:
# Run the current `pipeline` (the one configured with the Chroma vector store)
# over the documents; this rebinds `nodes` to the new result.
# NOTE(review): the Chroma client is persistent, so re-running this cell may add
# the same chunks to the collection again — confirm whether that is intended.
nodes = await pipeline.arun(documents=docs)

In [163]:
# Embedding model shared by the index and, later, the retriever.
embed_model = HuggingFaceEmbedding(model_name=model_name)

# The index is built directly from the raw documents.
# Alternative kept for reference (loads from the Chroma store instead):
# index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
index = VectorStoreIndex.from_documents(
    docs,
    embed_model=embed_model,
)

In [164]:
# Active LLM: a local Ollama model with a 120 s request timeout.
# Alternatives previously left commented out here:
#   HuggingFaceInferenceAPI(model_name=big_model_name, token=HF_TOKEN)
#   Ollama with model "gemma3:1b", "llama3.2:latest" or "qwen3:8b" (same timeout)
llm = Ollama(
    model="qwen3:1.7b",
    request_timeout=120.0,
)

In [165]:
# High-level query engine straight from the index.
# Other response modes noted as options: "compact", "refine".
query_engine = index.as_query_engine(llm=llm, response_mode="tree_summarize")

question = "What is CGA?"
answer = query_engine.query(question)
print(answer)

<think>
Okay, the user is asking what CGA is. Let me look at the context provided.

From the context, CGA is mentioned in the context of solving problems like SACG and LMAPF. There's a section about Theorem 1 and 2, which talk about the completeness and reachability of CGA. The algorithms CGA and CGA(L) are described, with CGA using a corridor selection method to guide agents towards their goals. The proof outlines mention that CGA ensures agents move optimally and evacuate corridors, leading to their goals. 

The user wants a concise answer without using the context directly. So, I need to summarize the key points: CGA is an algorithm that helps agents navigate grids to reach goals by selecting optimal paths and evacuating corridors, ensuring they can solve problems like SACG and LMAPF. It uses a corridor selection step and handles priorities for agents. The answer should avoid mentioning specific theorems and focus on the overall function and process.
</think>

CGA is an algorithm de

In [166]:
from llama_index.core import get_response_synthesizer
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine

# Reuse the `index` already built earlier in the notebook from the same `docs`
# and `embed_model`. Rebuilding it here with VectorStoreIndex.from_documents
# only repeated the chunking and embedding of every document.

# configure retriever: return the 2 most similar chunks per query
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=2,
    embed_model=embed_model
)

# configure response synthesizer
response_synthesizer = get_response_synthesizer(
    response_mode="tree_summarize", llm=llm
)

# assemble query engine from the explicit retriever + synthesizer
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)

In [167]:
# query
question = "What is CGA stands for?"
response = query_engine.query(question)
print(response)

<think>
</think>

CGA stands for **Corridor Guidance Algorithm**. It is an algorithm designed to solve problems related to graph traversal and pathfinding, particularly in scenarios involving multiple agents and goals. The algorithm ensures that agents move optimally from one non-SV (non-solution vertex) to another, leveraging corridor selection to achieve efficient and guaranteed convergence to their goals.


In [168]:
# Evaluator that is handed the same `llm` used for answering.
# NOTE(review): presumably it judges whether a response is supported by its
# retrieved context — confirm against the LlamaIndex evaluation docs.
evaluator = FaithfulnessEvaluator(llm=llm)

In [169]:
eval_result = await evaluator.aevaluate_response(response=response)
# print(eval_result)
print(eval_result.passing)

True


---

# Tools in LlamaIndex
