modified from https://huggingface.co/agents-course/notebooks/blob/main/unit2/llama-index/components.ipynb

In [None]:
import os
from pathlib import Path

import chromadb
import llama_index
import nest_asyncio
from datasets import load_dataset
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.evaluation import FaithfulnessEvaluator
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.llms.gemini import Gemini
from llama_index.vector_stores.chroma import ChromaVectorStore


# Create a QueryEngine for retrieval augmented generation
# Setting up the persona database

In [None]:
# Download the tiny FinePersonas split and write each persona to its own text
# file under data/, so the files can be loaded from that directory afterwards.
dataset = load_dataset(path="dvilasuero/finepersonas-v0.1-tiny", split="train")

Path("data").mkdir(parents=True, exist_ok=True)
for i, persona in enumerate(dataset):
    # Write explicitly as UTF-8: without `encoding`, open() falls back to the
    # platform locale encoding, which can fail on (or mis-encode) non-ASCII
    # persona text and would then not round-trip when the files are read back.
    with open(Path("data") / f"persona_{i}.txt", "w", encoding="utf-8") as f:
        f.write(persona["persona"])

# Loading and embedding persona documents

In [None]:
# Point a directory reader at data/ and load its contents into `documents`.
reader = SimpleDirectoryReader(input_dir="data")
documents = reader.load_data()
# Cell output: how many documents were loaded.
len(documents)

In [None]:
# create the pipeline with transformations
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(),
        GeminiEmbedding(model_name="models/text-embedding-004"),
    ]
)

# run the pipeline sync or async
nodes = await pipeline.arun(documents=documents[:10])
nodes

# Storing and indexing documents

In [None]:
db = chromadb.PersistentClient(path="./alfred_chroma_db")
chroma_collection = db.get_or_create_collection(name="alfred")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(),
        GeminiEmbedding(model_name="models/text-embedding-004"),
    ],
    vector_store=vector_store,
)

nodes = await pipeline.arun(documents=documents[:10])
len(nodes)

In [None]:
# Build an index on top of the existing Chroma vector store. The embedding
# model uses the same model name as the one used in the ingestion pipeline.
embed_model = GeminiEmbedding(model_name="models/text-embedding-004")
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store, embed_model=embed_model
)

# Querying the index

In [None]:
# This is needed to run the query engine from inside the notebook.
nest_asyncio.apply()

# No api_key is passed here: the GOOGLE_API_KEY env var is used by default
# (pass api_key="..." to override).
llm = Gemini(model="models/gemini-2.0-flash")

# Query engine over the index, answering with the Gemini LLM in
# "tree_summarize" response mode.
query_engine = index.as_query_engine(response_mode="tree_summarize", llm=llm)
response = query_engine.query(
    "Respond using a persona that describes author and travel experiences?"
)

# Cell output: the response object.
response

# Evaluation and observability

In [None]:
# Evaluate the previous `response` with a faithfulness evaluator that uses the
# same LLM as the query engine.
evaluator = FaithfulnessEvaluator(llm=llm)
eval_result = evaluator.evaluate_response(response=response)
# Cell output: the evaluator's pass/fail verdict for the response.
eval_result.passing

In [None]:
# Placeholder value: substitute a real Phoenix API key before running.
PHOENIX_API_KEY = "<PHOENIX_API_KEY>"

# The key is handed over through the OTLP exporter headers environment variable.
os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = "api_key=" + PHOENIX_API_KEY

# Register the "arize_phoenix" global handler, pointing at the LlamaTrace
# traces endpoint.
llama_index.core.set_global_handler(
    "arize_phoenix",
    endpoint="https://llamatrace.com/v1/traces",
)

In [None]:
# Run a second query through the same query engine (issued after the global
# handler was registered in the previous cell). The query text is what gets
# matched against the stored personas, so it is kept free of spelling mistakes.
response = query_engine.query(
    "What is the name of someone that is interested in AI and technology?"
)
# Cell output: the response object.
response