In [2]:
!pip install llama-index datasets llama-index-callbacks-arize-phoenix arize-phoenix llama-index-vector-stores-chroma llama-index-llms-huggingface-api llama-index-embeddings-huggingface -U -q

In [3]:
# Setting up the persona database
# Download the persona dataset and write each persona to its own text file
# under data/, so the files can be loaded from that directory afterwards.

from datasets import load_dataset
from pathlib import Path

dataset = load_dataset(path="dvilasuero/finepersonas-v0.1-tiny", split="train")

Path("data").mkdir(parents=True, exist_ok=True)
for i, persona in enumerate(dataset):
    # Write UTF-8 explicitly: the platform default encoding (e.g. on Windows)
    # may be unable to encode non-ASCII persona text, and readers that assume
    # UTF-8 would otherwise see mis-decoded characters.
    with open(Path("data") / f"persona_{i}.txt", "w", encoding="utf-8") as f:
        f.write(persona["persona"])

README.md:   0%|          | 0.00/618 [00:00<?, ?B/s]

data/train-00000-of-00001.parquet:   0%|          | 0.00/35.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/5000 [00:00<?, ? examples/s]

In [4]:
# Loading and embedding persona documents
# SimpleDirectoryReader is pointed at the data/ directory that holds the persona
# text files; load_data() returns the loaded documents (a list of Document objects).

from llama_index.core import SimpleDirectoryReader

reader = SimpleDirectoryReader(input_dir="data")
documents = reader.load_data()
# Display the number of loaded documents as a quick sanity check.
len(documents)


5000

In [5]:
# Now we have a list of Document objects, we can use the IngestionPipeline to create nodes from the documents 
# and prepare them for the QueryEngine. We will use the SentenceSplitter to split the documents into smaller chunks 
#     and the HuggingFaceEmbedding to embed the chunks.

from llama_index.embeddings.huggingface import HuggingFaceEmbedding

from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline

# create the pipeline with transformations
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(),
        HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
    ]
)

# run the pipeline sync or async
nodes = await pipeline.arun(documents=documents[:10])
nodes

2025-08-20 16:56:29,937 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

2025-08-20 16:56:38,262 - INFO - 1 prompt is loaded, with the key: query


[TextNode(id_='e0435086-ddc5-47e3-b3c4-3457a787a893', embedding=[-0.016104906797409058, 0.005574995651841164, 0.060049813240766525, -0.00852985866367817, 0.004600426647812128, -0.025849564000964165, 0.01811128854751587, 0.027776028960943222, -0.034005504101514816, -0.06651199609041214, 0.01721017062664032, -0.015291910618543625, -0.030471427366137505, 0.03640660271048546, -0.010577877052128315, -0.008454144932329655, 0.0016137290513142943, 0.07660044729709625, 0.0032418363261967897, 0.004224814940243959, 0.015009159222245216, -0.06628158688545227, 0.07391129434108734, -0.03285926580429077, -0.035700440406799316, 0.01729167066514492, 0.02407201938331127, -0.03954237326979637, -0.020483847707509995, -0.12804776430130005, -0.04551275447010994, 0.023442218080163002, 0.002382115926593542, 0.012557381764054298, 0.0369461327791214, 0.0014122242573648691, -0.026638088747859, 0.05359045788645744, 0.009238041006028652, 0.008923035115003586, 0.02590860426425934, 0.013062710873782635, -0.022857580

In [None]:
# Storing and indexing documents
# Since we are using an ingestion pipeline, we can directly attach a vector store to the pipeline to populate it.
#     In this case, we will use Chroma to store our documents. Let's run the pipeline again with the vector store attached.
#     The IngestionPipeline caches the operations so this should be fast!

import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore

db = chromadb.PersistentClient(path="./alfred_chroma_db")
chroma_collection = db.get_or_create_collection(name="alfred")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(),
        HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
    ],
    vector_store=vector_store,
)

nodes = await pipeline.arun(documents=documents[:10])
len(nodes)

In [None]:
# Build a VectorStoreIndex on top of the existing vector store by passing the vector store
# and the embedding model to from_vector_store(). The embedding model name is the same one
# used in the ingestion pipeline.

from llama_index.core import VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding


embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store, embed_model=embed_model
)

In [None]:
# Querying the index
# With the index in place, build a QueryEngine from it and ask a question.
# The engine is configured with the "tree_summarize" response mode.

from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
import nest_asyncio

# Needed so the query engine can run inside the notebook.
nest_asyncio.apply()

llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")
query_engine = index.as_query_engine(llm=llm, response_mode="tree_summarize")

response = query_engine.query(
    "Respond using a persona that describes author and travel experiences?"
)
response

In [6]:
# Evaluation and observability
# LlamaIndex provides built-in evaluation tools to assess response quality.
# These evaluators leverage LLMs to analyze responses across different dimensions.
# Here we check whether the response from the query above is faithful to the original persona.

from llama_index.core.evaluation import FaithfulnessEvaluator

# Evaluate the earlier `response`, using the same `llm` as the evaluator's model.
evaluator = FaithfulnessEvaluator(llm=llm)
eval_result = evaluator.evaluate_response(response=response)
# Display the `passing` attribute of the evaluation result.
eval_result.passing

SyntaxError: invalid syntax (3956907225.py, line 2)

In [None]:
# If one of these LLM based evaluators does not give enough context, we can check the response using
# the Arize Phoenix tool, after creating an account at LlamaTrace and generating an API key.

import getpass
import os

# Import the submodule explicitly so `llama_index.core` is guaranteed to be available
# even if no earlier cell has imported it.
import llama_index.core

# Never hardcode the key in the notebook: read it from the PHOENIX_API_KEY environment
# variable, and fall back to an interactive (non-echoing) prompt when it is not set.
PHOENIX_API_KEY = os.environ.get("PHOENIX_API_KEY") or getpass.getpass(
    "Phoenix (LlamaTrace) API key: "
)
os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"api_key={PHOENIX_API_KEY}"

# Register the Phoenix handler globally, pointing it at the LlamaTrace endpoint.
llama_index.core.set_global_handler(
    "arize_phoenix", endpoint="https://llamatrace.com/v1/traces"
)

In [None]:
# Run another query through the query engine (after the global handler has been set)
# and display the answer. The question text is what gets embedded and sent to the LLM,
# so it is spelled correctly.
response = query_engine.query(
    "What is the name of someone that is interested in AI and technology?"
)
response