# **Load Dataset**

In [1]:
from datasets import load_dataset
from pathlib import Path

# Load the "train" split of the dvilasuero/finepersonas-v0.1-tiny dataset.
dataset = load_dataset(
    path="dvilasuero/finepersonas-v0.1-tiny",
    split="train",
)


In [3]:
# Ensure the output folder exists; exist_ok=True keeps this cell safe to re-run.
Path("data").mkdir(parents=True, exist_ok=True)

In [None]:
# Write every persona to its own text file, data/persona_<i>.txt.
# The encoding is pinned to UTF-8: without it the platform's locale encoding is
# used (e.g. cp1252 on Windows), which can raise UnicodeEncodeError on, or
# silently mis-encode, non-ASCII persona text.
for i, persona in enumerate(dataset):
    (Path("data") / f"persona_{i}.txt").write_text(persona["persona"], encoding="utf-8")

# **Load and embed documents**

In [9]:
from llama_index.core import SimpleDirectoryReader

# Point a directory reader at the "data" folder and load its contents.
reader = SimpleDirectoryReader(input_dir="data")
documents = reader.load_data()
# Show how many documents were loaded.
len(documents)

942

In [11]:
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline
from llama_index.embeddings.huggingface_api import HuggingFaceInferenceAPIEmbedding

pipeline = IngestionPipeline(
    transformations=[SentenceSplitter(),
                     HuggingFaceInferenceAPIEmbedding(model_name="BAAI/bge-small-en-v1.5")]
)
nodes = await pipeline.arun(documents=documents[:10])
len(nodes)

10

# **Vector Store**

In [15]:
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore

db = chromadb.PersistentClient(path="./alfred_chroma_db")
chroma_collection = db.get_or_create_collection(name="alfred")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

pipeline = IngestionPipeline(
    transformations=[SentenceSplitter(),
                     HuggingFaceInferenceAPIEmbedding(model_name="BAAI/bge-small-en-v1.5"),
                   ],  vector_store=vector_store,
)
nodes = await pipeline.arun(documents=documents[:10])
len(nodes)

10

# **Get index to query**

In [16]:
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.huggingface_api import HuggingFaceInferenceAPIEmbedding

# Build an index on top of the existing vector store, using the same
# BAAI/bge-small-en-v1.5 embedding model that the ingestion pipeline used.
embed_model = HuggingFaceInferenceAPIEmbedding(model_name="BAAI/bge-small-en-v1.5")
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    embed_model=embed_model,
)

In [17]:
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
import nest_asyncio
# nest_asyncio patches asyncio so an event loop can be re-entered; presumably
# needed because the synchronous query() call below runs inside Jupyter's
# already-running loop — confirm.
nest_asyncio.apply()

# LLM used to synthesize answers; "tree_summarize" selects the response mode.
llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")
query_engine = index.as_query_engine(
    llm=llm,
    response_mode="tree_summarize",
)
response = query_engine.query(
    "Respond using a persona that describes author and travel experience"
)
# Display only the generated text. Echoing the whole Response object dumps
# every source node's metadata (including absolute local file paths) into the
# saved notebook. The source nodes remain available on `response` itself.
response.response

"An individual deeply versed in the nuances of Cypriot culture, history, and society, having dedicated significant time to research and reside in Cyprus. This person has immersed themselves in the local customs and way of life, offering a rich perspective on the island's cultural tapestry."

In [18]:
from llama_index.core.evaluation import FaithfulnessEvaluator

# query index
evaluator = FaithfulnessEvaluator(llm=llm)
eval_result = evaluator.evaluate_response(response=response)
eval_result.passing


False