In [None]:
#%pip install -q -U openai
#%pip install python-dotenv ipywidgets
#%pip install llama-index datasets llama-index-callbacks-arize-phoenix llama-index-vector-stores-chroma llama-index-llms-huggingface llama-index-llms-huggingface-api -U -q
#%pip install llama-index-embeddings-huggingface
#%pip install llama-index-embeddings-huggingface-api llama-index-llms-openai -U -q

In [1]:
import os
from dotenv import load_dotenv

# Pull the variables defined in the .env file into the process environment.
load_dotenv()

# Resolves to None when OPENAI_API_KEY is not set.
# Do not print or display this value: notebook outputs are saved with the file.
open_api_key = os.environ.get("OPENAI_API_KEY")


In [2]:
from llama_index.llms.openai import OpenAI
from llama_index.core.llms import ChatMessage


# Build the OpenAI-backed LLM client from the model name and the API key
# read from the environment.
openai_model_name = "gpt-4o-mini"
model = OpenAI(api_key=open_api_key, model=openai_model_name)

# Minimal smoke-test conversation: one system instruction, then one user turn.
system_turn = ChatMessage(role="system", content="You are chatting with me")
user_turn = ChatMessage(role="user", content="What is your name")
messages = [system_turn, user_turn]

# Send the conversation and print the chat response.
resp = model.chat(messages)
print(resp)

assistant: I’m called ChatGPT. How can I assist you today?


In [3]:
from datasets import load_dataset
from pathlib import Path

# Load the "train" split of the tiny personas dataset.
dataset = load_dataset(path="dvilasuero/finepersonas-v0.1-tiny", split="train")

# Write each persona to its own text file: data/persona_<i>.txt
data_dir = Path("data")
data_dir.mkdir(parents=True, exist_ok=True)
for i, persona in enumerate(dataset):
    # Explicit UTF-8: without it the platform's default encoding is used, which
    # is not guaranteed to be UTF-8 and can fail on or mangle non-ASCII text.
    with open(data_dir / f"persona_{i}.txt", "w", encoding="utf-8") as f:
        f.write(persona["persona"])

In [4]:
from llama_index.core import SimpleDirectoryReader

# Point a directory reader at the "data" folder and load its contents.
reader = SimpleDirectoryReader(input_dir="data")
documents = reader.load_data()
# Display how many documents were loaded (5000 in the recorded run).
len(documents)

5000

In [5]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline

embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# create the pipeline with transformations
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(),
        embed_model ,
    ]
)

# run the pipeline sync or async
nodes = await pipeline.arun(documents=documents[:10])
nodes

[TextNode(id_='c3b1c7d4-d764-4800-8ea5-ca2ac58bf967', embedding=[-0.03859766200184822, 0.00608021067455411, 0.03927840292453766, 0.0060350908897817135, 0.008561545982956886, -0.02720671519637108, -0.010228579863905907, 0.02880766987800598, -0.055345624685287476, -0.06434406340122223, -0.0012660942738875747, -0.014853705652058125, -0.02797834947705269, 0.031023140996694565, -0.022028110921382904, 0.006874281447380781, -0.005407379008829594, 0.08990827202796936, -0.012796765193343163, 0.014324701391160488, 0.008379798382520676, -0.06035181134939194, 0.06169970706105232, -0.011637511663138866, -0.014577667228877544, 0.021121615543961525, 0.02215029112994671, -0.003832101821899414, 0.0023719146847724915, -0.12213359028100967, -0.03922849893569946, 0.011650609783828259, -0.015394764021039009, 0.004795906133949757, 0.05800643190741539, 0.026605533435940742, 0.01672874018549919, 0.049385517835617065, 0.003721249522641301, 0.03411903604865074, 0.034329041838645935, 0.002967412117868662, -0.023

In [6]:


import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore

db = chromadb.PersistentClient(path="./alfred_chroma_db")
chroma_collection = db.get_or_create_collection(name="alfred")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(),
        embed_model,
    ],
    vector_store=vector_store,
)

nodes = await pipeline.arun(documents=documents[:10])
len(nodes)

10

In [7]:
from llama_index.core import VectorStoreIndex

# Build the index on top of the existing Chroma-backed vector store.
# The same embed_model is passed in, presumably so that queries are embedded
# with the model used at ingestion time — confirm against the llama-index docs.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store, embed_model=embed_model
)

In [8]:

import nest_asyncio

# Required before the query engine can be run from this notebook.
nest_asyncio.apply()

# Query engine over the index, answering with the OpenAI model in
# "tree_summarize" response mode.
query_engine = index.as_query_engine(response_mode="tree_summarize", llm=model)

response = query_engine.query("Respond using a persona that describes author and travel experiences?")
response



Response(response='An author with a passion for travel, who has journeyed through diverse cultures and landscapes, weaving their experiences into compelling narratives. This individual often finds inspiration in the places they visit, capturing the essence of local customs, traditions, and the stories of the people they meet. Their travels are not just about exploration but also about understanding the deeper connections between cultures, reflecting on how these experiences shape their writing and worldview. Whether wandering through bustling markets, hiking remote trails, or engaging in conversations with locals, the author seeks to immerse themselves fully in each destination, allowing their adventures to inform their literary voice.', source_nodes=[NodeWithScore(node=TextNode(id_='87cbc397-23e9-44df-81ac-091c2d0cc204', embedding=None, metadata={'file_path': '/home/martin/workspace/HuggingFaceAgentsCourse/notebooks/llamaIndexAgents/data/persona_1.txt', 'file_name': 'persona_1.txt', '

In [9]:
from llama_index.core.evaluation import FaithfulnessEvaluator

# Evaluate the most recent `response` with a FaithfulnessEvaluator that uses
# the same LLM as the query engine.
evaluator = FaithfulnessEvaluator(llm=model)
eval_result = evaluator.evaluate_response(response=response)
# Boolean verdict of the evaluation (False in the recorded run).
eval_result.passing



False

In [10]:
# Debugging aid for when the evaluation result is False: create an account at
# LlamaTrace, replace <PHOENIX_API_KEY> with your key (preferably loaded from
# the environment rather than typed here), and uncomment the lines below to
# send traces to it.
#
# The snippet is disabled with `#` comments rather than a triple-quoted string:
# a bare string literal is still evaluated and is echoed as the cell's output.
#
# import llama_index
# import os
#
# PHOENIX_API_KEY = "<PHOENIX_API_KEY>"
# os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"api_key={PHOENIX_API_KEY}"
# llama_index.core.set_global_handler(
#     "arize_phoenix", endpoint="https://llamatrace.com/v1/traces"
# )

'\nimport llama_index\nimport os\n\nPHOENIX_API_KEY = "<PHOENIX_API_KEY>"\nos.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"api_key={PHOENIX_API_KEY}"\nllama_index.core.set_global_handler(\n    "arize_phoenix", endpoint="https://llamatrace.com/v1/traces"\n)\n'

In [11]:
# Second question through the same query engine. The query text is what gets
# sent for retrieval and answering, so "technology" is spelled correctly here.
response = query_engine.query(
    "What is the name of the someone that is interested in AI and technology?"
)
response



Response(response='The provided information does not include any details about someone specifically interested in AI and technology.', source_nodes=[NodeWithScore(node=TextNode(id_='87cbc397-23e9-44df-81ac-091c2d0cc204', embedding=None, metadata={'file_path': '/home/martin/workspace/HuggingFaceAgentsCourse/notebooks/llamaIndexAgents/data/persona_1.txt', 'file_name': 'persona_1.txt', 'file_type': 'text/plain', 'file_size': 266, 'creation_date': '2025-03-15', 'last_modified_date': '2025-03-15'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='ca66f538-ac1d-4161-8fe2-caafab7cf5b9', node_type='4', metadata={'file_path': '/home/martin/workspace/HuggingFaceAgentsCourse/notebooks/llamaIndexAgents/data/persona_1.tx