In [1]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings
from dotenv import load_dotenv

load_dotenv()

documents = SimpleDirectoryReader("data").load_data()
print(documents)
print(documents[0].text)

[Document(id_='6750b923-8f19-4ada-a3c7-1d083a98e0dc', embedding=None, metadata={'file_path': '/Users/simply007/projects/LLamaIndex-vs-LangChain-Basics/data/text.txt', 'file_name': 'text.txt', 'file_type': 'text/plain', 'file_size': 1331, 'creation_date': '2025-02-24', 'last_modified_date': '2025-02-24'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='Q: What makes our pizza unique?\nA: Our pizzas are made with a secret family recipe for the dough, hand-tossed, and topped with fresh, locally-sourced ingredients. We use a traditional wood-fired oven to give our pizzas a distinctive smoky flavor and crisp crust.\n\nQ: Do we offer gluten-free

In [2]:
from llama_index.core.node_parser import SentenceSplitter

text_splitter = SentenceSplitter(chunk_size=200, chunk_overlap=10)
nodes = text_splitter.get_nodes_from_documents(documents=documents)


In [3]:
nodes

[TextNode(id_='3648c5f8-7009-4aec-9e28-8973312af03e', embedding=None, metadata={'file_path': '/Users/simply007/projects/LLamaIndex-vs-LangChain-Basics/data/text.txt', 'file_name': 'text.txt', 'file_type': 'text/plain', 'file_size': 1331, 'creation_date': '2025-02-24', 'last_modified_date': '2025-02-24'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='6750b923-8f19-4ada-a3c7-1d083a98e0dc', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '/Users/simply007/projects/LLamaIndex-vs-LangChain-Basics/data/text.txt', 'file_name': 'text.txt', 'file_type': 'text/plain', 'file_size': 1331, 'creation_date': '2025-02-24', 'last_modified_date': '2025-02-24'}, hash='7b478ec4403a52aabeb087a5f13e1a5824d97292609

In [4]:
print(len(documents))
print(len(nodes))

1
2


In [5]:
from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb
from llama_index.core import StorageContext
from llama_index.embeddings.openai import OpenAIEmbedding

chroma_client = chromadb.EphemeralClient()
chroma_collection = chroma_client.create_collection("tes1233t")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [None]:
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context, embed_model=OpenAIEmbedding()
)
index

In [None]:
index = VectorStoreIndex(nodes=nodes, storage_context=storage_context, embed_model=OpenAIEmbedding())
index

In [9]:
retriever = index.as_retriever()
retriever

<llama_index.core.indices.vector_store.retrievers.retriever.VectorIndexRetriever at 0x30b9e44d0>

In [10]:
retriever.retrieve("How long does it take to prepare a pizza")

[NodeWithScore(node=TextNode(id_='5275fbb7-c628-48a3-be39-5c794c9705b7', embedding=None, metadata={'file_path': '/Users/simply007/projects/LLamaIndex-vs-LangChain-Basics/data/text.txt', 'file_name': 'text.txt', 'file_type': 'text/plain', 'file_size': 1331, 'creation_date': '2025-02-24', 'last_modified_date': '2025-02-24'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='6750b923-8f19-4ada-a3c7-1d083a98e0dc', node_type='4', metadata={'file_path': '/Users/simply007/projects/LLamaIndex-vs-LangChain-Basics/data/text.txt', 'file_name': 'text.txt', 'file_type': 'text/plain', 'file_size': 1331, 'creation_date': '2025-02-24', 'last_modified_date': '2025-02-24'}, hash='7b478ec4403a52aabeb087a5f13e1a5824d97292609875a

In [11]:
llm = OpenAI(model="gpt-3.5-turbo")

query_engine = index.as_query_engine(llm=llm)

# Settings.llm = OpenAI(model="gpt-3.5-turbo")

query_engine.query("How long does it take to prepare a pizza")

Response(response='On average, it takes about 15-20 minutes from the time you order until your pizza is ready. This time ensures that each pizza is cooked to perfection.', source_nodes=[NodeWithScore(node=TextNode(id_='5275fbb7-c628-48a3-be39-5c794c9705b7', embedding=None, metadata={'file_path': '/Users/simply007/projects/LLamaIndex-vs-LangChain-Basics/data/text.txt', 'file_name': 'text.txt', 'file_type': 'text/plain', 'file_size': 1331, 'creation_date': '2025-02-24', 'last_modified_date': '2025-02-24'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='6750b923-8f19-4ada-a3c7-1d083a98e0dc', node_type='4', metadata={'file_path': '/Users/simply007/projects/LLamaIndex-vs-LangChain-Basics/data/text.txt', 'file_n

In [12]:
prompts_dict = query_engine.get_prompts()
print(prompts_dict)

{'response_synthesizer:text_qa_template': SelectorPromptTemplate(metadata={'prompt_type': <PromptType.QUESTION_ANSWER: 'text_qa'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings={}, function_mappings={}, default_template=PromptTemplate(metadata={'prompt_type': <PromptType.QUESTION_ANSWER: 'text_qa'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, template='Context information is below.\n---------------------\n{context_str}\n---------------------\nGiven the context information and not prior knowledge, answer the query.\nQuery: {query_str}\nAnswer: '), conditionals=[(<function is_chat_model at 0x11736d4e0>, ChatPromptTemplate(metadata={'prompt_type': <PromptType.CUSTOM: 'custom'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, message_templates=[ChatMessage(role=<MessageRole.SYS

In [13]:
from llama_index.core import PromptTemplate


new_summary_tmpl_str = (
    "You always say 'Hello my friend' at the beginning of your answer. Below you find data from a database\n"
    "{context_str}\n"
    "Take that context and try to answer the question with it."
    "Query: {query_str}\n"
    "Answer: "
)
new_summary_tmpl = PromptTemplate(new_summary_tmpl_str)

In [14]:
query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": new_summary_tmpl}
)

In [15]:
prompts_dict = query_engine.get_prompts()
print(prompts_dict)

{'response_synthesizer:text_qa_template': PromptTemplate(metadata={'prompt_type': <PromptType.CUSTOM: 'custom'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, template="You always say 'Hello my friend' at the beginning of your answer. Below you find data from a database\n{context_str}\nTake that context and try to answer the question with it.Query: {query_str}\nAnswer: "), 'response_synthesizer:refine_template': SelectorPromptTemplate(metadata={'prompt_type': <PromptType.REFINE: 'refine'>}, template_vars=['query_str', 'existing_answer', 'context_msg'], kwargs={}, output_parser=None, template_var_mappings={}, function_mappings={}, default_template=PromptTemplate(metadata={'prompt_type': <PromptType.REFINE: 'refine'>}, template_vars=['query_str', 'existing_answer', 'context_msg'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, template="The original query is as follows: {

In [16]:
query_engine.query("How long does it take to prepare a pizza")

Response(response='Hello my friend,\n\nOn average, it takes about 15-20 minutes from the time you order until your pizza is ready. This time ensures that each pizza is cooked to perfection.', source_nodes=[NodeWithScore(node=TextNode(id_='5275fbb7-c628-48a3-be39-5c794c9705b7', embedding=None, metadata={'file_path': '/Users/simply007/projects/LLamaIndex-vs-LangChain-Basics/data/text.txt', 'file_name': 'text.txt', 'file_type': 'text/plain', 'file_size': 1331, 'creation_date': '2025-02-24', 'last_modified_date': '2025-02-24'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='6750b923-8f19-4ada-a3c7-1d083a98e0dc', node_type='4', metadata={'file_path': '/Users/simply007/projects/LLamaIndex-vs-LangChain-Basics/dat