In [1]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment
# (presumably the OpenAI API key needed further down - confirm).
load_dotenv()

# Read the contents of the "data" directory into Document objects.
reader = SimpleDirectoryReader("data")
documents = reader.load_data()

# Show the full Document list first, then only the raw text of the first document.
print(documents)
print(documents[0].text)

[Document(id_='0ed79c6b-a486-4f18-bd2a-95e787dd1777', embedding=None, metadata={'file_path': '/Users/mowlanica.billa/Desktop/Desktop/Data_Science/Projects/LLM/RAG/Langchain_Vs_LlamaIndex/data/text.txt', 'file_name': 'text.txt', 'file_type': 'text/plain', 'file_size': 1331, 'creation_date': '2024-03-09', 'last_modified_date': '2024-03-02'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='Q: What makes our pizza unique?\nA: Our pizzas are made with a secret family recipe for the dough, hand-tossed, and topped with fresh, locally-sourced ingredients. We use a traditional wood-fired oven to give our pizzas a distinctive smoky flavor and crisp crust.\n\nQ: Do we offer gluten-free pizza options?\nA: Yes, we offer a delicious gluten-free crust option for our gu

In [2]:
from llama_index.core.node_parser import SentenceSplitter

# Chunk the loaded documents with a sentence-aware splitter
# (chunk size 200, overlap 10 between neighbouring chunks).
text_splitter = SentenceSplitter(
    chunk_size=200,
    chunk_overlap=10,
)
nodes = text_splitter.get_nodes_from_documents(documents)


In [3]:
# Bare expression: the notebook renders the list of parsed nodes as this cell's output.
nodes

[TextNode(id_='a45c548c-afc4-4359-808d-e346f1c4db39', embedding=None, metadata={'file_path': '/Users/mowlanica.billa/Desktop/Desktop/Data_Science/Projects/LLM/RAG/Langchain_Vs_LlamaIndex/data/text.txt', 'file_name': 'text.txt', 'file_type': 'text/plain', 'file_size': 1331, 'creation_date': '2024-03-09', 'last_modified_date': '2024-03-02'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='0ed79c6b-a486-4f18-bd2a-95e787dd1777', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '/Users/mowlanica.billa/Desktop/Desktop/Data_Science/Projects/LLM/RAG/Langchain_Vs_LlamaIndex/data/text.txt', 'file_name': 'text.txt', 'file_type': 'text/plain', 'file_size': 1331, 'creation_date': '2024-03-09', 'last_modified

In [4]:
# One count per line: number of source documents, then number of chunks split from them.
print(len(documents), len(nodes), sep="\n")

1
2


In [5]:
from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb
from llama_index.core import StorageContext
from llama_index.embeddings.openai import OpenAIEmbedding

# In-memory Chroma instance; nothing is persisted to disk.
chroma_client = chromadb.EphemeralClient()

# get_or_create_collection (instead of create_collection) keeps this cell
# re-runnable: create_collection raises once the collection already exists in
# the running kernel, which forces a rename or a kernel restart on every re-run.
# Note: when the collection is reused, re-running the indexing cells adds their
# embeddings to it again; restart the kernel for a clean collection.
chroma_collection = chroma_client.get_or_create_collection("tes1233t")

# Expose the Chroma collection to LlamaIndex as the vector store backing new indexes.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [6]:
# Baseline index built straight from the raw documents with LlamaIndex's default
# chunking. It is deliberately NOT attached to the Chroma-backed `storage_context`:
# the next cell builds the index that is actually queried from the custom-split
# `nodes` in that same collection, and writing both into it would store the same
# text twice (chunked differently), so the retriever could return overlapping hits.
index = VectorStoreIndex.from_documents(
    documents, embed_model=OpenAIEmbedding()
)

In [7]:
# Index the pre-split nodes, writing their embeddings through the
# Chroma-backed storage context.
index = VectorStoreIndex(
    nodes=nodes,
    storage_context=storage_context,
    embed_model=OpenAIEmbedding(),
)

In [8]:
# Wrap the index in a retriever; no arguments are passed, so library defaults apply.
retriever = index.as_retriever()

In [9]:
# Retrieval only: returns the scored nodes matched to the question (no answer is synthesized).
retriever.retrieve("How long does it take to prepare a pizza")

[NodeWithScore(node=TextNode(id_='89c80310-3ec7-4bb9-8530-d43feb4c6869', embedding=None, metadata={'file_path': '/Users/mowlanica.billa/Desktop/Desktop/Data_Science/Projects/LLM/RAG/Langchain_Vs_LlamaIndex/data/text.txt', 'file_name': 'text.txt', 'file_type': 'text/plain', 'file_size': 1331, 'creation_date': '2024-03-09', 'last_modified_date': '2024-03-02'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='0ed79c6b-a486-4f18-bd2a-95e787dd1777', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '/Users/mowlanica.billa/Desktop/Desktop/Data_Science/Projects/LLM/RAG/Langchain_Vs_LlamaIndex/data/text.txt', 'file_name': 'text.txt', 'file_type': 'text/plain', 'file_size': 1331, 'creation_date': '2024-03-

In [10]:
# Model used by the query engine to turn retrieved context into an answer.
llm = OpenAI(model="gpt-3.5-turbo")
query_engine = index.as_query_engine(llm=llm)
# Alternative left disabled: configure the model globally with
# Settings.llm = OpenAI(model="gpt-3.5-turbo") instead of passing llm= above.
query_engine.query("How long does it take to prepare a pizza")

Response(response='On average, it takes about 15-20 minutes from the time you order until your pizza is ready. This time ensures that each pizza is cooked to perfection.', source_nodes=[NodeWithScore(node=TextNode(id_='89c80310-3ec7-4bb9-8530-d43feb4c6869', embedding=None, metadata={'file_path': '/Users/mowlanica.billa/Desktop/Desktop/Data_Science/Projects/LLM/RAG/Langchain_Vs_LlamaIndex/data/text.txt', 'file_name': 'text.txt', 'file_type': 'text/plain', 'file_size': 1331, 'creation_date': '2024-03-09', 'last_modified_date': '2024-03-02'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='0ed79c6b-a486-4f18-bd2a-95e787dd1777', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '/Users/mowlanica.bill

In [11]:
# Inspect the prompt templates the query engine currently uses; the dict is keyed
# by component path (e.g. 'response_synthesizer:text_qa_template').
prompts_dict = query_engine.get_prompts()
print(prompts_dict)

{'response_synthesizer:text_qa_template': SelectorPromptTemplate(metadata={'prompt_type': <PromptType.QUESTION_ANSWER: 'text_qa'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings={}, function_mappings={}, default_template=PromptTemplate(metadata={'prompt_type': <PromptType.QUESTION_ANSWER: 'text_qa'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, template='Context information is below.\n---------------------\n{context_str}\n---------------------\nGiven the context information and not prior knowledge, answer the query.\nQuery: {query_str}\nAnswer: '), conditionals=[(<function is_chat_model at 0x12700d080>, ChatPromptTemplate(metadata={'prompt_type': <PromptType.CUSTOM: 'custom'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, message_templates=[ChatMessage(role=<MessageRole.SYS

In [12]:
from llama_index.core import PromptTemplate

# Custom question-answering prompt. `{context_str}` and `{query_str}` are the
# placeholders the query engine fills in with the retrieved text and the question.
# Every instruction line ends with "\n" so that "Query:" starts on its own line
# instead of being glued to the preceding sentence.
new_summary_tmpl_str = (
    "You always say 'Hello my friend' at the beginning of your answer. Below you find data from a database\n"
    "{context_str}\n"
    "Take that context and try to answer the question with it.\n"
    "Query: {query_str}\n"
    "Answer: "
)
# Wrap the raw template string in a PromptTemplate object.
new_summary_tmpl = PromptTemplate(template=new_summary_tmpl_str)

In [13]:
# Swap the response synthesizer's question-answering prompt for the custom template.
prompt_overrides = {"response_synthesizer:text_qa_template": new_summary_tmpl}
query_engine.update_prompts(prompt_overrides)

In [14]:
# Print the prompts again to confirm that 'response_synthesizer:text_qa_template'
# now holds the custom template.
prompts_dict = query_engine.get_prompts()
print(prompts_dict)

{'response_synthesizer:text_qa_template': PromptTemplate(metadata={'prompt_type': <PromptType.CUSTOM: 'custom'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, template="You always say 'Hello my friend' at the beginning of your answer. Below you find data from a database\n{context_str}\nTake that context and try to answer the question with it.Query: {query_str}\nAnswer: "), 'response_synthesizer:refine_template': SelectorPromptTemplate(metadata={'prompt_type': <PromptType.REFINE: 'refine'>}, template_vars=['query_str', 'existing_answer', 'context_msg'], kwargs={}, output_parser=None, template_var_mappings={}, function_mappings={}, default_template=PromptTemplate(metadata={'prompt_type': <PromptType.REFINE: 'refine'>}, template_vars=['query_str', 'existing_answer', 'context_msg'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, template="The original query is as follows: {

In [15]:
# Same question as before, now answered through the custom prompt
# (the response is expected to open with 'Hello my friend').
query_engine.query("How long does it take to prepare a pizza")

Response(response='Hello my friend,\n\nOn average, it takes about 15-20 minutes from the time you order until your pizza is ready. This time ensures that each pizza is cooked to perfection.', source_nodes=[NodeWithScore(node=TextNode(id_='89c80310-3ec7-4bb9-8530-d43feb4c6869', embedding=None, metadata={'file_path': '/Users/mowlanica.billa/Desktop/Desktop/Data_Science/Projects/LLM/RAG/Langchain_Vs_LlamaIndex/data/text.txt', 'file_name': 'text.txt', 'file_type': 'text/plain', 'file_size': 1331, 'creation_date': '2024-03-09', 'last_modified_date': '2024-03-02'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='0ed79c6b-a486-4f18-bd2a-95e787dd1777', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '/