In [1]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings
from dotenv import load_dotenv

load_dotenv()

documents = SimpleDirectoryReader("data").load_data()
print(documents)
print(documents[0].text)

[Document(id_='754d0c7c-5f2a-4d8c-9b49-d45d15130280', embedding=None, metadata={'file_path': 'data\\text.txt', 'file_name': 'text.txt', 'file_type': 'text/plain', 'file_size': 1331, 'creation_date': '2024-02-26', 'last_modified_date': '2024-02-28', 'last_accessed_date': '2024-02-28'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='Q: What makes our pizza unique?\nA: Our pizzas are made with a secret family recipe for the dough, hand-tossed, and topped with fresh, locally-sourced ingredients. We use a traditional wood-fired oven to give our pizzas a distinctive smoky flavor and crisp crust.\n\nQ: Do we offer gluten-free pizza options?\nA: Yes, we offer a delicious gluten-free crust option for our guests with dietary restrictions or preferences.\n\nQ: Can

In [2]:
from llama_index.core.node_parser import SentenceSplitter

text_splitter = SentenceSplitter(chunk_size=200, chunk_overlap=10)
nodes = text_splitter.get_nodes_from_documents(documents=documents)


In [3]:
nodes

[TextNode(id_='f6fda4e3-f93f-4658-862a-0d817b14220d', embedding=None, metadata={'file_path': 'data\\text.txt', 'file_name': 'text.txt', 'file_type': 'text/plain', 'file_size': 1331, 'creation_date': '2024-02-26', 'last_modified_date': '2024-02-28', 'last_accessed_date': '2024-02-28'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='754d0c7c-5f2a-4d8c-9b49-d45d15130280', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': 'data\\text.txt', 'file_name': 'text.txt', 'file_type': 'text/plain', 'file_size': 1331, 'creation_date': '2024-02-26', 'last_modified_date': '2024-02-28', 'last_accessed_date': '2024-02-28'}, hash='adaca48c53952d4ceb54e0e41396ea411d485d1baa5dcf32a8f55f4d265c6d62'), <NodeRelationsh

In [4]:
print(len(documents))
print(len(nodes))

1
2


In [5]:
from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb
from llama_index.core import StorageContext
from llama_index.embeddings.openai import OpenAIEmbedding

chroma_client = chromadb.EphemeralClient()
chroma_collection = chroma_client.create_collection("tes1233t")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [6]:
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context, embed_model=OpenAIEmbedding()
)

In [7]:
index = VectorStoreIndex(nodes=nodes, storage_context=storage_context, embed_model=OpenAIEmbedding())

In [8]:
retriever = index.as_retriever()

In [9]:
retriever.retrieve("How long does it take to prepare a pizza")

[NodeWithScore(node=TextNode(id_='c0bd1a8c-24d3-4a7b-9e31-8157a51a6026', embedding=None, metadata={'file_path': 'data\\text.txt', 'file_name': 'text.txt', 'file_type': 'text/plain', 'file_size': 1331, 'creation_date': '2024-02-26', 'last_modified_date': '2024-02-28', 'last_accessed_date': '2024-02-28'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='754d0c7c-5f2a-4d8c-9b49-d45d15130280', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': 'data\\text.txt', 'file_name': 'text.txt', 'file_type': 'text/plain', 'file_size': 1331, 'creation_date': '2024-02-26', 'last_modified_date': '2024-02-28', 'last_accessed_date': '2024-02-28'}, hash='adaca48c53952d4ceb54e0e41396ea411d485d1baa5dcf32a8f55f4d265c6d62

In [10]:
# llm = OpenAI(model="gpt-3.5-turbo")

# query_engine = index.as_query_engine(llm=llm)

Settings.llm = OpenAI(model="gpt-3.5-turbo")

query_engine.query("How long does it take to prepare a pizza")

NameError: name 'query_engine' is not defined

In [None]:
prompts_dict = query_engine.get_prompts()
print(prompts_dict)

In [None]:
from llama_index.core import PromptTemplate


new_summary_tmpl_str = (
    "You always say 'Hello my friend' at the beginning of your answer. Below you find data from a database\n"
    "{context_str}\n"
    "Take that context and try to answer the question with it."
    "Query: {query_str}\n"
    "Answer: "
)
new_summary_tmpl = PromptTemplate(new_summary_tmpl_str)

In [None]:
query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": new_summary_tmpl}
)

In [None]:
prompts_dict = query_engine.get_prompts()
print(prompts_dict)

In [None]:
query_engine.query("How long does it take to prepare a pizza")