In [None]:
%pip install llama-index-llms-openai
!pip install llama-index
#import nest_asyncio

#nest_asyncio.apply()
import logging
import sys

# Route WARNING-and-above log records to stdout through exactly one handler.
# force=True replaces whatever handlers the root logger already has, so
# re-running this cell never stacks handlers and never prints a record twice.
logging.basicConfig(stream=sys.stdout, level=logging.WARNING, force=True)


In [None]:
from llama_index.core import SimpleDirectoryReader

# Read the single PDF used throughout this notebook.
# (A plain-text alternative is '../data/paul_graham_essay3.txt'.)
pdf_reader = SimpleDirectoryReader(input_files=['../data/2022 Q3 AAPL.pdf'])
documents = pdf_reader.load_data()



In [None]:
from llama_index.core.node_parser import SentenceSplitter

# Split the loaded documents into nodes using the splitter's default settings.
sentence_splitter = SentenceSplitter()
nodes = sentence_splitter.get_nodes_from_documents(documents)


In [None]:
# Preview only the first few nodes; echoing the whole list floods the cell output.
nodes[:3]

In [None]:
from llama_index.core.storage.docstore import SimpleDocumentStore

# Start from an empty document store and register every node produced by the
# splitter in it.
docstore = SimpleDocumentStore()
docstore.add_documents(nodes)


In [None]:
# Number of entries held in the docstore right after the nodes were added.
len(docstore.docs)


In [None]:
# Show the keys under which the docstore keeps its entries.
docstore.docs.keys()


In [None]:
# Take the first key in the docstore and display the entry stored under it.
doc_key = list(docstore.docs)[0]
docstore.docs[doc_key]


In [None]:
# Fetch the entry stored under `doc_key` (chosen in the previous cell).
node = docstore.docs[doc_key]
print(node.text)  # print the node's text content as plain text


In [None]:
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama

# Register an Ollama-served model as the default LLM for LlamaIndex.
# (OpenAI alternative, left disabled:
#  os.environ["OPENAI_API_KEY"] = getpass.getpass("open ai api key: "))
local_llm = Ollama(
    model='llama3.2:latest',
    base_url='http://localhost:11434',
    temperature=0.1,
)
Settings.llm = local_llm



In [None]:
from llama_index.embeddings.ollama import OllamaEmbedding

# Embedding model served from the same Ollama endpoint as the LLM.
# Ollama must be reachable at base_url; swap model_name for another embedding
# model if desired. {"mirostat": 0} is passed through as an additional Ollama
# option.
embedding_options = {
    "model_name": "nomic-embed-text:latest",
    "base_url": "http://localhost:11434",
    "ollama_additional_kwargs": {"mirostat": 0},
}
ollama_embedding = OllamaEmbedding(**embedding_options)

In [None]:
from llama_index.core import (
    SimpleKeywordTableIndex,
    StorageContext,
    SummaryIndex,
    VectorStoreIndex,
)

# One storage context, backed by the docstore, is shared by all three indexes.
storage_context = StorageContext.from_defaults(docstore=docstore)

# Build each index over the same node list; only the vector index is given an
# embedding model.
summary_index = SummaryIndex(nodes=nodes, storage_context=storage_context)
vector_index = VectorStoreIndex(
    nodes=nodes,
    storage_context=storage_context,
    embed_model=ollama_embedding,
)
keyword_table_index = SimpleKeywordTableIndex(nodes=nodes, storage_context=storage_context)


In [None]:
# Query the summary index with its default query engine.
summary_question = "What this document is about?"
query_engine = summary_index.as_query_engine()
response = query_engine.query(summary_question)
print(response)

In [None]:
# Query the vector index with its default query engine.
# The question targets the financial report loaded above; the previous
# question ("What did the author do growing up?") was a leftover from the
# essay sample and has no answer in this document.
query_engine = vector_index.as_query_engine()
response = query_engine.query("What were the total net sales reported for the quarter?")
print(response)


In [None]:
# Query the keyword-table index with its default query engine.
keyword_question = "Which company annual or quarterly report is this?"
query_engine = keyword_table_index.as_query_engine()
response = query_engine.query(keyword_question)
print(response)

In [None]:
# Entry count before any extra documents are added to the docstore.
len(docstore.docs)

In [None]:
from llama_index.core import Document

# Two small sample documents to append to the existing docstore.
sample_texts = (
    "This is a new document.",
    "Here is another document to add.",
)
new_documents = [Document(text=sample_text) for sample_text in sample_texts]

# Split the documents into nodes, then register those nodes in the docstore.
splitter = SentenceSplitter()
new_nodes = splitter.get_nodes_from_documents(new_documents)
docstore.add_documents(new_nodes)

# Report the new total and list every stored entry with its key and text.
stored_docs = docstore.docs
print(f"Total documents in the docstore: {len(stored_docs)}")
for key, node in stored_docs.items():
    print(f"Key: {key}, Text: {node.text}")


In [None]:
# Entry count after the extra documents were added to the docstore.
len(docstore.docs)

In [None]:
from llama_index.core.schema import TextNode

# Pick the node to update: the first key currently in the docstore.
doc_key = list(docstore.docs.keys())[0]
node = docstore.docs[doc_key]

# Build the replacement under the SAME id, so anything that refers to this
# node by id keeps resolving to it. Both the text and the metadata are
# replaced; the original node's relationships are carried over.
updated_node = TextNode(
    id_=doc_key,
    text="Text Updated to test Node text update",
    metadata={"source": "Updated Source"},
    relationships=node.relationships,
)

# Overwrite the stored entry in place instead of deleting it and adding a
# node with a new random id.
docstore.add_documents([updated_node], allow_update=True)

# Verify by reading the entry back from the docstore.
stored_node = docstore.docs[doc_key]
print(f"Updated Node Metadata: {stored_node.metadata}")


In [None]:
# Entry count after the node update in the previous cell.
len(docstore.docs)

In [None]:
# Inspect one entry by position: index 30 is the 31st key in the docstore.
# This raises IndexError if the docstore holds fewer than 31 entries.
doc_key = list(docstore.docs.keys())[30]
node = docstore.docs[doc_key]
print(node.text)  # print the node's text content as plain text


In [None]:

# Example: swap the whole docstore for a fresh one holding only new documents.
replacement_texts = (
    "This is a new document.",
    "Here is another document to add.",
)
# Plain strings are wrapped in Document objects before being split.
new_documents = [Document(text=replacement_text) for replacement_text in replacement_texts]

# Split the documents into nodes.
replacement_splitter = SentenceSplitter()
new_nodes = replacement_splitter.get_nodes_from_documents(new_documents)

# Populate a brand-new document store with just those nodes.
new_docstore = SimpleDocumentStore()
new_docstore.add_documents(new_nodes)

# Report what the new store contains.
new_entries = new_docstore.docs
print(f"Total documents in the new docstore: {len(new_entries)}")
for key, node in new_entries.items():
    print(f"Key: {key}, Text: {node.text}")

# Rebind the `docstore` name to the new store. Objects created earlier from
# the old store (for example `storage_context`) still hold the old one.
docstore = new_docstore


In [None]:
# Entry count of the docstore after `docstore` was rebound to the new store.
len(docstore.docs)