In [None]:
!docker run --name mongodb -d -p 27017:27017 mongo:latest

In [None]:
# List the running containers to check that the MongoDB container is up.
!docker ps

In [None]:
%pip install llama-index-storage-docstore-mongodb
%pip install llama-index-storage-index-store-mongodb
%pip install llama-index


In [None]:
import os
import logging
from llama_index.core import SimpleDirectoryReader, StorageContext
from llama_index.core import VectorStoreIndex, SimpleKeywordTableIndex, SummaryIndex
from llama_index.llms.openai import OpenAI
from llama_index.core.node_parser import SentenceSplitter
from llama_index.storage.docstore.mongodb import MongoDocumentStore
from llama_index.storage.index_store.mongodb import MongoIndexStore


In [None]:

# Connection string for the local MongoDB instance (default port, no credentials).
MONGO_URI = "mongodb://localhost:27017"

# Show INFO-level (and more severe) log records while the notebook runs.
logging.basicConfig(level=logging.INFO)


In [None]:
from llama_index.core import SimpleDirectoryReader

# File to ingest. To try the PDF sample instead, point this at
# '../data/2022 Q3 AAPL.pdf'.
essay_path = '../data/paul_graham_essay3.txt'

# Read the file into a list of documents.
reader = SimpleDirectoryReader(input_files=[essay_path])
documents = reader.load_data()


In [None]:

# Step 2: Open the MongoDB-backed stores for nodes and for index metadata.
index_store = MongoIndexStore.from_uri(uri=MONGO_URI)
docstore = MongoDocumentStore.from_uri(uri=MONGO_URI)

# Step 3: Split the loaded documents into nodes and register them in the
# document store.
parser = SentenceSplitter()
nodes = parser.get_nodes_from_documents(documents)
docstore.add_documents(nodes)

# Bundle both stores into the storage context that every index below receives.
storage_context = StorageContext.from_defaults(
    index_store=index_store,
    docstore=docstore,
)


In [None]:
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama

# Register a model served by the local Ollama instance as the LLM in the global
# Settings object (no OpenAI API key is set in this notebook).
Settings.llm = Ollama(
    model='llama3.2:latest',
    base_url='http://localhost:11434',
    temperature=0.1,
)



In [None]:
from llama_index.embeddings.ollama import OllamaEmbedding

# Extra options forwarded to Ollama. Mirostat is a technique for controlling
# perplexity and balancing the text generation process in large language
# models (LLMs).
embedding_options = {"mirostat": 0}

# Embedding model; Ollama must be running at base_url. Swap model_name for a
# different embedding model if desired.
ollama_embedding = OllamaEmbedding(
    base_url="http://localhost:11434",
    model_name="nomic-embed-text:latest",
    ollama_additional_kwargs=embedding_options,
)

In [None]:


# Step 4: Build three different indexes over the same nodes.
# All of them share one storage context; only the vector index is given the
# Ollama embedding model.
shared_kwargs = {"storage_context": storage_context}

summary_index = SummaryIndex(nodes, **shared_kwargs)
vector_index = VectorStoreIndex(nodes, embed_model=ollama_embedding, **shared_kwargs)
keyword_table_index = SimpleKeywordTableIndex(nodes, **shared_kwargs)


In [None]:
# Keep each index's ID at hand; the IDs are what a later session needs in
# order to reload the indexes.
summary_id, vector_id, keyword_id = (
    summary_index.index_id,
    vector_index.index_id,
    keyword_table_index.index_id,
)

# Echo the IDs, one per line, as a quick sanity check.
print(
    f"Summary Index ID: {summary_id}",
    f"Vector Index ID: {vector_id}",
    f"Keyword Table Index ID: {keyword_id}",
    sep="\n",
)

In [None]:
# Persist the storage context by calling persist() with no arguments.
# NOTE(review): the docstore and index store are MongoDB-backed, so this
# presumably only writes the remaining in-memory stores (e.g. the vector store)
# to the library's default persist directory — confirm.
storage_context.persist()


In [None]:
# Collect the ID of every index so the indexes can be reloaded later
import json

index_ids = {
    "summary_id": summary_index.index_id,
    "vector_id": vector_index.index_id,
    "keyword_id": keyword_table_index.index_id,
}

# Serialize the mapping to a JSON file in the working directory for later use.
with open("mongodb_index_ids.json", "w") as ids_file:
    ids_file.write(json.dumps(index_ids))
print("Index IDs saved.")

In [None]:

# Verify that nodes are persisted
print(f"Total nodes in MongoDB DocumentStore: {len(docstore.docs)}")

# Step 5: Query the indexes.
# Each answer is bound to its own name (summary_response, vector_response,
# keyword_response) so that all three remain available after this cell;
# reusing a single `response` variable would keep only the last answer.

# Query summary index
summary_query_engine = summary_index.as_query_engine()
summary_response = summary_query_engine.query("What is a summary of this document?")
print("Summary Response:", summary_response)

# Query vector index
vector_query_engine = vector_index.as_query_engine()
vector_response = vector_query_engine.query("What did the author do growing up?")
print("Vector Response:", vector_response)

# Query keyword table index
keyword_query_engine = keyword_table_index.as_query_engine()
keyword_response = keyword_query_engine.query("What did the author do after his time at YC?")
print("Keyword Table Response:", keyword_response)


In [None]:
from llama_index.core.response.notebook_utils import display_response

# Render the three query responses in order. Each of these names must already
# be bound to a response object in the kernel.
for shown_response in (summary_response, vector_response, keyword_response):
    display_response(shown_response)