In [2]:
!pip install llama-index-llms-ollama llama-index-embeddings-ollama llama-index-readers-file
!pip install llama-index llama-index-core pillow

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


In [3]:
import os
import glob
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core.schema import TextNode
from llama_index.core.response_synthesizers import get_response_synthesizer



In [6]:
# --- Configuration ---
MOONDREAM_MODEL = "moondream"           # For image processing (VLM)
GEMMA_MODEL = "gemma3"                  # For final text synthesis (LLM)
EMBED_MODEL = "nomic-embed-text"        # For generating vector embeddings
DATA_DIR = "./textbooks" 

In [8]:
# Initialize Clients
moondream_client = Ollama(model=MOONDREAM_MODEL, request_timeout=180.0) 
gemma_client = Ollama(model=GEMMA_MODEL, request_timeout=120.0)

In [10]:

# Set Global Settings for the RAG Index (used for embedding and final generation)
Settings.llm = gemma_client # Gemma 3 handles the final answer synthesis
Settings.embed_model = OllamaEmbedding(model_name=EMBED_MODEL)
print(f"Synthesis LLM set to: {GEMMA_MODEL}")
print(f"Embedding Model set to: {EMBED_MODEL}")

Synthesis LLM set to: gemma3
Embedding Model set to: nomic-embed-text


In [12]:
# Load all documents/files
documents = SimpleDirectoryReader(DATA_DIR).load_data()
print(f"Loaded {len(documents)} initial documents/files.")

multimodal_nodes = []
image_files = glob.glob(os.path.join(DATA_DIR, "*.[jJpP]*"))
print(f"Found {len(image_files)} images to process using {MOONDREAM_MODEL}.")

for image_path in image_files:
    print(f"Processing image: {image_path}...")
    try:
        # Moondream processes the image and returns a text caption
        response = moondream_client.generate(
            prompt="Describe this image in detail, focusing on key content and context for a RAG system.",
            images=[image_path] 
        )
        
        caption = response.text.strip()
        
        # Create a TextNode to represent the image data
        image_node = TextNode(
            text=f"Image Description for {os.path.basename(image_path)}: {caption}",
            metadata={"file_path": image_path, "type": "moondream_description"}
        )
        multimodal_nodes.append(image_node)
        print(f"  -> Caption: {caption[:50]}...")
        
    except Exception as e:
        print(f"Error processing {image_path}: {e}")

# Combine text documents and image description nodes
all_nodes = documents + multimodal_nodes
print(f"Total nodes for indexing: {len(all_nodes)}")

Loaded 4354 initial documents/files.
Found 51 images to process using moondream.
Processing image: ./textbooks\2019BurkovTheHundred-pageMachineLearning.pdf...
Error processing ./textbooks\2019BurkovTheHundred-pageMachineLearning.pdf: 'Ollama' object has no attribute 'generate'
Processing image: ./textbooks\Cloud Computing - Theory and Practice- Marinescu, Dan C.pdf...
Error processing ./textbooks\Cloud Computing - Theory and Practice- Marinescu, Dan C.pdf: 'Ollama' object has no attribute 'generate'
Processing image: ./textbooks\cryptography.pdf...
Error processing ./textbooks\cryptography.pdf: 'Ollama' object has no attribute 'generate'
Processing image: ./textbooks\Data Science.pdf...
Error processing ./textbooks\Data Science.pdf: 'Ollama' object has no attribute 'generate'
Processing image: ./textbooks\Hands-On_Machine_Learning_with_Scikit-Learn-Keras-and-TensorFlow-2nd-Edition-Aurelien-Geron.pdf...
Error processing ./textbooks\Hands-On_Machine_Learning_with_Scikit-Learn-Keras-and-T

In [14]:
# The embedding model indexes all text data
print("Creating and indexing the vector store...")
index = VectorStoreIndex.from_documents(all_nodes)
print("Indexing complete.")

# Create the query engine. It uses Gemma 3 for the final response.
query_engine = index.as_query_engine(
    similarity_top_k=5, 
    response_mode="compact" # Recommended for RAG tasks
)

# --- Query 1: Pure document retrieval and Gemma synthesis ---
query_text = "Summarize the key findings from the annual report."
print(f"\n--- Query 1: {query_text} ---")
response_text = query_engine.query(query_text)
print("Response (via Gemma 3):\n", str(response_text))

# --- Query 2: Multimodal retrieval (uses Moondream's description) and Gemma synthesis ---
query_image = "Based on the image descriptions, what visual data supports the claim about market share growth?"
print(f"\n--- Query 2: {query_image} ---")
response_image = query_engine.query(query_image)
print("Response (via Gemma 3):\n", str(response_image))

Creating and indexing the vector store...


2025-10-11 16:31:44,587 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-10-11 16:31:44,845 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-10-11 16:31:46,423 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-10-11 16:31:47,529 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-10-11 16:31:47,730 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-10-11 16:31:48,026 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-10-11 16:31:49,743 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-10-11 16:31:51,268 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-10-11 16:31:53,043 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-10-11 16:31:54,569 - INFO - HTTP Request: POST http://localhost:1143

Indexing complete.


2025-10-11 17:38:53,945 - INFO - HTTP Request: POST http://localhost:11434/api/show "HTTP/1.1 200 OK"
2025-10-11 17:38:54,041 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"



--- Query 1: Summarize the key findings from the annual report. ---


ReadTimeout: timed out