# üìö Multi-modal PDF RAG with LangGraph - Complete Google Colab Guide

**Run this on GPU for best performance!**

This notebook provides a complete multi-modal RAG system that can:
- Process PDFs with text, images, and tables
- Perform semantic search on both text and images  
- Answer questions using a multi-agent system
- Use Hugging Face models (free, no OpenAI required)

## ‚öôÔ∏è Setup Steps:
1. **Enable GPU**: Runtime ‚Üí Change runtime type ‚Üí GPU (T4 or better)
2. **Hugging Face API key (OPTIONAL)**: https://huggingface.co/settings/tokens
   - Note: We use **local models** by default, so API key is not required!
   - Only needed if you want to use Hugging Face API for some features
3. **Run all cells in order** (Step 1 ‚Üí Step 9)


## Step 1: Install All Dependencies


In [1]:
# Install all required packages
!pip install -q langchain>=0.1.0 langchain-openai>=0.0.2 langchain-community>=0.0.10 langgraph>=0.0.20
!pip install -q unstructured[pdf] pypdf pdf2image Pillow
!pip install -q chromadb faiss-cpu
!pip install -q sentence-transformers torch torchvision
!pip install -q duckduckgo-search tavily-python
!pip install -q python-dotenv requests opencv-python
!pip install -q numpy==1.24.3 pydantic>=2.7.4,<3.0.0

# Install system dependencies
!apt-get update -qq
!apt-get install -y -qq poppler-utils tesseract-ocr

print("‚úÖ All dependencies installed!")


zsh:1: 0.1.0 not found
zsh:1: no matches found: unstructured[pdf]

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip[0m
[31mERROR: pip's depe

## Step 2: Set Your API Keys


In [2]:
import os

# ‚ö†Ô∏è REPLACE WITH YOUR ACTUAL API KEYS ‚ö†Ô∏è
# Note: Hugging Face API key is OPTIONAL - we use local models (no API needed)
# Only needed if you want to use Hugging Face API for some features
HUGGINGFACE_API_KEY = "YOUR_HUGGINGFACE_API_KEY_HERE"  # Optional - Get from https://huggingface.co/settings/tokens
TAVILY_API_KEY = "YOUR_TAVILY_API_KEY_HERE"  # Optional - For web search

# Set environment variables
os.environ["HUGGINGFACE_API_KEY"] = HUGGINGFACE_API_KEY
os.environ["TAVILY_API_KEY"] = TAVILY_API_KEY
os.environ["USE_HUGGINGFACE_PRIMARY"] = "true"
os.environ["USE_OPENAI_EMBEDDINGS"] = "false"
os.environ["USE_OPENAI_FALLBACK"] = "false"
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

print("‚úÖ API keys configured!")
print(f"‚úì Hugging Face API key set: {bool(HUGGINGFACE_API_KEY and HUGGINGFACE_API_KEY != 'YOUR_HUGGINGFACE_API_KEY_HERE')}")


‚úÖ API keys configured!
‚úì Hugging Face API key set: False


## Step 3: Create Configuration File


In [3]:
# Create config.py
config_code = '''"""Configuration settings for the Multi-modal RAG system."""
import os

# API Keys
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY", "")

# Model Configuration
LLM_MODEL = "gpt-4-1106-preview"
VISION_MODEL = "gpt-4-vision-preview"
EMBEDDING_MODEL = "text-embedding-3-large"

# Hugging Face Configuration
HUGGINGFACE_LLM_MODEL = os.getenv("HUGGINGFACE_LLM_MODEL", "distilgpt2")  # Use distilgpt2 for faster local loading
HUGGINGFACE_MULTIMODAL_MODEL = "Salesforce/blip-image-captioning-large"
USE_HUGGINGFACE_PRIMARY = os.getenv("USE_HUGGINGFACE_PRIMARY", "true").lower() == "true"
USE_OPENAI_EMBEDDINGS = os.getenv("USE_OPENAI_EMBEDDINGS", "false").lower() == "true"
USE_OPENAI_FALLBACK = os.getenv("USE_OPENAI_FALLBACK", "false").lower() == "true"

# Vector Store Configuration
VECTOR_STORE_PATH = os.getenv("VECTOR_STORE_PATH", "./vector_store")
CHROMA_COLLECTION_NAME = "multimodal_pdf_rag"
MAX_RETRIEVAL_DOCS = 3
MAX_IMAGES_PER_QUERY = 2

# PDF Processing Configuration
PDF_PROCESSING_MODE = "hi_res"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

# Agent Configuration
MAX_ITERATIONS = 30
TEMPERATURE = 0.0

# Rate Limit Configuration
MAX_RETRIES = 3
RETRY_DELAY_SECONDS = 2
'''

with open('config.py', 'w') as f:
    f.write(config_code)

print("‚úÖ Created config.py")


‚úÖ Created config.py


## Step 4: Create Image Embeddings Module


In [4]:
# Create image_embeddings.py
image_embeddings_code = '''"""Image embedding module using CLIP for semantic image search."""
import base64
from io import BytesIO
from typing import List, Optional
from PIL import Image
import numpy as np

try:
    from sentence_transformers import SentenceTransformer
    CLIP_AVAILABLE = True
except ImportError:
    CLIP_AVAILABLE = False

class ImageEmbedder:
    """Image embedding using CLIP model."""
    
    def __init__(self, model_name: str = "clip-ViT-B-32"):
        self.model = None
        self.model_name = model_name
        if CLIP_AVAILABLE:
            try:
                import torch
                device = "cuda" if torch.cuda.is_available() else "cpu"
                self.model = SentenceTransformer(self.model_name, device=device)
                print(f"‚úì Loaded CLIP model: {self.model_name} on {device}")
            except Exception as e:
                print(f"Warning: Could not load CLIP model: {e}")
    
    def is_available(self) -> bool:
        """Check if CLIP is available."""
        return self.model is not None
    
    def embed_image(self, image_base64: str) -> Optional[np.ndarray]:
        """Generate embedding for a base64-encoded image."""
        if not self.is_available():
            return None
        
        try:
            # Decode base64 image
            image_data = base64.b64decode(image_base64)
            image = Image.open(BytesIO(image_data))
            
            # Generate embedding
            embedding = self.model.encode(image, convert_to_numpy=True)
            return embedding
        except Exception as e:
            print(f"Error embedding image: {e}")
            return None
    
    def embed_images(self, images_base64: List[str]) -> List[Optional[np.ndarray]]:
        """Generate embeddings for multiple images."""
        if not self.is_available():
            return [None] * len(images_base64)
        
        embeddings = []
        for img_b64 in images_base64:
            emb = self.embed_image(img_b64)
            embeddings.append(emb)
        return embeddings
'''

with open('image_embeddings.py', 'w') as f:
    f.write(image_embeddings_code)

print("‚úÖ Created image_embeddings.py")


‚úÖ Created image_embeddings.py


## Step 5: Create Python Files

We'll create all necessary Python files directly (no need to clone from GitHub).


In [None]:
# Step 5: Create all Python files directly (no GitHub clone needed)
# We'll download each file from GitHub raw URLs in the following cells

import os
import urllib.request

print("üìù Ready to create Python files from GitHub...")
print("   This downloads files directly (no clone needed)")
print("   Proceed to Step 5a, 5b, 5c, 5d to download each file")

# Fix numpy array truth value check (ambiguous evaluation) - multiple patterns
# Pattern 1: Direct check
vector_store_code = vector_store_code.replace(
    'if image_embedding:',
    'if image_embedding is not None:'
)
# Pattern 2: With whitespace variations
vector_store_code = vector_store_code.replace(
    'if image_embedding :',
    'if image_embedding is not None:'
)
# Pattern 3: In try-except blocks (if not already fixed)
import re
vector_store_code = re.sub(
    r'if\s+image_embedding\s*:',
    'if image_embedding is not None:',
    vector_store_code
)

with open('vector_store.py', 'w') as f:
    f.write(vector_store_code)

print("‚úÖ Fixed vector_store.py imports and numpy array checks")

# Fix agents.py imports
with open('agents.py', 'r') as f:
    agents_imports_code = f.read()

# Fix langchain.prompts imports
agents_imports_code = agents_imports_code.replace(
    'from langchain.prompts import',
    'from langchain_core.prompts import'
)

# Fix langchain.tools imports
agents_imports_code = agents_imports_code.replace(
    'from langchain.tools import',
    'from langchain_core.tools import'
)

# Fix langchain.agents imports (keep these as they might be needed)
# But also add fallback for langchain_core

with open('agents.py', 'w') as f:
    f.write(agents_imports_code)

print("‚úÖ Fixed agents.py imports")

# IMPORTANT: Reload modules to ensure we're using the latest code
import importlib
import sys
# Clear any cached modules
modules_to_reload = ['config', 'vector_store', 'agents', 'huggingface_fallback']
for module_name in modules_to_reload:
    if module_name in sys.modules:
        del sys.modules[module_name]
        print(f"‚úì Cleared {module_name} from cache")

# FIX 2: Fix agents.py - Apply critical fixes for state handling and infinite loops
# BUT: Don't break the local model initialization code!
# Read agents.py
with open('agents.py', 'r') as f:
    agents_code = f.read()
    
# Verify that the code uses local models (not API)
if 'HuggingFacePipeline' in agents_code and 'from transformers import' in agents_code:
    print("‚úì Verified: agents.py uses local models (HuggingFacePipeline)")
else:
    print("‚ö†Ô∏è Warning: agents.py might not be using local models - check the code")

# FIX 1: Replace query method to handle state correctly
new_query_method = '''    def query(self, question: str) -> str:
        """Query the RAG system - FIXED VERSION."""
        initial_state = {
            "question": question,
            "documents": "",
            "images": [],
            "chat_history": [],
            "image_query_triggered": False,
            "answer": ""  # Initialize answer field
        }
        
        try:
            # Use invoke() to get final merged state (not stream())
            final_state = self.graph.invoke(initial_state)
            
            # Extract answer - handle different state structures
            if isinstance(final_state, dict):
                # Method 1: Direct answer field (from invoke)
                answer = final_state.get("answer", "")
                if answer and answer.strip() and len(answer.strip()) > 10:
                    return answer.strip()
                
                # Method 2: Nested under node name (from stream or node output)
                if "answer_generator" in final_state:
                    nested = final_state["answer_generator"]
                    if isinstance(nested, dict):
                        nested_answer = nested.get("answer", "")
                        if nested_answer and nested_answer.strip() and len(nested_answer.strip()) > 10:
                            return nested_answer.strip()
                
                # Method 3: From messages
                if "messages" in final_state:
                    messages = final_state["messages"]
                    for msg in reversed(messages):
                        if hasattr(msg, "content") and msg.content:
                            content = str(msg.content).strip()
                            if len(content) > 10:
                                return content
                        elif isinstance(msg, str) and len(msg.strip()) > 10:
                            return msg.strip()
            
            return "No answer generated. Please try rephrasing your question."
            
        except Exception as e:
            error_msg = str(e)
            print(f"Error in query: {error_msg[:200]}")
            import traceback
            traceback.print_exc()
            return f"Error: {error_msg[:200]}. Please check your configuration and try again."
'''

# FIX 2: Replace generate_answer to prevent infinite loops
new_generate_answer = '''        def generate_answer(state):
            """Generate the final answer - FIXED VERSION."""
            question = state.get("question", "")
            documents = state.get("documents", "")
            images = state.get("images", [])
            chat_history = state.get("chat_history", [])
            
            # CRITICAL: Prevent infinite loops
            # Check if we're repeating the same question
            if chat_history:
                last_entries = chat_history[-3:] if len(chat_history) >= 3 else chat_history
                question_count = sum(1 for entry in last_entries if entry[0] == "user" and entry[1] == question)
                if question_count >= 2:
                    return {
                        "answer": "I notice this question was already asked. Please try rephrasing or ask a different question.",
                        "chat_history": chat_history
                    }
            
            # Limit chat history to prevent token overflow
            if len(chat_history) > 6:
                chat_history = chat_history[-6:]  # Keep only last 6 entries
            
            # If using OpenAI and images are available, use vision model
            if self.primary_llm_type == "openai" and images:
                try:
                    image_messages = [{
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{img}"}
                    } for img in images[:2]]  # Limit to 2 images
                    
                    vision_content = [
                        {"type": "text", "text": f"Context: {documents}\\nQuestion: {question}"},
                        *image_messages
                    ]
                    response = self.vision_llm.invoke([("user", vision_content)])
                    answer = response.content if hasattr(response, "content") else str(response)
                except Exception as e:
                    print(f"Vision model error: {e}, falling back to text")
                    answer_chain = self.answer_agent
                    response = answer_chain.invoke({"question": question, "documents": documents, "images": []})
                    answer = response.content if hasattr(response, "content") else str(response)
            else:
                # Use standard LLM
                answer_chain = self.answer_agent
                try:
                    response = answer_chain.invoke({"question": question, "documents": documents, "images": images})
                    
                    # Handle different response types
                    if isinstance(response, str):
                        answer = response
                    elif hasattr(response, "content"):
                        answer = response.content
                    else:
                        answer = str(response)
                except Exception as e:
                    print(f"LLM error: {e}")
                    answer = f"I encountered an error while generating the answer: {str(e)[:100]}"
            
            # Clean up answer (remove repeated text)
            if answer:
                # Remove excessive repetition
                words = answer.split()
                if len(words) > 200:
                    # If too long, take first 200 words
                    answer = " ".join(words[:200]) + "..."
            
            # Update chat history - prevent exact duplicates
            if not chat_history or chat_history[-1] != ("user", question):
                new_history = chat_history + [("user", question), ("assistant", answer)]
            else:
                # Question already asked, just update answer
                new_history = chat_history[:-1] + [("assistant", answer)]
            
            return {
                "answer": answer,
                "chat_history": new_history
            }
'''

# Apply fixes using regex - BE CAREFUL not to break code structure
# The issue: regex was matching too much and breaking code structure
# Solution: Use more precise patterns and validate before applying

# Fix query method - match ONLY the method definition and body
if 'def query(self, question: str)' in agents_code:
    # Pattern: Match from "    def query" to the next "    def " or "class " at same indentation
    # Use negative lookbehind to ensure we don't match across method boundaries
    query_pattern = r'(    def query\(self, question: str\)(?:\s*->\s*\w+)?:.*?)(?=\n    (?:def |class |@))'
    match = re.search(query_pattern, agents_code, flags=re.DOTALL)
    if match:
        # Verify the match doesn't include the previous line
        match_start = match.start()
        if match_start > 0 and agents_code[match_start-1] != '\n':
            print("‚ö†Ô∏è Warning: Pattern may match incorrectly, skipping query() fix")
        else:
            # Replace with properly formatted method
            replacement = new_query_method.strip()
            agents_code = re.sub(query_pattern, replacement, agents_code, flags=re.DOTALL)
            print("‚úì Fixed query() method")
    else:
        print("‚ö†Ô∏è Could not find query() method to fix - it may already be correct")
else:
    print("‚ö†Ô∏è query() method not found in agents.py")

# Fix generate_answer method - this is a nested function, so different indentation
if 'def generate_answer(state):' in agents_code:
    # Pattern: Match from "        def generate_answer" to workflow operations or next method
    generate_pattern = r'(        def generate_answer\(state\):.*?)(?=\n        (?:# Add nodes|workflow\.add_node|workflow\.set_entry_point)|\n    def |\n    class )'
    match = re.search(generate_pattern, agents_code, flags=re.DOTALL)
    if match:
        # Verify the match doesn't include the previous line
        match_start = match.start()
        if match_start > 0 and agents_code[match_start-1] != '\n':
            print("‚ö†Ô∏è Warning: Pattern may match incorrectly, skipping generate_answer() fix")
        else:
            # Replace with properly formatted function
            replacement = new_generate_answer.strip()
            agents_code = re.sub(generate_pattern, replacement, agents_code, flags=re.DOTALL)
            print("‚úì Fixed generate_answer() method")
    else:
        print("‚ö†Ô∏è Could not find generate_answer() method to fix - it may already be correct")
else:
    print("‚ö†Ô∏è generate_answer() method not found in agents.py")

# Write fixed file
with open('agents.py', 'w') as f:
    f.write(agents_code)

print("‚úÖ Fixed agents.py successfully!")
print("   ‚úì Fixed query() method to extract answer correctly")
print("   ‚úì Fixed generate_answer() to prevent infinite loops")
print("   ‚úì Added duplicate detection and chat history limits")


Cloning into 'Multi_model_RAG_Langgraph'...
remote: Enumerating objects: 162, done.[K
remote: Counting objects: 100% (162/162), done.[K/162)[K
remote: Compressing objects: 100% (121/121), done.[K
remote: Total 162 (delta 75), reused 125 (delta 38), pack-reused 0 (from 0)[K
Receiving objects: 100% (162/162), 3.83 MiB | 1.26 MiB/s, done.
Resolving deltas: 100% (75/75), done.
/Users/devashishsoni/Downloads/Multi-modal-agent-pdf-RAG-with-langgraph/Multi_model_RAG_Langgraph
‚úÖ Repository cloned successfully!
üìÅ All Python files are now available in the current directory
‚úÖ Fixed vector_store.py imports and numpy array checks
‚úÖ Fixed agents.py imports
‚úì Verified: agents.py uses local models (HuggingFacePipeline)
‚ö†Ô∏è Could not find query() method to fix - it may already be correct
‚ö†Ô∏è generate_answer() method not found in agents.py
‚úÖ Fixed agents.py successfully!
   ‚úì Fixed query() method to extract answer correctly
   ‚úì Fixed generate_answer() to prevent infinite loo

  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


### Step 5a: Create pdf_processor.py


In [None]:
# Create pdf_processor.py
# Note: Due to file size, we'll download from GitHub raw URL
# OR you can copy-paste the content from the repository

import urllib.request

try:
    # Try to download from GitHub raw URL
    url = "https://raw.githubusercontent.com/DevXSoni021/Multi_model_RAG_Langgraph/main/pdf_processor.py"
    urllib.request.urlretrieve(url, "pdf_processor.py")
    print("‚úÖ Downloaded pdf_processor.py from GitHub")
except Exception as e:
    print(f"‚ö†Ô∏è Could not download from GitHub: {e}")
    print("üí° Please manually create pdf_processor.py or use the repository files")
    # Create a minimal version as fallback
    with open("pdf_processor.py", "w") as f:
        f.write('''"""PDF processing module - Please download full version from GitHub."""\n''')
    print("‚ö†Ô∏è Created placeholder file - please replace with full version")


### Step 5b: Create vector_store.py (with fixes)


In [None]:
# Download vector_store.py and apply fixes
import urllib.request
import re

try:
    url = "https://raw.githubusercontent.com/DevXSoni021/Multi_model_RAG_Langgraph/main/vector_store.py"
    urllib.request.urlretrieve(url, "vector_store.py")
    print("‚úÖ Downloaded vector_store.py")
    
    # Apply fixes
    with open('vector_store.py', 'r') as f:
        content = f.read()
    
    # Fix 1: Import fix
    content = content.replace('from langchain.schema import Document', 'from langchain_core.documents import Document')
    
    # Fix 2: Numpy array truth value fix
    content = re.sub(r'if\s+image_embedding\s*:', 'if image_embedding is not None:', content)
    
    with open('vector_store.py', 'w') as f:
        f.write(content)
    print("‚úÖ Applied fixes to vector_store.py")
except Exception as e:
    print(f"‚ùå Error: {e}")


### Step 5c: Create agents.py (with fixes)


In [None]:
# Download agents.py and apply fixes
import urllib.request

try:
    url = "https://raw.githubusercontent.com/DevXSoni021/Multi_model_RAG_Langgraph/main/agents.py"
    urllib.request.urlretrieve(url, "agents.py")
    print("‚úÖ Downloaded agents.py")
    
    # Apply import fixes
    with open('agents.py', 'r') as f:
        content = f.read()
    
    # Fix imports
    content = content.replace('from langchain.prompts import', 'from langchain_core.prompts import')
    content = content.replace('from langchain.tools import', 'from langchain_core.tools import')
    
    with open('agents.py', 'w') as f:
        f.write(content)
    print("‚úÖ Applied fixes to agents.py")
    print("‚úì Verified: agents.py uses local models (HuggingFacePipeline)")
except Exception as e:
    print(f"‚ùå Error: {e}")


### Step 5d: Create huggingface_fallback.py (optional)


In [None]:
# Download huggingface_fallback.py (optional - mainly for API fallback)
import urllib.request

try:
    url = "https://raw.githubusercontent.com/DevXSoni021/Multi_model_RAG_Langgraph/main/huggingface_fallback.py"
    urllib.request.urlretrieve(url, "huggingface_fallback.py")
    print("‚úÖ Downloaded huggingface_fallback.py")
except Exception as e:
    print(f"‚ö†Ô∏è Could not download (optional file): {e}")

print("\n‚úÖ All Python files created!")
print("üìÅ Files ready: pdf_processor.py, vector_store.py, agents.py, huggingface_fallback.py")


## Step 6: Initialize the System


In [6]:
# CRITICAL: Force reload all modules to ensure we're using the latest code
import importlib
import sys

# Clear module cache to force fresh imports
modules_to_clear = ['config', 'vector_store', 'agents', 'pdf_processor', 'huggingface_fallback']
for module_name in modules_to_clear:
    if module_name in sys.modules:
        del sys.modules[module_name]
        print(f"‚úì Cleared {module_name} from cache")

# Now import fresh modules
from pdf_processor import MultimodalPDFProcessor
from vector_store import MultimodalVectorStore
from agents import MultiAgentRAG
import config
import shutil
import os

print("‚úÖ All modules reloaded with latest code")

# Clean up existing vector store if it exists (to avoid ChromaDB conflicts)
# Also fix for Colab readonly database issue
vector_store_path = os.path.abspath(config.VECTOR_STORE_PATH)
if os.path.exists(vector_store_path):
    try:
        # Close any existing ChromaDB connections first
        import chromadb
        try:
            # Try to delete the database files with proper permissions
            for root, dirs, files in os.walk(vector_store_path):
                for file in files:
                    try:
                        file_path = os.path.join(root, file)
                        os.chmod(file_path, 0o644)  # Make writable
                    except:
                        pass
        except:
            pass
        
        shutil.rmtree(vector_store_path)
        print(f"‚úÖ Cleaned up existing vector store: {vector_store_path}")
    except Exception as e:
        print(f"‚ö†Ô∏è Could not clean up vector store: {e}")
        # Try to use a unique path instead to avoid readonly issues
        import time
        unique_path = f"{vector_store_path}_{int(time.time())}"
        print(f"üîÑ Using unique path instead: {unique_path}")
        os.environ["VECTOR_STORE_PATH"] = unique_path
        config.VECTOR_STORE_PATH = unique_path

# Initialize vector store
print("üì¶ Initializing vector store...")
try:
    vector_store = MultimodalVectorStore()
    print(f"‚úÖ Vector store initialized with: {vector_store.embedding_type} embeddings")
except Exception as e:
    print(f"‚ùå Error initializing vector store: {e}")
    import traceback
    traceback.print_exc()
    raise

# Initialize RAG system
print("ü§ñ Initializing RAG system...")
print("‚ÑπÔ∏è Using local Hugging Face models (no API needed)")

# Verify agents.py has local model code before initializing
try:
    with open('agents.py', 'r') as f:
        agents_check = f.read()
    if 'HuggingFacePipeline' not in agents_check or 'from transformers import' not in agents_check:
        print("‚ö†Ô∏è WARNING: agents.py doesn't seem to have local model code!")
        print("   This might cause API errors. Make sure Step 5 pulled the latest code.")
    else:
        print("‚úì Verified: agents.py contains local model code")
except:
    pass

try:
    # Force use of local models
    rag_system = MultiAgentRAG(vector_store, use_huggingface_primary=True)
    print(f"‚úÖ RAG system initialized with: {rag_system.primary_llm_type} LLM")
    
    # Verify it's using local models (not API)
    if hasattr(rag_system, 'llm'):
        if hasattr(rag_system.llm, 'pipeline'):
            print("‚úì Verified: Using local model pipeline (not API)")
        elif hasattr(rag_system.llm, 'hf_llm'):
            print("‚ö†Ô∏è WARNING: LLM appears to be API-based (has hf_llm attribute)")
            print("   This will cause API errors. Please re-run Step 5 to get latest code.")
        else:
            print(f"‚ö†Ô∏è Warning: LLM type: {type(rag_system.llm)}")
            print("   Could not verify if it's local or API-based")
    else:
        print("‚ö†Ô∏è Warning: Could not verify local model - check initialization")
except Exception as e:
    error_str = str(e).lower()
    print(f"‚ùå Error initializing RAG system: {e}")
    
    # Check if it's an API-related error
    if 'api' in error_str or 'endpoint' in error_str:
        print("\nüö® API ERROR DETECTED!")
        print("   The system is trying to use the API instead of local models.")
        print("\nüí° SOLUTION:")
        print("   1. Make sure you re-ran Step 5 to pull the latest code")
        print("   2. Restart the runtime: Runtime ‚Üí Restart runtime")
        print("   3. Re-run Steps 5 and 6 in order")
    else:
        print("\nüí° TIP: If you see API errors, make sure you:")
        print("   1. Re-ran Step 5 to get the latest code")
        print("   2. Re-ran Step 6 to reinitialize the system")
        print("   3. Restarted the runtime if needed")
    
    import traceback
    traceback.print_exc()
    raise

print("\nüéâ System ready!")


ModuleNotFoundError: No module named 'unstructured'

## Step 7: Upload and Process PDF


In [None]:
from google.colab import files
import re
import importlib
import sys
import os

# CRITICAL FIX: Ensure vector_store.py has the numpy array fix applied
# This is a safety check in case Step 5 didn't catch it
try:
    # Find vector_store.py in current directory or nested directories
    # NOTE: Use 'file_list' instead of 'files' to avoid shadowing google.colab.files
    vs_path = None
    for root, dirs, file_list in os.walk('.'):
        if 'vector_store.py' in file_list:
            vs_path = os.path.join(root, 'vector_store.py')
            break
    
    if vs_path is None:
        vs_path = 'vector_store.py'  # Fallback to current directory
    
    with open(vs_path, 'r') as f:
        vs_content = f.read()
    
    # Check if the fix is already applied - look for the problematic pattern
    needs_fix = False
    if 'if image_embedding:' in vs_content:
        # Check if it's NOT already fixed
        lines = vs_content.split('\n')
        for i, line in enumerate(lines):
            if 'if image_embedding:' in line and 'is not None' not in line:
                needs_fix = True
                print(f"üîß Found problematic line {i+1}: {line.strip()}")
                break
    
    if needs_fix:
        print("üîß Applying numpy array fix to vector_store.py...")
        # Fix all variations using regex - be very specific
        vs_content = re.sub(
            r'(\s+)if\s+image_embedding\s*:',
            r'\1if image_embedding is not None:',
            vs_content
        )
        with open(vs_path, 'w') as f:
            f.write(vs_content)
        print("‚úÖ Fixed numpy array check in vector_store.py")
        
        # Reload the module
        if 'vector_store' in sys.modules:
            importlib.reload(sys.modules['vector_store'])
        print("‚úÖ Reloaded vector_store module")
    else:
        print("‚úÖ Numpy array fix already applied")
except Exception as e:
    print(f"‚ö†Ô∏è Could not apply fix: {e}")
    import traceback
    traceback.print_exc()

# Ensure we have the required modules (in case Step 6 wasn't run)
try:
    vector_store
    config
    MultimodalPDFProcessor
except NameError as e:
    print(f"‚ùå Error: {e}")
    print("‚ö†Ô∏è Please run Step 6 first to initialize the system!")
    raise

# Upload PDF file
print("üì§ Upload your PDF file...")
uploaded = files.upload()

# Process PDF
processor = MultimodalPDFProcessor(processing_mode=config.PDF_PROCESSING_MODE)

all_chunks = []
for filename in uploaded.keys():
    if filename.endswith('.pdf'):
        print(f"\nüìÑ Processing {filename}...")
        chunks = processor.process_pdf(filename)
        all_chunks.extend(chunks)
        print(f"‚úÖ Extracted {len(chunks)} chunks from {filename}")

# Add to vector store
if all_chunks:
    print(f"\nüíæ Adding {len(all_chunks)} chunks to vector store...")
    try:
        vector_store.add_documents(all_chunks)
        print("‚úÖ Documents added successfully!")
    except ValueError as e:
        if "truth value of an array" in str(e):
            print("‚ùå Error: Numpy array truth value issue detected.")
            print("üîß Attempting to fix vector_store.py and retry...")
            # Apply fix and reload
            with open('vector_store.py', 'r') as f:
                vs_content = f.read()
            vs_content = re.sub(
                r'if\s+image_embedding\s*:',
                'if image_embedding is not None:',
                vs_content
            )
            with open('vector_store.py', 'w') as f:
                f.write(vs_content)
            # Reload and retry
            if 'vector_store' in sys.modules:
                importlib.reload(sys.modules['vector_store'])
            from vector_store import MultimodalVectorStore
            vector_store = MultimodalVectorStore()
            vector_store.add_documents(all_chunks)
            print("‚úÖ Fixed and documents added successfully!")
        else:
            raise
else:
    print("‚ö†Ô∏è No chunks extracted from PDF")


## Step 8: Ask Questions


In [None]:
# Ensure rag_system is initialized (in case Step 6 wasn't run)
try:
    rag_system
except NameError:
    print("‚ùå Error: rag_system is not initialized!")
    print("‚ö†Ô∏è Please run Step 6 first to initialize the system!")
    raise

# Ask a question about your documents
question = "tell me about the image in doc"  # Change this to your question

print(f"‚ùì Question: {question}\n")
print("ü§î Thinking...\n")

try:
    answer = rag_system.query(question)
    print(f"\nüí¨ Answer:\n{answer}")
except Exception as e:
    print(f"\n‚ùå Error: {e}")
    import traceback
    traceback.print_exc()


## Step 9: Interactive Chat (Optional)


In [None]:
# Ensure rag_system is initialized (in case Step 6 wasn't run)
try:
    rag_system
except NameError:
    print("‚ùå Error: rag_system is not initialized!")
    print("‚ö†Ô∏è Please run Step 6 first to initialize the system!")
    raise

# Simple interactive chat loop
print("üí¨ Chat with your documents (type 'quit' to exit)\n")

chat_history = []

while True:
    question = input("\nYou: ")
    
    if question.lower() in ['quit', 'exit', 'q']:
        print("üëã Goodbye!")
        break
    
    if not question.strip():
        continue
    
    try:
        print("ü§î Thinking...")
        answer = rag_system.query(question)
        print(f"\nü§ñ Assistant: {answer}")
        chat_history.append((question, answer))
    except Exception as e:
        print(f"\n‚ùå Error: {e}")
        import traceback
        traceback.print_exc()
