In [1]:
!pip install pypdf langchain-text-splitters sentence-transformers faiss-cpu google-generativeai python-dotenv PyMuPDF Pillow
!pip install -q rank-bm25

Collecting pypdf
  Downloading pypdf-6.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.13.0-cp39-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.7 kB)
Collecting PyMuPDF
  Downloading pymupdf-1.26.6-cp310-abi3-manylinux_2_28_x86_64.whl.metadata (3.4 kB)
Downloading pypdf-6.3.0-py3-none-any.whl (328 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m328.9/328.9 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading faiss_cpu-1.13.0-cp39-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.6 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m23.6/23.6 MB[0m [31m39.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pymupdf-1.26.6-cp310-abi3-manylinux_2_28_x86_64.whl (24.1 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î

In [2]:
import fitz
from PIL import Image
import io
import numpy as np
import re
from collections import defaultdict
from sentence_transformers import SentenceTransformer
import faiss
import pickle
from rank_bm25 import BM25Okapi
import google.generativeai as genai
import os

In [3]:
# STEP 1: EXTRACT AND CLASSIFY PAGES
# ============================================================================

print("="*80)
print("STEP 1: EXTRACTING AND CLASSIFYING PAGES")
print("="*80)

pdf_path = "/content/drive/MyDrive/boing_RAG/Boeing B737 Manual.pdf"
doc = fitz.open(pdf_path)

pages = []

for i, page in enumerate(doc):
    text = page.get_text()
    text_length = len(text.strip())

    images = page.get_images()

    # Render page at low res to analyze visual density
    pix = page.get_pixmap(dpi=72)
    img_bytes = pix.tobytes("png")
    img = Image.open(io.BytesIO(img_bytes))

    # Convert to grayscale
    gray = img.convert('L')
    pixels = np.array(gray)

    # Calculate "ink density"
    non_white_pixels = np.sum(pixels < 240)
    total_pixels = pixels.size
    ink_density = non_white_pixels / total_pixels

    # Check for intentionally blank
    is_blank = "intentionally blank" in text.lower()

    # Classification: Diagram if high ink density or has images
    is_diagram = (
        len(images) > 0 or
        (ink_density > 0.14 and not is_blank)
    )

    page_data = {
        "page_number": i + 1,
        "text": text.strip(),
        "char_count": text_length,
        "has_images": len(images) > 0,
        "ink_density": ink_density,
        "is_blank": is_blank,
        "is_diagram": is_diagram,
        "page_image": None
    }

    pages.append(page_data)

# Render only diagram pages at high resolution
for i, page_data in enumerate(pages):
    if page_data["is_diagram"]:
        page = doc[i]
        pix = page.get_pixmap(dpi=150)
        page_data["page_image"] = pix.tobytes("png")

doc.close()

diagram_pages = [p["page_number"] for p in pages if p["is_diagram"]]
text_pages = [p["page_number"] for p in pages if not p["is_diagram"]]

print(f" Total pages: {len(pages)}")
print(f"   Diagram pages: {len(diagram_pages)}")
print(f"   Text pages: {len(text_pages)}")

STEP 1: EXTRACTING AND CLASSIFYING PAGES
 Total pages: 146
   Diagram pages: 67
   Text pages: 79


In [4]:
# ============================================================================
# IMPROVED STEP 2: BETTER PERFORMANCE TABLE IDENTIFICATION
# ============================================================================

print("\n" + "="*80)
print("STEP 2: IDENTIFYING PERFORMANCE TABLE PAGES (IMPROVED)")
print("="*80)

performance_table_pages = {}

for page in pages:
    content = page['text']
    content_lower = content.lower()

    is_perf_table = False
    table_info = {}

    # Pattern 1: Field & Climb Limit Weights tables (STRICTER)
    # Must have BOTH "limit weight" AND "pressure altitude" AND table structure
    if (('field limit weight' in content_lower or 'climb limit' in content_lower) and
        'pressure altitude' in content_lower and
        ('1000 kg' in content_lower or 'corr' in content_lower)):  # Table structure indicators

        alt_match = re.search(r'(\d+)\s*FT\s*Pressure\s*Altitude', content, re.IGNORECASE)
        if alt_match:
            altitude = alt_match.group(1)

            runway_condition = "unknown"
            if "wet runway" in content_lower:
                runway_condition = "wet"
            elif "dry runway" in content_lower:
                runway_condition = "dry"

            flap_match = re.search(r'Flaps?\s*(\d+)', content, re.IGNORECASE)
            flap_setting = flap_match.group(1) if flap_match else None

            table_info = {
                "type": "field_climb_limits",
                "altitude": altitude,
                "runway_condition": runway_condition,
                "flap_setting": flap_setting
            }
            is_perf_table = True

    # Pattern 2: Flap retraction tables
    elif ('flap' in content_lower and 'retraction' in content_lower and
          'speed' in content_lower and 't/o' in content_lower):
        table_info = {
            "type": "flap_retraction",
            "altitude": None,
            "runway_condition": None
        }
        is_perf_table = True

    # Pattern 3: Landing field limit tables
    elif ('landing field limit' in content_lower and
          'wind corr' in content_lower):
        table_info = {
            "type": "landing_limits",
            "altitude": None,
            "runway_condition": None
        }
        is_perf_table = True

    # DON'T classify as performance table if it's clearly a procedure
    ''' if is_perf_table:
        # Exclude procedure pages
        if any(keyword in content_lower for keyword in [
            'pilot flying', 'pilot not flying',
            'call "', 'verify mode annunciation',
            'position landing gear'
        ]):
            is_perf_table = False
 '''
    ''' procedure_keywords = ['autobrake', 'pilot flying', 'descent', 'approach', 'landing roll', 'checklist']
    if any(k in question.lower() for k in procedure_keywords):
      disable_performance_table_bias = True
 '''
    if is_perf_table:
        performance_table_pages[page['page_number']] = table_info

print(f"‚úÖ Found {len(performance_table_pages)} performance table pages")
print(f"\nüìÑ Performance table pages: {list(performance_table_pages.keys())}")
print(f"   Page 39 is performance table: {39 in performance_table_pages}")
print(f"   Page 126 is performance table: {126 in performance_table_pages}")

by_type = defaultdict(list)
for page_num, info in performance_table_pages.items():
    by_type[info['type']].append(page_num)

for table_type, page_nums in by_type.items():
    print(f"   {table_type}: {sorted(page_nums)}")


STEP 2: IDENTIFYING PERFORMANCE TABLE PAGES (IMPROVED)
‚úÖ Found 6 performance table pages

üìÑ Performance table pages: [40, 41, 82, 83, 85, 86]
   Page 39 is performance table: False
   Page 126 is performance table: False
   flap_retraction: [40, 41]
   field_climb_limits: [82, 83, 85, 86]


In [5]:
# ============================================================================
# STEP 3: IMPROVED CHUNKING STRATEGY
# ============================================================================

print("\n" + "="*80)
print("STEP 3: IMPROVED CHUNKING STRATEGY")
print("="*80)

from langchain_text_splitters import RecursiveCharacterTextSplitter

# Create a more sophisticated text splitter
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,  # Increased for better context
    chunk_overlap=300,
    separators=["\n\n", "\n", " ", ""]
)

all_chunks = []
enhanced_count = 0

def enhance_performance_table_content(page, table_info):
    """Create more targeted enhancement for performance tables"""
    table_type = table_info['type']
    altitude = table_info.get('altitude') or 'unknown'
    runway = table_info.get('runway_condition') or 'unknown'
    flap = table_info.get('flap_setting') or 'all'

    # Create type-specific enhancements
    if table_type == 'field_climb_limits':
        enhancement = f"""
        PERFORMANCE TABLE: FIELD AND CLIMB LIMIT WEIGHTS
        Altitude: {altitude} FT | Runway: {runway.upper()} | Flaps: {flap}

        This table provides:
        - Field limit weights based on corrected field length and OAT
        - Climb limit weights for the specified conditions

        Keywords: field limit weight, climb limit weight, {altitude} feet,
        pressure altitude, {runway} runway, corrected field length, OAT

        TABLE DATA:
        """
    elif table_type == 'flap_retraction':
        enhancement = f"""
        PERFORMANCE TABLE: FLAP RETRACTION SPEEDS
        This table provides flap retraction speeds for takeoff.

        Keywords: flap retraction, takeoff, speed, flaps
        """
    elif table_type == 'landing_limits':
        enhancement = f"""
        PERFORMANCE TABLE: LANDING FIELD LIMIT WEIGHTS
        This table provides landing field limit weights with wind corrections.

        Keywords: landing field limit, wind correction, landing weight
        """
    else:
        enhancement = f"PERFORMANCE TABLE: {table_type}\n"

    return enhancement + page['text']

for page in pages:
    if page["char_count"] < 20:
        continue

    content = page["text"]
    page_num = page["page_number"]

    # Enhance ALL performance tables, not just field/climb limits
    if page_num in performance_table_pages:
        table_info = performance_table_pages[page_num]
        enhanced_content = enhance_performance_table_content(page, table_info)
        enhanced_count += 1

        all_chunks.append({
            "content": enhanced_content,
            "page_number": page_num,
            "chunk_id": f"page_{page_num}_enhanced",
            "type": "performance_table",
            "page_image": page.get("page_image"),
            "metadata": {
                "source": "Boeing B737 Manual",
                "page": page_num,
                "table_type": table_info['type'],
                "altitude": table_info.get('altitude'),
                "runway_condition": table_info.get('runway_condition'),
                "flap_setting": table_info.get('flap_setting')
            }
        })

    elif page["is_diagram"]:
        # Diagrams/tables - keep whole
        all_chunks.append({
            "content": page["text"],
            "page_number": page_num,
            "chunk_id": f"page_{page_num}_visual",
            "type": "visual",
            "page_image": page["page_image"],
            "metadata": {
                "source": "Boeing B737 Manual",
                "page": page_num,
                "requires_vision": True,
                "ink_density": page["ink_density"]
            }
        })
    else:
        # Use semantic chunking for regular text
        text_chunks = splitter.split_text(page["text"])
        for idx, chunk in enumerate(text_chunks):
            all_chunks.append({
                "content": chunk,
                "page_number": page_num,
                "chunk_id": f"page_{page_num}_chunk_{idx}",
                "type": "text",
                "page_image": None,
                "metadata": {
                    "source": "Boeing B737 Manual",
                    "page": page_num,
                    "requires_vision": False,
                    "chunk_index": idx,
                    "total_chunks": len(text_chunks)
                }
            })

visual_chunks = [c for c in all_chunks if c["type"] == "visual"]
performance_chunks = [c for c in all_chunks if c["type"] == "performance_table"]
text_chunks = [c for c in all_chunks if c["type"] == "text"]

print(f" Chunking Complete!")
print(f"   Total chunks: {len(all_chunks)}")
print(f"   Enhanced performance table chunks: {enhanced_count}")
print(f"   Text chunks: {len(text_chunks)}")
print(f"   Visual chunks: {len(visual_chunks)}")


STEP 3: IMPROVED CHUNKING STRATEGY
 Chunking Complete!
   Total chunks: 189
   Enhanced performance table chunks: 6
   Text chunks: 121
   Visual chunks: 62


In [None]:
# ============================================================================
# STEP 4: CREATE AND LOAD INDEXES
# ============================================================================

print("\n" + "="*80)
print("STEP 4: CREATING INDEXES")
print("="*80)

# Initialize embedding model
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

def create_indexes(all_chunks):
    """Create and save FAISS and BM25 indexes"""
    # Create embeddings for all chunks
    print("Creating embeddings...")
    chunk_texts = [chunk["content"] for chunk in all_chunks]
    chunk_embeddings = embedding_model.encode(chunk_texts, show_progress_bar=True)

    # Create FAISS index
    embedding_dim = chunk_embeddings.shape[1]
    index = faiss.IndexFlatIP(embedding_dim)  # Inner product for similarity
    faiss.normalize_L2(chunk_embeddings)  # Normalize for cosine similarity
    index.add(chunk_embeddings)

    # Create BM25 index
    print("Creating BM25 index...")
    tokenized_chunks = [chunk.lower().split() for chunk in chunk_texts]
    bm25 = BM25Okapi(tokenized_chunks)

    print(f"‚úÖ Created FAISS index with {index.ntotal} embeddings")
    print(f"‚úÖ Created BM25 index with {len(tokenized_chunks)} documents")

    # Save indexes for future use
    faiss.write_index(index, "boeing_manual_faiss.index")
    with open("boeing_manual_bm25.pkl", "wb") as f:
        pickle.dump(bm25, f)

    with open("boeing_manual_chunks.pkl", "wb") as f:
        pickle.dump(all_chunks, f)

    print("‚úÖ Saved indexes and chunks to disk")

    return index, bm25, all_chunks

def load_indexes():
    """Load pre-built indexes from disk"""
    try:
        # Load FAISS index
        index = faiss.read_index("boeing_manual_faiss.index")

        # Load BM25 index
        with open("boeing_manual_bm25.pkl", "rb") as f:
            bm25 = pickle.load(f)

        # Load chunks
        with open("boeing_manual_chunks.pkl", "rb") as f:
            all_chunks = pickle.load(f)

        print("‚úÖ Loaded existing indexes from disk")
        return index, bm25, all_chunks
    except:
        print("‚ùå No existing indexes found. Creating new ones...")
        return None, None, None

index, bm25, saved_chunks = load_indexes()

if index is None:
    # Use the all_chunks created in STEP 3
    index, bm25, all_chunks = create_indexes(all_chunks)
else:
    # If indexes existed, load their chunks
    all_chunks = saved_chunks


In [6]:
# ============================================================================
# STEP 4: CREATE AND LOAD INDEXES (FIXED)
# ============================================================================

print("\n" + "="*80)
print("STEP 4: CREATING INDEXES")
print("="*80)

# Initialize embedding model
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

def create_indexes(chunks_to_index):
    """Create and save FAISS and BM25 indexes"""
    if chunks_to_index is None or len(chunks_to_index) == 0:
        raise ValueError("No chunks provided for indexing!")

    # Create embeddings for all chunks
    print("Creating embeddings...")
    chunk_texts = [chunk["content"] for chunk in chunks_to_index]
    chunk_embeddings = embedding_model.encode(chunk_texts, show_progress_bar=True)

    # Create FAISS index
    embedding_dim = chunk_embeddings.shape[1]
    index = faiss.IndexFlatIP(embedding_dim)  # Inner product for similarity
    faiss.normalize_L2(chunk_embeddings)  # Normalize for cosine similarity
    index.add(chunk_embeddings)

    # Create BM25 index
    print("Creating BM25 index...")
    tokenized_chunks = [chunk.lower().split() for chunk in chunk_texts]
    bm25 = BM25Okapi(tokenized_chunks)

    print(f"‚úÖ Created FAISS index with {index.ntotal} embeddings")
    print(f"‚úÖ Created BM25 index with {len(tokenized_chunks)} documents")

    # Save indexes for future use
    faiss.write_index(index, "boeing_manual_faiss.index")
    with open("boeing_manual_bm25.pkl", "wb") as f:
        pickle.dump(bm25, f)

    with open("boeing_manual_chunks.pkl", "wb") as f:
        pickle.dump(chunks_to_index, f)

    print("‚úÖ Saved indexes and chunks to disk")

    return index, bm25, chunks_to_index

def load_indexes():
    """Load pre-built indexes from disk"""
    try:
        # Load FAISS index
        index = faiss.read_index("boeing_manual_faiss.index")

        # Load BM25 index
        with open("boeing_manual_bm25.pkl", "rb") as f:
            bm25 = pickle.load(f)

        # Load chunks
        with open("boeing_manual_chunks.pkl", "rb") as f:
            chunks = pickle.load(f)

        print("‚úÖ Loaded existing indexes from disk")
        print(f"   FAISS index size: {index.ntotal}")
        print(f"   Total chunks: {len(chunks)}")
        return index, bm25, chunks
    except FileNotFoundError as e:
        print(f"‚ùå No existing indexes found: {e}")
        return None, None, None
    except Exception as e:
        print(f"‚ùå Error loading indexes: {e}")
        return None, None, None

# Try to load existing indexes first
loaded_index, loaded_bm25, loaded_chunks = load_indexes()

if loaded_index is None:
    # No existing indexes - create new ones using all_chunks from STEP 3
    print("\nüî® Building new indexes from scratch...")

    # Verify all_chunks exists and has content
    if 'all_chunks' not in locals() or all_chunks is None or len(all_chunks) == 0:
        raise ValueError("all_chunks from STEP 3 is not available or empty!")

    print(f"üìä Processing {len(all_chunks)} chunks from STEP 3")
    index, bm25, indexed_chunks = create_indexes(all_chunks)
else:
    # Use loaded indexes
    print("\n‚ôªÔ∏è Using existing indexes")
    index = loaded_index
    bm25 = loaded_bm25
    all_chunks = loaded_chunks  # Update all_chunks to loaded version

print("\n‚úÖ Index setup complete!")
print(f"   Final chunk count: {len(all_chunks)}")


STEP 4: CREATING INDEXES


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

‚ùå Error loading indexes: Error in faiss::FileIOReader::FileIOReader(const char*) at /project/third-party/faiss/faiss/impl/io.cpp:69: Error: 'f' failed: could not open boeing_manual_faiss.index for reading: No such file or directory

üî® Building new indexes from scratch...
üìä Processing 189 chunks from STEP 3
Creating embeddings...


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

Creating BM25 index...
‚úÖ Created FAISS index with 189 embeddings
‚úÖ Created BM25 index with 189 documents
‚úÖ Saved indexes and chunks to disk

‚úÖ Index setup complete!
   Final chunk count: 189


In [7]:
# ============================================================================
# STEP 5: CONSOLIDATED QUERY FUNCTIONS
# ============================================================================

print("\n" + "="*80)
print("STEP 5: CONSOLIDATED QUERY FUNCTIONS")
print("="*80)

def hybrid_search(query, top_k=5, alpha=0.5):
    """Hybrid search combining semantic (FAISS) and keyword (BM25)"""

    # Semantic search
    query_embedding = embedding_model.encode([query])
    query_embedding = np.array(query_embedding).astype('float32')
    distances, indices = index.search(query_embedding, top_k * 3)

    # Normalize semantic scores
    semantic_scores = 1 / (1 + distances[0])
    semantic_scores = semantic_scores / (semantic_scores.max() + 1e-6)

    # BM25 keyword search
    tokenized_query = query.lower().split()
    bm25_scores = bm25.get_scores(tokenized_query)
    bm25_scores = bm25_scores / (bm25_scores.max() + 1e-6)

    # Combine scores
    combined_scores = {}
    for idx, score in zip(indices[0], semantic_scores):
        combined_scores[idx] = alpha * score

    for idx, score in enumerate(bm25_scores):
        if idx in combined_scores:
            combined_scores[idx] += (1 - alpha) * score
        else:
            combined_scores[idx] = (1 - alpha) * score

    # Sort by combined score
    sorted_indices = sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)

    # Group by page to ensure page diversity
    page_groups = {}
    for idx, score in sorted_indices:
        chunk = all_chunks[idx]
        page_num = chunk["page_number"]

        if page_num not in page_groups:
            page_groups[page_num] = {
                "chunks": [],
                "max_score": score,
                "page_num": page_num,
                "semantic_score": 0,
                "bm25_score": 0
            }

        page_groups[page_num]["chunks"].append({
            "chunk_idx": idx,
            "score": score,
            "chunk": chunk
        })

        if score > page_groups[page_num]["max_score"]:
            page_groups[page_num]["max_score"] = score

        # Track individual scores for diagnostics
        if idx < len(semantic_scores):
            page_groups[page_num]["semantic_score"] = max(page_groups[page_num]["semantic_score"], semantic_scores[idx])
        if idx < len(bm25_scores):
            page_groups[page_num]["bm25_score"] = max(page_groups[page_num]["bm25_score"], bm25_scores[idx])

    # Sort pages by their highest scoring chunk
    sorted_pages = sorted(page_groups.values(), key=lambda x: x["max_score"], reverse=True)

    # Build results with diagnostic information
    results = []
    for page_group in sorted_pages[:top_k]:
        # Get the highest scoring chunk from this page
        best_chunk = max(page_group["chunks"], key=lambda x: x["score"])
        chunk = best_chunk["chunk"]

        results.append({
            "content": chunk["content"],
            "page_number": page_group["page_num"],
            "type": chunk.get("type", "text"),
            "score": float(best_chunk["score"]),
            "semantic_score": float(page_group["semantic_score"]),
            "bm25_score": float(page_group["bm25_score"]),
            "has_image": chunk.get("page_image") is not None,
            "page_image": chunk.get("page_image"),
            "metadata": chunk.get("metadata", {})
        })

    return results

def simple_rerank(query, results, top_k=5):
    """Simple re-ranking based on query type"""

    query_lower = query.lower()

    # Check if this is a performance table query
    is_performance_query = any(term in query_lower for term in
                              ["weight", "limit", "altitude", "runway", "performance", "field", "climb"])

    # Check if this is a procedural query
    is_procedural_query = any(term in query_lower for term in
                             ["procedure", "step", "checklist", "how to", "perform"])

    # Re-score based on query type
    for result in results:
        metadata = result.get("metadata", {})

        # Initialize rerank score with original score
        rerank_score = result["score"]

        # Boost performance tables for performance queries
        if is_performance_query and metadata.get("table_type"):
            rerank_score *= 1.5

        # Boost procedural content for procedural queries
        if is_procedural_query and "procedure" in result["content"].lower():
            rerank_score *= 1.3

        # Store the rerank score
        result["rerank_score"] = rerank_score

    # Sort by rerank score
    results.sort(key=lambda x: x["rerank_score"], reverse=True)

    return results[:top_k]

def query_boeing_manual(question, top_k=15, alpha=0.5, use_reranking=True, show_diagnostics=False):
    """Complete RAG query with optional re-ranking and diagnostics"""

    # Step 1: Initial retrieval with hybrid search
    results = hybrid_search(question, top_k=top_k*2, alpha=alpha)

    # Step 2: Re-ranking if enabled
    if use_reranking:
        results = simple_rerank(question, results, len(results))
        results = results[:top_k]
    else:
        results = results[:top_k]

    # Step 3: Prepare context for generation
    #page_numbers = sorted(list(set([r["page_number"] for r in results])))
    seen_pages = set()
    page_numbers = []
    for result in results:
        if result["page_number"] not in seen_pages:
            page_numbers.append(result["page_number"])
            seen_pages.add(result["page_number"])
    text_parts = []
    visual_parts = []

    for rank, r in enumerate(results, 1):
        if r["type"] == "visual" and r["has_image"]:
            visual_parts.append((rank, r))
        else:
            # Include metadata in context for better understanding
            metadata = r.get("metadata", {})
            metadata_str = ", ".join([f"{k}: {v}" for k, v in metadata.items() if v])

            if show_diagnostics:
                # Include diagnostic scores
                text_parts.append(f"[Page {r['page_number']} - Rank {rank} - Score: {r.get('rerank_score', r['score']):.3f}]\n"
                                 f"Semantic: {r.get('semantic_score', 0):.3f} | BM25: {r.get('bm25_score', 0):.3f}\n"
                                 f"Metadata: {metadata_str}\n"
                                 f"Content: {r['content']}")
            else:
                # Clean version without diagnostics
                text_parts.append(f"[Page {r['page_number']}]\n{r['content']}")

    context = "\n\n---\n\n".join(text_parts)

    # Step 4: Generate answer
    try:
        if visual_parts:
            model = genai.GenerativeModel('gemini-2.5-pro')
            parts = [f"""Answer this question about the Boeing 737 Operations Manual.

            CRITICAL INSTRUCTIONS FOR READING TABLES:
            - Look at the VISUAL table image carefully
            - Locate the EXACT row and column specified in the question
            - Read the value at the intersection VERY carefully
            - Double-check you're reading the correct cell
            - Tables may have multiple sub-columns - make sure you're in the right one
            - If the table has poor quality text extraction, RELY ON THE IMAGE

            Question: {question}

            Text context:
            {context}

            IMPORTANT: Visual tables below are the PRIMARY source. Read them carefully:
            """]

            for rank, vp in visual_parts:
                img = Image.open(io.BytesIO(vp["page_image"]))
                parts.append(f"\n[Page {vp['page_number']} - RANK {rank}]")
                parts.append(img)
                parts.append(f"\nExtracted text (may have OCR errors, use image if unclear):\n{vp['content'][:1000]}")
                parts.append("\n\nIMPORTANT: Read the table image carefully. Verify your answer by checking the specific row and column.")

            response = model.generate_content(parts)
            answer = response.text
        else:
            model = genai.GenerativeModel('gemini-2.5-pro')
            prompt = f"""Answer this question about the Boeing 737 Operations Manual.

            CRITICAL FOR TABLES:
            - Find the EXACT row mentioned (e.g., "1600 meters")
            - Find the EXACT column mentioned (e.g., "1000 FT", "WET")
            - Read the value at that specific intersection
            - Be extremely precise - one cell off gives wrong answer

            Question: {question}

            Context:
            {context}

            Read the table precisely and provide the exact value:"""

            response = model.generate_content(prompt)
            answer = response.text
    except Exception as e:
        answer = f"Error: {str(e)}"

    return {
        "answer": answer,
        "pages": page_numbers,
        "results": results if show_diagnostics else None  # Include detailed results only for diagnostics
    }

print("‚úÖ Consolidated query functions created")


STEP 5: CONSOLIDATED QUERY FUNCTIONS
‚úÖ Consolidated query functions created


In [8]:
# ============================================================================
# STEP 6: TESTING FUNCTION
# ============================================================================

print("\n" + "="*80)
print("STEP 6: TESTING FUNCTION")
print("="*80)

def test_rag_system(questions, show_diagnostics=False):
    """Test the RAG system with optional diagnostics"""

    for i, (question, expected_pages) in enumerate(questions, 1):
        print(f"\n{'='*80}")
        print(f"Test {i}/{len(questions)}: {question[:80]}...")
        print(f"Expected pages: {expected_pages}")
        print('='*80)

        # Get results
        response = query_boeing_manual(question, top_k=15, alpha=0.5, use_reranking=True, show_diagnostics=show_diagnostics)

        if show_diagnostics and response["results"]:
            # Print detailed results
            print("\nRetrieved pages with scores:")
            for j, result in enumerate(response["results"][:10], 1):
                page_num = result["page_number"]
                score = result.get("rerank_score", result["score"])
                semantic_score = result.get("semantic_score", 0)
                bm25_score = result.get("bm25_score", 0)
                is_expected = "‚úÖ" if page_num in expected_pages else "  "

                print(f"  {j}. {is_expected} Page {page_num}: Total={score:.3f}, Semantic={semantic_score:.3f}, BM25={bm25_score:.3f}")

        # Check if expected pages are included
        included = [p for p in expected_pages if p in response['pages']]
        missing = [p for p in expected_pages if p not in response['pages']]

        print(f"\n‚úÖ Expected pages included: {included}")
        print(f"‚ùå Expected pages missing: {missing}")

        print(f"\nüí° Answer:\n{response['answer']}\n")
        print(f"Result: {'‚úÖ PASS' if len(included) == len(expected_pages) else '‚ö†Ô∏è PARTIAL' if included else '‚ùå FAIL'}")

print("‚úÖ Testing function created")


STEP 6: TESTING FUNCTION
‚úÖ Testing function created


In [None]:
# ============================================================================
# GEMINI API CONFIGURATION
# ============================================================================

import google.generativeai as genai
import os

# Configure Gemini (update with your API key method)
os.environ['GEMINI_API_KEY'] = 'Replace with your actual API key' 
genai.configure(api_key=os.environ['GEMINI_API_KEY'])

# Test the connection
try:
    model = genai.GenerativeModel('gemini-2.5-pro')
    response = model.generate_content("Hello, can you respond with 'API connection successful'?")
    print(f"‚úÖ Gemini API connection: {response.text}")
except Exception as e:
    print(f"‚ùå Error connecting to Gemini API: {str(e)}")

‚úÖ Gemini API connection: API connection successful


In [11]:
print("\n" + "="*80)
print("STEP 7: COMPREHENSIVE RAG SYSTEM EVALUATION (UPDATED)")
print("="*80)

def evaluate_rag_system(test_questions, top_k=15, alpha=0.5, use_reranking=True):
    """
    Evaluates the RAG system with a focus on user-centric metrics, all computed @5:
    - Recall@5
    - Precision@5
    - F1-Score@5
    - MRR
    - MAP
    """
    evaluation_results = []

    # Lists to store metrics for each question
    recall_at_5_list = []
    precision_at_5_list = []
    f1_score_list = []
    mrr_list = []
    map_score_list = []

    print(f"Evaluating {len(test_questions)} questions (retrieving top {top_k} results)...")

    for i, (question, expected_pages) in enumerate(test_questions, 1):
        print(f"\n--- Evaluating Question {i}/{len(test_questions)} ---")

        response = query_boeing_manual(
            question,
            top_k=top_k,
            alpha=alpha,
            use_reranking=use_reranking,
            show_diagnostics=False
        )

        retrieved_pages = response['pages']

        # --- Find ranks of all correct pages ---
        correct_ranks = [rank for rank, page_num in enumerate(retrieved_pages, 1) if page_num in expected_pages]

        num_total_relevant = len(expected_pages)
        num_relevant_in_top_5 = len([p for p in retrieved_pages[:5] if p in expected_pages])

        # --- Recall@5 ---
        recall_at_5 = num_relevant_in_top_5 / num_total_relevant if num_total_relevant > 0 else 0
        recall_at_5_list.append(recall_at_5)

        # --- Precision@5 ---
        precision_at_5 = num_relevant_in_top_5 / 5
        precision_at_5_list.append(precision_at_5)

        # --- F1-Score@5 ---
        if (precision_at_5 + recall_at_5) > 0:
            f1_score = 2 * (precision_at_5 * recall_at_5) / (precision_at_5 + recall_at_5)
        else:
            f1_score = 0
        f1_score_list.append(f1_score)

        # --- Mean Reciprocal Rank (MRR) ---
        first_correct_rank = correct_ranks[0] if correct_ranks else None
        mrr_list.append(1 / first_correct_rank if first_correct_rank else 0)

        # --- Average Precision (for MAP) ---
        precisions_at_correct_docs = []
        for rank in correct_ranks:
            num_correct_up_to_this_rank = len([r for r in correct_ranks if r <= rank])
            precisions_at_correct_docs.append(num_correct_up_to_this_rank / rank)

        avg_precision = sum(precisions_at_correct_docs) / len(precisions_at_correct_docs) if precisions_at_correct_docs else 0
        map_score_list.append(avg_precision)

        # --- Store detailed results ---
        evaluation_results.append({
            "question": question,
            "expected_pages": expected_pages,
            "retrieved_pages": retrieved_pages,
            "correct_ranks": correct_ranks,
            "recall_at_5": recall_at_5,
            "precision_at_5": precision_at_5,
            "f1_score": f1_score,
            "mrr": 1 / first_correct_rank if first_correct_rank else 0,
            "map_score": avg_precision
        })

        print(f"Expected: {expected_pages} | Retrieved Top 5: {retrieved_pages[:5]}")
        print(f"Recall@5: {recall_at_5:.2f} | Precision@5: {precision_at_5:.2f} | F1@5: {f1_score:.2f}")

    # --- Summary Metrics ---
    num_questions = len(test_questions)
    summary_metrics = {
        "total_questions": num_questions,
        "mean_recall_at_5": sum(recall_at_5_list) / num_questions,
        "mean_precision_at_5": sum(precision_at_5_list) / num_questions,
        "mean_f1_score": sum(f1_score_list) / num_questions,
        "mean_reciprocal_rank": sum(mrr_list) / num_questions,
        "map_score": sum(map_score_list) / num_questions
    }

    # --- Final Composite Retrieval Score ---
    f1_weight = 0.4
    mrr_weight = 0.4
    map_weight = 0.2

    final_retrieval_score = (
        f1_weight * summary_metrics['mean_f1_score'] +
        mrr_weight * summary_metrics['mean_reciprocal_rank'] +
        map_weight * summary_metrics['map_score']
    )

    summary_metrics['final_retrieval_score'] = final_retrieval_score
    summary_metrics['score_weights'] = {'f1': f1_weight, 'mrr': mrr_weight, 'map': map_weight}

    return {
        "detailed_results": evaluation_results,
        "summary_metrics": summary_metrics
    }



STEP 7: COMPREHENSIVE RAG SYSTEM EVALUATION (UPDATED)


In [13]:
def print_evaluation_report(evaluation_results):
    """Prints a formatted report with the new user-centric metrics."""

    print("\n" + "="*80)
    print("COMPREHENSIVE EVALUATION REPORT (FOCUSED ON RECALL@5 & PRECISION@3)")
    print("="*80)

    metrics = evaluation_results['summary_metrics']
    weights = metrics['score_weights']

    print(f"Total Questions Evaluated: {metrics['total_questions']}")
    print("\n--- Key User-Centric Performance Metrics ---")
    print(f"Mean Recall@5 (Correct page in top 5):      {metrics['mean_recall_at_5']:.2%}")
    print(f"Mean Precision@5 (Correctness of top 5):    {metrics['mean_precision_at_5']:.2%}")
    print("\n--- Core Performance Metrics ---")
    print(f"Mean F1-Score@5:     {metrics['mean_f1_score']:.4f}")
    print(f"Mean Reciprocal Rank (MRR):         {metrics['mean_reciprocal_rank']:.4f}")
    print(f"Mean Average Precision (MAP):       {metrics['map_score']:.4f}")

    print("\n--- Final Composite Retrieval Score ---")
    print(f"Formula: {weights['f1']}*F1 + {weights['mrr']}*MRR + {weights['map']}*MAP")
    print(f"Final Score: {metrics['final_retrieval_score']:.4f}")

    print("\n--- Detailed Question-by-Question Analysis ---")
    for result in evaluation_results['detailed_results']:
        print(f"\nQuestion: {result['question'][:80]}...")
        print(f"  Expected: {result['expected_pages']} | Retrieved Top 5: {result['retrieved_pages'][:5]}")
        print(f"  Recall@5: {result['recall_at_5']:.2f} | Precision@5: {result['precision_at_5']:.2f}")

print("‚úÖ Updated comprehensive evaluation functions created.")

‚úÖ Updated comprehensive evaluation functions created.


In [12]:
# ============================================================================
# RUN TESTS
# ============================================================================

print("\n" + "="*80)
print("RUNNING TESTS")
print("="*80)

test_questions = [

    ("I'm calculating our takeoff weight for a dry runway. We're at 2,000 feet pressure altitude, and the OAT is 50¬∞C. What's the climb limit weight ?", [83]),
    ("We're doing a Flaps 15 takeoff. Remind me, what is the first flap selection we make during retraction, and at what speed?", [41]),
    ("We're planning a Flaps 40 landing on a wet runway at a 1,000-foot pressure altitude airport. If the wind-corrected field length is 1,600 meters, what is our field limit weight?", [99]),
    ("Reviewing the standard takeoff profile: After we're airborne and get a positive rate of climb, what is the first action we take?",[39,51]),
    ("For a standard visual pattern, what three actions must be completed prior to turning base?", [56]),
    ("When the Pilot Not Flying (PNF) makes CDU entries during flight, what must the Pilot Flying (PF) do prior to execution", [5]),
    ("I see an amber STAIRS OPER light illuminated on the forward attendant panel; what does that light indicate?", [126]),
    ("We've just completed the engine start. What is the correct configuration for the ISOLATION VALVE switch during the After Start Procedure?",[35]),
    ("During the Descent and Approach procedure, what action is taken with the AUTO BRAKE select switch , and what is the Pilot Flying's final action regarding the autobrake system during the Landing Roll procedure?",[43, 47])

]

# Run tests with diagnostics
test_rag_system(test_questions, show_diagnostics=True)


RUNNING TESTS

Test 1/9: I'm calculating our takeoff weight for a dry runway. We're at 2,000 feet pressur...
Expected pages: [83]

Retrieved pages with scores:
  1.    Page 40: Total=1.169, Semantic=0.961, BM25=0.586
  2. ‚úÖ Page 83: Total=1.045, Semantic=0.000, BM25=0.692
  3.    Page 82: Total=1.039, Semantic=0.000, BM25=0.692
  4.    Page 107: Total=0.889, Semantic=0.000, BM25=1.000
  5.    Page 103: Total=0.831, Semantic=0.000, BM25=0.780
  6.    Page 42: Total=0.790, Semantic=0.964, BM25=0.664
  7.    Page 104: Total=0.778, Semantic=0.000, BM25=0.862
  8.    Page 61: Total=0.761, Semantic=0.993, BM25=0.632
  9.    Page 74: Total=0.759, Semantic=0.000, BM25=0.575
  10.    Page 106: Total=0.741, Semantic=0.000, BM25=0.706

‚úÖ Expected pages included: [83]
‚ùå Expected pages missing: []

üí° Answer:
Based on the "Takeoff Field & Climb Limit Weights - Dry Runway" table for **2000 FT Pressure Altitude** on page 83, the climb limit weight at an OAT of **50¬∞C** is **52.2 (1000 KG)**

In [14]:
test_questions = [
    ("I'm calculating our takeoff weight for a dry runway. We're at 2,000 feet pressure altitude, and the OAT is 50¬∞C. What's the climb limit weight ?", [83]),
    ("We're doing a Flaps 15 takeoff. Remind me, what is the first flap selection we make during retraction, and at what speed?", [41]),
    ("We're planning a Flaps 40 landing on a wet runway at a 1,000-foot pressure altitude airport. If the wind-corrected field length is 1,600 meters, what is our field limit weight?", [99]),
    ("Reviewing the standard takeoff profile: After we're airborne and get a positive rate of climb, what is the first action we take?",[39,51]),
    ("For a standard visual pattern, what three actions must be completed prior to turning base?", [56]),
    ("When the Pilot Not Flying (PNF) makes CDU entries during flight, what must the Pilot Flying (PF) do prior to execution", [5]),
    ("I see an amber STAIRS OPER light illuminated on the forward attendant panel; what does that light indicate?", [126]),
    ("We've just completed the engine start. What is the correct configuration for the ISOLATION VALVE switch during the After Start Procedure?",[35]),
    ("During the Descent and Approach procedure, what action is taken with the AUTO BRAKE select switch , and what is the Pilot Flying's final action regarding the autobrake system during the Landing Roll procedure?",(43, 47)),
    ("Looking at the panel scan responsibilities for when the aircraft is stationary, who is responsible for the forward aisle stand?",[6])

]
# In your RUN TESTS section
evaluation_results = evaluate_rag_system(
    test_questions,
    top_k=10,
    alpha=0.5,
    use_reranking=True
)

# Print the comprehensive report
print_evaluation_report(evaluation_results)

Evaluating 10 questions (retrieving top 10 results)...

--- Evaluating Question 1/10 ---
Expected: [83] | Retrieved Top 5: [83, 82, 107, 103, 42]
Recall@5: 1.00 | Precision@5: 0.20 | F1@5: 0.33

--- Evaluating Question 2/10 ---
Expected: [41] | Retrieved Top 5: [40, 41, 67, 37, 54]
Recall@5: 1.00 | Precision@5: 0.20 | F1@5: 0.33

--- Evaluating Question 3/10 ---
Expected: [99] | Retrieved Top 5: [86, 85, 83, 82, 99]
Recall@5: 1.00 | Precision@5: 0.20 | F1@5: 0.33

--- Evaluating Question 4/10 ---
Expected: [39, 51] | Retrieved Top 5: [40, 39, 46, 42, 54]
Recall@5: 0.50 | Precision@5: 0.20 | F1@5: 0.29

--- Evaluating Question 5/10 ---
Expected: [56] | Retrieved Top 5: [67, 56, 49, 42, 4]
Recall@5: 1.00 | Precision@5: 0.20 | F1@5: 0.33

--- Evaluating Question 6/10 ---
Expected: [5] | Retrieved Top 5: [5, 4, 7, 46, 45]
Recall@5: 1.00 | Precision@5: 0.20 | F1@5: 0.33

--- Evaluating Question 7/10 ---
Expected: [126] | Retrieved Top 5: [126, 3, 18, 11, 112]
Recall@5: 1.00 | Precision@5: 0

In [17]:
test_questions = [
    ("I'm calculating our takeoff weight for a dry runway. We're at 2,000 feet pressure altitude, and the OAT is 50¬∞C. What's the climb limit weight ?", [83]),
    ("We're doing a Flaps 15 takeoff. Remind me, what is the first flap selection we make during retraction, and at what speed?", [41]),
    ("We're planning a Flaps 40 landing on a wet runway at a 1,000-foot pressure altitude airport. If the wind-corrected field length is 1,600 meters, what is our field limit weight?", [99]),
    ("Reviewing the standard takeoff profile: After we're airborne and get a positive rate of climb, what is the first action we take?",[39,51]),
    ("For a standard visual pattern, what three actions must be completed prior to turning base?", [56]),
    ("When the Pilot Not Flying (PNF) makes CDU entries during flight, what must the Pilot Flying (PF) do prior to execution", [5]),
    ("I see an amber STAIRS OPER light illuminated on the forward attendant panel; what does that light indicate?", [126]),
    ("We've just completed the engine start. What is the correct configuration for the ISOLATION VALVE switch during the After Start Procedure?",[35]),
    ("During the Descent and Approach procedure, what action is taken with the AUTO BRAKE select switch , and what is the Pilot Flying's final action regarding the autobrake system during the Landing Roll procedure?",(43, 47)),
    ("Looking at the panel scan responsibilities for when the aircraft is stationary, who is responsible for the forward aisle stand?",[6])

]
# In your RUN TESTS section
evaluation_results = evaluate_rag_system(
    test_questions,
    top_k=10,
    alpha=0.5,
    use_reranking=True
)

# Print the comprehensive report
print_evaluation_report(evaluation_results)

Evaluating 9 questions (retrieving top 10 results)...

--- Evaluating Question 1/9 ---
Expected: [83] | Retrieved Top 5: [83, 82, 107, 103, 42]
Recall@5: 1.00 | Precision@5: 0.20 | F1@5: 0.33

--- Evaluating Question 2/9 ---
Expected: [41] | Retrieved Top 5: [40, 41, 67, 37, 54]
Recall@5: 1.00 | Precision@5: 0.20 | F1@5: 0.33

--- Evaluating Question 3/9 ---
Expected: [99] | Retrieved Top 5: [86, 85, 83, 82, 99]
Recall@5: 1.00 | Precision@5: 0.20 | F1@5: 0.33

--- Evaluating Question 4/9 ---
Expected: [39, 51] | Retrieved Top 5: [40, 39, 46, 42, 54]
Recall@5: 0.50 | Precision@5: 0.20 | F1@5: 0.29

--- Evaluating Question 5/9 ---
Expected: [56] | Retrieved Top 5: [67, 56, 49, 42, 4]
Recall@5: 1.00 | Precision@5: 0.20 | F1@5: 0.33

--- Evaluating Question 6/9 ---
Expected: [5] | Retrieved Top 5: [5, 4, 7, 46, 45]
Recall@5: 1.00 | Precision@5: 0.20 | F1@5: 0.33

--- Evaluating Question 7/9 ---
Expected: [126] | Retrieved Top 5: [126, 3, 18, 11, 112]
Recall@5: 1.00 | Precision@5: 0.20 | F1

In [13]:
test_questions = [
    ("I'm calculating our takeoff weight for a dry runway. We're at 2,000 feet pressure altitude, and the OAT is 50¬∞C. What's the climb limit weight ?", [83]),
    ("We're doing a Flaps 15 takeoff. Remind me, what is the first flap selection we make during retraction, and at what speed?", [41]),
    ("We're planning a Flaps 40 landing on a wet runway at a 1,000-foot pressure altitude airport. If the wind-corrected field length is 1,600 meters, what is our field limit weight?", [99]),
    ("Reviewing the standard takeoff profile: After we're airborne and get a positive rate of climb, what is the first action we take?",[39,51]),
    ("For a standard visual pattern, what three actions must be completed prior to turning base?", [56]),
    ("When the Pilot Not Flying (PNF) makes CDU entries during flight, what must the Pilot Flying (PF) do prior to execution", [5]),
    ("I see an amber STAIRS OPER light illuminated on the forward attendant panel; what does that light indicate?", [126]),
    ("We've just completed the engine start. What is the correct configuration for the ISOLATION VALVE switch during the After Start Procedure?",[35]),
    ("During the Descent and Approach procedure, what action is taken with the AUTO BRAKE select switch , and what is the Pilot Flying's final action regarding the autobrake system during the Landing Roll procedure?",(43, 47))

]
# In your RUN TESTS section
evaluation_results = evaluate_rag_system(
    test_questions,
    top_k=10,
    alpha=0.5,
    use_reranking=True
)

# Print the comprehensive report
print_evaluation_report(evaluation_results)

Evaluating 9 questions (retrieving top 10 results)...

--- Evaluating Question 1/9 ---
Expected: [83] | Retrieved Top 5: [83, 82, 107, 103, 42]
Recall@5: 1.00 | Precision@3: 0.33

--- Evaluating Question 2/9 ---
Expected: [41] | Retrieved Top 5: [40, 41, 67, 37, 54]
Recall@5: 1.00 | Precision@3: 0.33

--- Evaluating Question 3/9 ---
Expected: [99] | Retrieved Top 5: [86, 85, 83, 82, 99]
Recall@5: 1.00 | Precision@3: 0.00

--- Evaluating Question 4/9 ---
Expected: [39, 51] | Retrieved Top 5: [40, 39, 46, 42, 54]
Recall@5: 0.50 | Precision@3: 0.33

--- Evaluating Question 5/9 ---
Expected: [56] | Retrieved Top 5: [67, 56, 49, 42, 4]
Recall@5: 1.00 | Precision@3: 0.33

--- Evaluating Question 6/9 ---
Expected: [5] | Retrieved Top 5: [5, 4, 7, 46, 45]
Recall@5: 1.00 | Precision@3: 0.33

--- Evaluating Question 7/9 ---
Expected: [126] | Retrieved Top 5: [126, 3, 18, 11, 112]
Recall@5: 1.00 | Precision@3: 0.33

--- Evaluating Question 8/9 ---
Expected: [35] | Retrieved Top 5: [35, 67, 34, 64

##REranking with cross_encoder

In [14]:
# OPTION 1: Cross-Encoder Reranking (Best Performance)
# ============================================================================
from sentence_transformers import CrossEncoder

# Initialize once at startup
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

def cross_encoder_rerank(query, results, top_k=5):
    """
    Uses a trained cross-encoder to score query-document pairs.
    Much more accurate than rule-based scoring.
    """
    # Prepare query-document pairs
    pairs = [[query, result["content"][:512]] for result in results]

    # Get relevance scores from cross-encoder
    scores = cross_encoder.predict(pairs)

    # Add scores to results
    for result, score in zip(results, scores):
        result["rerank_score"] = float(score)

    # Sort by rerank score
    results.sort(key=lambda x: x["rerank_score"], reverse=True)

    return results[:top_k]



config.json:   0%|          | 0.00/794 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

In [15]:
def query_boeing_manual(question, top_k=15, alpha=0.5, use_reranking=True, show_diagnostics=False):
    """Complete RAG query with optional re-ranking and diagnostics"""

    # Step 1: Initial retrieval with hybrid search
    results = hybrid_search(question, top_k=top_k*2, alpha=alpha)

    # Step 2: Re-ranking if enabled
    if use_reranking:
        results = cross_encoder_rerank(question, results, top_k)
    else:
        results = results[:top_k]

    # Step 3: Prepare context for generation
    #page_numbers = sorted(list(set([r["page_number"] for r in results])))
    seen_pages = set()
    page_numbers = []
    for result in results:
        if result["page_number"] not in seen_pages:
            page_numbers.append(result["page_number"])
            seen_pages.add(result["page_number"])
    text_parts = []
    visual_parts = []

    for rank, r in enumerate(results, 1):
        if r["type"] == "visual" and r["has_image"]:
            visual_parts.append((rank, r))
        else:
            # Include metadata in context for better understanding
            metadata = r.get("metadata", {})
            metadata_str = ", ".join([f"{k}: {v}" for k, v in metadata.items() if v])

            if show_diagnostics:
                # Include diagnostic scores
                text_parts.append(f"[Page {r['page_number']} - Rank {rank} - Score: {r.get('rerank_score', r['score']):.3f}]\n"
                                 f"Semantic: {r.get('semantic_score', 0):.3f} | BM25: {r.get('bm25_score', 0):.3f}\n"
                                 f"Metadata: {metadata_str}\n"
                                 f"Content: {r['content']}")
            else:
                # Clean version without diagnostics
                text_parts.append(f"[Page {r['page_number']}]\n{r['content']}")

    context = "\n\n---\n\n".join(text_parts)

    # Step 4: Generate answer
    try:
        if visual_parts:
            model = genai.GenerativeModel('gemini-2.5-pro')
            parts = [f"""Answer this question about the Boeing 737 Operations Manual.

            CRITICAL INSTRUCTIONS FOR READING TABLES:
            - Look at the VISUAL table image carefully
            - Locate the EXACT row and column specified in the question
            - Read the value at the intersection VERY carefully
            - Double-check you're reading the correct cell
            - Tables may have multiple sub-columns - make sure you're in the right one
            - If the table has poor quality text extraction, RELY ON THE IMAGE

            Question: {question}

            Text context:
            {context}

            IMPORTANT: Visual tables below are the PRIMARY source. Read them carefully:
            """]

            for rank, vp in visual_parts:
                img = Image.open(io.BytesIO(vp["page_image"]))
                parts.append(f"\n[Page {vp['page_number']} - RANK {rank}]")
                parts.append(img)
                parts.append(f"\nExtracted text (may have OCR errors, use image if unclear):\n{vp['content'][:1000]}")
                parts.append("\n\nIMPORTANT: Read the table image carefully. Verify your answer by checking the specific row and column.")

            response = model.generate_content(parts)
            answer = response.text
        else:
            model = genai.GenerativeModel('gemini-2.5-pro')
            prompt = f"""Answer this question about the Boeing 737 Operations Manual.

            CRITICAL FOR TABLES:
            - Find the EXACT row mentioned (e.g., "1600 meters")
            - Find the EXACT column mentioned (e.g., "1000 FT", "WET")
            - Read the value at that specific intersection
            - Be extremely precise - one cell off gives wrong answer

            Question: {question}

            Context:
            {context}

            Read the table precisely and provide the exact value:"""

            response = model.generate_content(prompt)
            answer = response.text
    except Exception as e:
        answer = f"Error: {str(e)}"

    return {
        "answer": answer,
        "pages": page_numbers,
        "results": results if show_diagnostics else None  # Include detailed results only for diagnostics
    }
