In [9]:
import os 
import json
import pandas as pd
import os
import time
from langchain_core.documents import Document
from ragas.testset.graph import Node, NodeType, KnowledgeGraph
from langchain_google_genai import ChatGoogleGenerativeAI
from ragas.llms import LangchainLLMWrapper


In [5]:
# Put the exact folder that contains pdfinfo.exe here (no leading backslash).
poppler_bin = r"C:\Users\user\Downloads\poppler-25.11.0\Library\bin"

tesseract_dir = r"C:\Program Files\Tesseract-OCR"

# ensure pdf2image/pdf->image & unstructured subprocesses find it now
os.environ["PATH"] = tesseract_dir + os.pathsep + os.environ.get("PATH", "")
# Ensure the current Python process can see it
os.environ["PATH"] = poppler_bin + os.pathsep + os.environ.get("PATH", "")

In [6]:


# then run partition_pdf as before
from unstructured.partition.pdf import partition_pdf
base = r"C:\Users\user\Desktop\Mission_plane"
file_name = "pdf_example_Operator.pdf"
full_path = os.path.join(base, file_name)

raw_pdf_elements = partition_pdf(
    filename=full_path,
    image_output_dir_path=base,
    extract_images_in_pdf=True,
    infer_table_structure=True,
    strategy="hi_res",
)
print("Done — elements:", len(raw_pdf_elements))




The `max_size` parameter is deprecated and will be removed in v4.26. Please specify in `size['longest_edge'] instead`.


Done — elements: 81


In [None]:
from unstructured.documents.elements import Header, Footer, Title,NarrativeText, ListItem
from unstructured.chunking.title import chunk_by_title
import numpy as np 

## TODO:: find more in the whole document.
text_to_ignore = [
    "UNCLASSIFIED",
    "RESTRICTED",
    "CB-AGS Handbook",
    "NORMAL PROCEDURES",
    "NORMAL PROCEDURE", # Handling slight variations
    "16814",
    "Decrmber 2024", # Note: Copied the typo from your PDF content
    "December 2024",
    "THIS PAGE INTENTIONALLY LEFT BLANK",
    "Page NP"
]
# 3. Filter the elements
cleaned_elements = []
dynamic_titles = set()

original_count = len(raw_pdf_elements)

for element in raw_pdf_elements:
    # Check 1: Remove explicit Header/Footer types detected by the model
    if isinstance(element, (Header, Footer)):
        continue

    # Check 2: Remove elements that match our ignore list
    # We use 'strip()' to remove accidental whitespace
    clean_text = element.text.strip()

    # if clean_text in text_to_ignore:
    if np.any([i in clean_text for i in text_to_ignore]):
        continue


    # Keep the element if it passed all checks
    cleaned_elements.append(element)

In [11]:
cleaned_elements
for el in cleaned_elements:
    if 'Title' in str(type(el)):
      print(el)

Table of Contents
Before Flight
Before Power up
N Verify the following MS C.Bs are in the correct state
3. _ Ensure the following MS switches are in the co rrect state


In [12]:
print(f"   -> Removed {len(raw_pdf_elements) - len(cleaned_elements)} noise elements.")

# 4. Apply Chunking on the CLEAN list
print("2. Chunking filtered elements...")
chunks = chunk_by_title(
    elements=cleaned_elements,
    max_characters=11000,
    combine_text_under_n_chars=10,
    overlap = 1000
    # new_after_n_chars=4000
)

   -> Removed 43 noise elements.
2. Chunking filtered elements...


In [13]:
ind = 2
print("\n--- Example Chunk 1 (Type & Content) ---")
print(f"Type: {type(chunks[ind])}")
# Tables will be represented as HTML strings inside the chunk content
print(f"Text Start: {chunks[ind].text[:700]}...")
print("=" * 40)

print(f"Text End: {chunks[ind].text[-300:]}...")
print("-" * 40)
print(f"Metadata Keys: {list(chunks[ind].metadata.to_dict().keys())}")
# Metadata is crucial for RAG, containing source file, page number, and element type
print(f"Source Page: {chunks[ind].metadata.page_number}")


--- Example Chunk 1 (Type & Content) ---
Type: <class 'unstructured.documents.elements.CompositeElement'>
Text Start: Before Power up

On UTS server, insert the removable media containing MPD...
Text End: Before Power up

On UTS server, insert the removable media containing MPD...
----------------------------------------
Metadata Keys: ['file_directory', 'filename', 'filetype', 'languages', 'last_modified', 'page_number', 'orig_elements']
Source Page: 4


# Pre-Process for embedding

In [14]:
import pandas as pd
from io import StringIO

def format_chunk_for_embedding(element):
    """
    Combines the raw context (titles/text) with the structured table (if present).
    """
    # 1. Capture the full context (Title, Instructions, Bullet points)
    # This typically includes the section header (e.g., "Before Power Up")
    # Note: This also includes a messy version of the table, but that's okay.
    base_text = element.text.strip()

    structured_table_text = ""

    # 2. Check if there is a table and process it specifically
    if hasattr(element.metadata, 'text_as_html') and element.metadata.text_as_html:
        try:
            html = element.metadata.text_as_html
            # Parse HTML to DataFrame
            dfs = pd.read_html(StringIO(html))

            if dfs:
                df = dfs[0] # Assume the first table is the main one
                df = df.fillna('') # Clean NaNs

                # Convert to Markdown for clear structural understanding
                markdown_table = df.to_markdown(index=False)

                # Create a distinct separator so the model knows this is the clean version
                structured_table_text = f"\n\n[STRUCTURED DATATABLE]:\n{markdown_table}"

                # OPTIONAL: Create semantic sentences for better retrieval
                # meaningful_rows = []
                # for _, row in df.iterrows():
                #     meaningful_rows.append(f"Item {row.get('NAME', 'Unknown')} located in {row.get('LOCATION', 'Unknown')} must be {row.get('STATE', 'Unknown')}.")
                # structured_table_text += "\n" + "\n".join(meaningful_rows)

        except Exception as e:
            print(f"Error parsing table HTML: {e}")

    # 3. Combine them
    # We place the Base Text first (to set context) and the Structured Table second.
    final_combined_text = f"SECTION CONTEXT:\n{base_text}{structured_table_text}"

    return final_combined_text

processed_chunks = [format_chunk_for_embedding(chunk) for chunk in chunks]

# # Print a preview of the first one to verify
# print(processed_chunks[0])

# Embedding

In [15]:
import pandas as pd
from sentence_transformers import SentenceTransformer
# import faiss
import numpy as np
import json
from IPython.display import display, Markdown

# --- 1. CONFIGURATION ---
EMBEDDING_MODEL_NAME = 'BAAI/bge-small-en-v1.5'
EMBEDDING_DIMENSION = 384




## Creating vector-DB - Faiss

In [16]:
import faiss

"""
Gpu Version
"""
# def build_index(texts, model_name, dim):
#     print(f"Loading embedding model: {model_name}...")
#     model = SentenceTransformer(model_name, device='cuda')  # ensure embeddings are on GPU
#     print("Generating embeddings...")
#     embeddings = model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)

#     print("Creating FAISS GPU index...")
#     # Step 1: CPU index
#     index_cpu = faiss.IndexFlatL2(dim)

#     # Step 2: Initialize GPU resources
#     res = faiss.StandardGpuResources()

#     # Step 3: Move index to GPU
#     index_gpu = faiss.index_cpu_to_gpu(res, 0, index_cpu)

#     # Step 4: Add embeddings
#     index_gpu.add(embeddings)

#     return model, index_gpu
# model, index = build_index(processed_chunks, EMBEDDING_MODEL_NAME, EMBEDDING_DIMENSION)

"""
CPU version
"""
def build_index(texts, model_name, dim):
    print(f"Loading embedding model: {model_name}...")
    model = SentenceTransformer(model_name)
    print("Generating embeddings...")
    embeddings = model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
    print("Creating FAISS index...")
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)
    return model, index

model, index = build_index(processed_chunks, EMBEDDING_MODEL_NAME, EMBEDDING_DIMENSION)


Loading embedding model: BAAI/bge-small-en-v1.5...
Generating embeddings...
Creating FAISS index...


In [17]:
import numpy as np

def retrieve_relevant_chunks(query, model, index, stored_chunks, k=3):
    """
    1. Encodes the query.
    2. Searches the FAISS index.
    3. Retrieves the actual text from the stored_chunks list.
    """
    print(f"\n--- Query: '{query}' ---")

    # 1. Encode the query to vector
    # normalize_embeddings=True is crucial because you used it during indexing
    query_embedding = model.encode([query], convert_to_numpy=True, normalize_embeddings=True)

    # 2. Search Index
    # D = Distances (Lower is better for L2, Higher is better for Inner Product)
    # I = Indices (The row numbers in your processed_chunks list)
    D, I = index.search(query_embedding, k)

    # 3. Fetch and Print Results
    results = []
    for rank, (score, idx) in enumerate(zip(D[0], I[0])):
        if idx < len(stored_chunks): # Safety check
            retrieved_text = stored_chunks[idx]
            print(f"\n[Rank {rank+1}] (Score: {score:.4f})")
            print(f"Content snippet: {retrieved_text[:200]}...") # Printing first 200 chars for preview
            results.append(retrieved_text)

    return results

# --- TEST EXECUTION ---

# These queries are specifically designed based on the PDF content you provided
test_queries = [
    "What is the required state for EFB L before power up?",
    "What checks should be done on the removable media before flight?",
    "Do not use circuit breakers as on/off switches",
    "Where is the RDR Tx switch located?"
]

# Run the tests
for q in test_queries:
    retrieve_relevant_chunks(q, model, index, processed_chunks, k=1)
    break


--- Query: 'What is the required state for EFB L before power up?' ---

[Rank 1] (Score: 0.6536)
Content snippet: SECTION CONTEXT:
N Verify the following MS C.Bs are in the correct state

NOTE: This paragraph (CBs state) can be done parallel the next paragraph (Switches state)

to

LOCATION NAME STATE Cockpit EFB...


# Rag ERvaluation

## RAGAs Initlization

### Initilize ragas llm - gemini 1.5 flash + Embedding Model 

In [67]:
from langchain_core.documents import Document
from ragas.testset.graph import Node, NodeType, KnowledgeGraph
from langchain_google_genai import ChatGoogleGenerativeAI
from ragas.llms import LangchainLLMWrapper
import os 
# ==========================================
# SETUP: Initialize Gemini LLM
# ==========================================
# 1. Set your Google API Key
# Replace 'YOUR_GOOGLE_API_KEY_HERE' with your actual key or ensure it's in the environment
if "GOOGLE_API_KEY" not in os.environ:
    os.environ["GOOGLE_API_KEY"] = "AIzaSyBHcnu2MXHEtqbhIKDKIEKJOu1s_7WD-uQ"

print("Initializing Gemini Model...")



# 2. Initialize the Google Gemini Model with Retry Logic
# We try Flash first (faster), then Pro (more stable availability)

google_llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.1,
    # max_output_tokens=1024
)
# Test connection quickly
google_llm.invoke("Hello")
print("Using Gemini 2.5 Flash")
# 3. Wrap for Ragas
# This 'ragas_llm' object replaces the local pipeline
ragas_llm = LangchainLLMWrapper(google_llm)
print("Ragas LLM initialized with Google Gemini!")

Initializing Gemini Model...
Using Gemini 2.5 Flash
Ragas LLM initialized with Google Gemini!


  ragas_llm = LangchainLLMWrapper(google_llm)


In [None]:
import pandas as pd
import faiss
import numpy as np
from ragas import evaluate
from ragas.metrics import context_precision, context_recall
from datasets import Dataset
from langchain_huggingface import HuggingFaceEmbeddings
from ragas.embeddings import LangchainEmbeddingsWrapper

# Define Embedding Model
EMBEDDING_MODEL_NAME = 'BAAI/bge-small-en-v1.5'
EMBEDDING_DIMENSION = 384

# Initialize the embedding model
lc_embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs={'device': 'cpu'} # Use 'cpu' if GPU is not available
)

# Wrap for Ragas
ragas_embeddings = LangchainEmbeddingsWrapper(lc_embeddings)

### bulding knowledge graph

In [19]:
ragas_nodes = []

# We iterate through both the original chunks (for metadata) 
# and your processed text (for content) simultaneously.
for raw_chunk, clean_text in zip(chunks, processed_chunks):
    
    # Extract filename safely
    source_meta = raw_chunk.metadata.to_dict()
    filename = source_meta.get('filename') or source_meta.get('file_directory') or "manual.pdf"
    page_num = source_meta.get('page_number', 0)

    # Create the Node manually
    # This avoids the "AttributeError: from_langchain_document" bug
    node = Node(
        type=NodeType.DOCUMENT,
        properties={
            "page_content": clean_text,  # This contains your "SECTION CONTEXT: ..." string
            "filename": filename,
            "page_number": page_num
        }
    )
    ragas_nodes.append(node)

# Initialize the Graph
kg = KnowledgeGraph(nodes=ragas_nodes)
print("Knowledge Graph built successfully.")

Knowledge Graph built successfully.


In [89]:
"""
Testing The Embedding model size comapring to our data chunks and context.
"""

import pandas as pd
from transformers import AutoTokenizer

# 1. Load Tokenizer
model_name = 'BAAI/bge-small-en-v1.5'
tokenizer = AutoTokenizer.from_pretrained(model_name)
max_length = tokenizer.model_max_length

print(f"Analyzing {len(kg.nodes)} nodes against model limit: {max_length} tokens")

# 2. Collect Data
data = []

for i, node in enumerate(kg.nodes):
    content = node.properties.get('page_content', '')
    
    # Count tokens
    token_ids = tokenizer.encode(content, add_special_tokens=True)
    count = len(token_ids)
    
    # Determine status
    if count > max_length:
        status = "⚠️ TRUNCATED"
        excess = count - max_length
    else:
        status = "✅ SAFE"
        excess = 0
        
    # Append row
    data.append({
        "Node ID": i,
        "Status": status,
        "Token Count": count,
        "Excess Tokens": excess,
        "Content Snippet": content[:50].replace('\n', ' ') + "..." # One-line snippet
    })

# 3. Create and Display Table
df_analysis = pd.DataFrame(data)

# Display the table
print("\n--- Token Usage Summary ---")
print(df_analysis.to_markdown(index=False))


Token indices sequence length is longer than the specified maximum sequence length for this model (1731 > 512). Running this sequence through the model will result in indexing errors


Analyzing 5 nodes against model limit: 512 tokens

--- Token Usage Summary ---
|   Node ID | Status       |   Token Count |   Excess Tokens | Content Snippet                                       |
|----------:|:-------------|--------------:|----------------:|:------------------------------------------------------|
|         0 | ⚠️ TRUNCATED |          1731 |            1219 | SECTION CONTEXT: Table of Contents  Before FIIQNt.... |
|         1 | ✅ SAFE      |           109 |               0 | SECTION CONTEXT: Before Flight  1. Mission Plannin... |
|         2 | ✅ SAFE      |            21 |               0 | SECTION CONTEXT: Before Power up  On UTS server, i... |
|         3 | ⚠️ TRUNCATED |           775 |             263 | SECTION CONTEXT: N Verify the following MS C.Bs ar... |
|         4 | ⚠️ TRUNCATED |           661 |             149 | SECTION CONTEXT: 3. _ Ensure the following MS swit... |


## KEYWORDS evaluation test

In [None]:
# ==========================================
# STEP 1: Build the "Student" (The Retriever)
# ==========================================
print("Step 1: Building FAISS Index (The Retriever)...")

# 1. Prepare the text data from your nodes
doc_texts = [node.properties["page_content"] for node in ragas_nodes]

# 2. Generate Embeddings
# We use the 'ragas_embeddings' wrapper to embed the documents
print("Generating embeddings for knowledge graph...")
doc_embeddings = ragas_embeddings.embed_documents(doc_texts)

# 3. Convert to Numpy (Required for FAISS)
doc_embeddings_np = np.array(doc_embeddings).astype("float32")

# 4. Build the FAISS Index directly
dimension = doc_embeddings_np.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(doc_embeddings_np)

print(f"FAISS Index built with {index.ntotal} documents.")


In [81]:
# ==========================================
# 3. GENERATION LOOP (The Exam - Scaled Up)
# ==========================================
print("\nStarting 10x Generation Loop...")

test_cases = []

# UPDATED PROMPT: Asks for a list of 10 items
prompt_template = """
You are a technical expert creating a comprehensive search evaluation dataset.
Analyze the following Operator Handbook text:

TEXT:
{text}

INSTRUCTIONS:
1. Identify various procedures, component states, safety warnings, and operational details in this text.
2. Generate 10 DISTINCT search scenarios. For each scenario, create a unique set of search keywords.
3. Extract the exact sentence from the text that answers each query.

OUTPUT FORMAT:
Return ONLY a raw JSON object with a single key "cases" containing a list of 10 objects.
Example structure:
{{
    "cases": [
        {{ "query": "keyword1 keyword2", "reference": "Answer sentence 1." }},
        {{ "query": "keyword3 keyword4", "reference": "Answer sentence 2." }},
        ...
    ]
}}
"""

for i, node in enumerate(ragas_nodes[1:]): #not includign the table of contents.
    context = node.properties["page_content"]
    
    # if len(context) < 50:
    #     print(f"  - Node {i}: Skipped (Too short)")
    #     continue

    print(f"  - Processing Node {i} (Generating ~10 cases)...")

    # 1. Call Gemini
    # We increase output tokens slightly to ensure the full list fits
    response = google_llm.invoke(prompt_template.format(text=context[:2000]))
    
    # 2. Clean Response
    clean_str = response.content.replace("```json", "").replace("```", "").strip()    
    # 3. Parse JSON
    data = json.loads(clean_str)
    
    # 4. Validate & Save List
    current_batch_count = 0
    if "cases" in data and isinstance(data["cases"], list):
        for item in data["cases"]:
            if "query" in item and "reference" in item:
                test_cases.append({
                    "user_input": item["query"],
                    "reference": item["reference"],
                    "source_context": context,
                    "page": node.properties["page_number"]
                })
                current_batch_count += 1
        print(f"    -> Successfully added {current_batch_count} test cases.")
    else:
        print(f"    -> Failed (JSON missing 'cases' list). Raw keys: {data.keys()}")
            
    # except Exception as e:
    #     print(f"    -> Error generating for Node {i}: {e}")
    
    # Sleep to be polite to API limits
    time.sleep(2)
    
df_results = pd.DataFrame(test_cases)
df_results


Starting 10x Generation Loop...
  - Processing Node 0 (Generating ~10 cases)...
    -> Successfully added 10 test cases.
  - Processing Node 1 (Generating ~10 cases)...
    -> Successfully added 10 test cases.
  - Processing Node 2 (Generating ~10 cases)...
    -> Successfully added 10 test cases.
  - Processing Node 3 (Generating ~10 cases)...
    -> Successfully added 10 test cases.


In [82]:
df_results = pd.DataFrame(test_cases)
df_results

Unnamed: 0,user_input,reference,source_context,page
0,prepare MPD save removable media,Prepare MPD and save it on the removable media,SECTION CONTEXT:\nBefore Flight\n\n1. Mission ...,3
1,check removable media free space,Check the removable media free space availability,SECTION CONTEXT:\nBefore Flight\n\n1. Mission ...,3
2,perform Technical Pre-Flight Procedure SO-5,Perform Technical Pre-Flight Procedure (SO-5) ...,SECTION CONTEXT:\nBefore Flight\n\n1. Mission ...,3
3,Systems Serviceability Pre-Flight Procedure SO-6,Perform Technical Pre-Flight Procedure (SO-5) ...,SECTION CONTEXT:\nBefore Flight\n\n1. Mission ...,3
4,ELTA recommends System Book maintenance,NOTE: ELTA recommends maintaining a System Book,SECTION CONTEXT:\nBefore Flight\n\n1. Mission ...,3
5,System Book separate A/C Book,e Separate from A/C Book,SECTION CONTEXT:\nBefore Flight\n\n1. Mission ...,3
6,log system events System Book,e Log system events on System Book,SECTION CONTEXT:\nBefore Flight\n\n1. Mission ...,3
7,System Book overdue waivers status,e Ensure SYSTEM BOOK has no overdue waivers,SECTION CONTEXT:\nBefore Flight\n\n1. Mission ...,3
8,comprehend installation configuration,e Comprehend the configuration of the installa...,SECTION CONTEXT:\nBefore Flight\n\n1. Mission ...,3
9,understand existing system faults,e Comprehend the configuration of the installa...,SECTION CONTEXT:\nBefore Flight\n\n1. Mission ...,3


In [None]:
# ==========================================
# STEP 3: Take the Exam (Run Retrieval)
# ==========================================
print("Step 3: Running the Exam (Retrieving contexts for queries)...")

# We need to add a "retrieved_contexts" column to your test dataset
test_questions = df_results[df_results['page']!=1]["user_input"].tolist()
ground_truths = df_results["reference"].tolist()
retrieved_contexts = []

for query in test_questions:
    # 1. Embed the query using the same embedding model
    query_embedding = ragas_embeddings.embed_query(query)
    query_embedding_np = np.array([query_embedding]).astype("float32")
    
    # 2. Search the FAISS index (Retrieve top 1 result)
    k = 1
    distances, indices = index.search(query_embedding_np, k)
    
    # 3. Extract the actual text based on the returned indices
    # indices[0] contains the list of IDs found for the first query
    found_texts = [doc_texts[idx] for idx in indices[0]]
    retrieved_contexts.append(found_texts)

# Create the dataset Ragas expects
evaluation_data = {
    "user_input": test_questions,      # What the user typed
    "reference": ground_truths,        # The correct answer/fact
    "retrieved_contexts": retrieved_contexts # What your system found
}

ragas_dataset = Dataset.from_dict(evaluation_data)


Step 2: Running the Exam (Retrieving contexts for queries)...


In [114]:
# ==========================================
# STEP 3: Grade the Exam (Calculate Metrics)
# ==========================================
print("Step 3: Grading with Ragas Metrics...")

# We define the metrics we want:
# 1. Context Recall: Did we retrieve the correct information? (Sensitivity)
# 2. Context Precision: Was the correct information at the top? (Ranking)

metrics = [context_recall, context_precision]

# We assume 'ragas_llm' and 'ragas_embeddings' are already loaded from your previous code
results = evaluate(
    dataset=ragas_dataset,
    metrics=metrics,
    llm=ragas_llm,       # Use Gemini as the Judge
    embeddings=ragas_embeddings
)

# ==========================================
# STEP 4: Show Report Card
# ==========================================
print("\n========================================")
print("          EVALUATION REPORT             ")
print("========================================")
print(results)

# Convert to table for detailed analysis
df_keywords_test_scores = results.to_pandas()
df_keywords_test_scores

Step 3: Grading with Ragas Metrics...


Evaluating: 100%|██████████| 80/80 [02:24<00:00,  1.81s/it]



          EVALUATION REPORT             
{'context_recall': 0.6500, 'context_precision': 0.6500}


Unnamed: 0,user_input,retrieved_contexts,reference,context_recall,context_precision
0,prepare MPD save removable media,[SECTION CONTEXT:\nBefore Power up\n\nOn UTS s...,Prepare MPD and save it on the removable media,0.0,0.0
1,check removable media free space,[SECTION CONTEXT:\nBefore Power up\n\nOn UTS s...,Check the removable media free space availability,0.0,0.0
2,perform Technical Pre-Flight Procedure SO-5,[SECTION CONTEXT:\nBefore Flight\n\n1. Mission...,Perform Technical Pre-Flight Procedure (SO-5) ...,1.0,1.0
3,Systems Serviceability Pre-Flight Procedure SO-6,[SECTION CONTEXT:\nBefore Flight\n\n1. Mission...,Perform Technical Pre-Flight Procedure (SO-5) ...,1.0,1.0
4,ELTA recommends System Book maintenance,[SECTION CONTEXT:\nBefore Flight\n\n1. Mission...,NOTE: ELTA recommends maintaining a System Book,1.0,1.0
5,System Book separate A/C Book,[SECTION CONTEXT:\nN Verify the following MS C...,e Separate from A/C Book,0.0,0.0
6,log system events System Book,[SECTION CONTEXT:\nBefore Flight\n\n1. Mission...,e Log system events on System Book,1.0,1.0
7,System Book overdue waivers status,[SECTION CONTEXT:\nTable of Contents\n\nBefore...,e Ensure SYSTEM BOOK has no overdue waivers,0.0,0.0
8,comprehend installation configuration,[SECTION CONTEXT:\nBefore Power up\n\nOn UTS s...,e Comprehend the configuration of the installa...,0.0,0.0
9,understand existing system faults,[SECTION CONTEXT:\nTable of Contents\n\nBefore...,e Comprehend the configuration of the installa...,0.0,0.0


In [None]:
print(df_keywords_test_scores.iloc[0]['user_input'])

print(df_keywords_test_scores.iloc[0]['retrieved_contexts'])

print(df_keywords_test_scores.iloc[0]['reference'])

prepare MPD save removable media
['SECTION CONTEXT:\nBefore Power up\n\nOn UTS server, insert the removable media containing MPD']
Prepare MPD and save it on the removable media


In [None]:
"""
In this case the model top 1 retrives a document the contain the words MPD, and removable media ,yet not the one we expected it to return - the one that mentions also the "save". 
It happaned beacuse it seems to preffer shorter sentences in terms of distance (probably the L2 in the faiss database).

When using the top 3, the second result is the right answer.

"""



retrieved_contexts = []
query = df_keywords_test_scores.iloc[0]['user_input']
query_embedding = ragas_embeddings.embed_query(query)
query_embedding_np = np.array([query_embedding]).astype("float32")

# 2. Search the FAISS index (Retrieve top 1 result)
k = 1
distances, indices = index.search(query_embedding_np, 3)

# 3. Extract the actual text based on the returned indices
# indices[0] contains the list of IDs found for the first query
found_texts = [doc_texts[idx] for idx in indices[0]]
retrieved_contexts.append(found_texts)
retrieved_contexts

[['SECTION CONTEXT:\nBefore Power up\n\nOn UTS server, insert the removable media containing MPD',
  'SECTION CONTEXT:\nBefore Flight\n\n1. Mission Planning at MSS\n\na. Prepare MPD and save it on the removable media\n\nb. Check the removable media free space availability\n\n2. Perform Technical Pre-Flight Procedure (SO-5) and Systems Serviceability Pre-Flight Procedure (SO-6)\n\nNOTE: ELTA recommends maintaining a System Book\n\ne Separate from A/C Book\n\ne Log system events on System Book\n\ne Ensure SYSTEM BOOK has no overdue waivers\n\ne Comprehend the configuration of the installation and what the existing system faults are',
  'SECTION CONTEXT:\n3. _ Ensure the following MS switches are in the co rrect state\n\nLOCATION NAME / DESCRIPTION STATE Cockpit Tablets outlets (x2) | ON/OFF EFB DEPRESS PDU#6 CTRL PDU#6 CTRL ON UTILITY PANEL UTILITY PWR OFF GPS PANEL GPS SOURCE AS REQUIRED Rack-6 RDR Tx (Left) DISABLE IOBC RDR Tx (Right) DISABLE LASER Tx NA (NOP) E-RADIO ZEROIZE SECURED\n

In [108]:
found_texts , distances

(['SECTION CONTEXT:\nBefore Power up\n\nOn UTS server, insert the removable media containing MPD',
  'SECTION CONTEXT:\nBefore Flight\n\n1. Mission Planning at MSS\n\na. Prepare MPD and save it on the removable media\n\nb. Check the removable media free space availability\n\n2. Perform Technical Pre-Flight Procedure (SO-5) and Systems Serviceability Pre-Flight Procedure (SO-6)\n\nNOTE: ELTA recommends maintaining a System Book\n\ne Separate from A/C Book\n\ne Log system events on System Book\n\ne Ensure SYSTEM BOOK has no overdue waivers\n\ne Comprehend the configuration of the installation and what the existing system faults are',
  'SECTION CONTEXT:\n3. _ Ensure the following MS switches are in the co rrect state\n\nLOCATION NAME / DESCRIPTION STATE Cockpit Tablets outlets (x2) | ON/OFF EFB DEPRESS PDU#6 CTRL PDU#6 CTRL ON UTILITY PANEL UTILITY PWR OFF GPS PANEL GPS SOURCE AS REQUIRED Rack-6 RDR Tx (Left) DISABLE IOBC RDR Tx (Right) DISABLE LASER Tx NA (NOP) E-RADIO ZEROIZE SECURED\n

## SEMANTIC judge - keywords.

In [None]:
df_keywords_test_scores.drop(['context_recall','context_precision'],axis=1)

Unnamed: 0,user_input,retrieved_contexts,reference,context_recall,context_precision
0,prepare MPD save removable media,[SECTION CONTEXT:\nBefore Power up\n\nOn UTS s...,Prepare MPD and save it on the removable media,0.0,0.0
1,check removable media free space,[SECTION CONTEXT:\nBefore Power up\n\nOn UTS s...,Check the removable media free space availability,0.0,0.0
2,perform Technical Pre-Flight Procedure SO-5,[SECTION CONTEXT:\nBefore Flight\n\n1. Mission...,Perform Technical Pre-Flight Procedure (SO-5) ...,1.0,1.0
3,Systems Serviceability Pre-Flight Procedure SO-6,[SECTION CONTEXT:\nBefore Flight\n\n1. Mission...,Perform Technical Pre-Flight Procedure (SO-5) ...,1.0,1.0
4,ELTA recommends System Book maintenance,[SECTION CONTEXT:\nBefore Flight\n\n1. Mission...,NOTE: ELTA recommends maintaining a System Book,1.0,1.0
5,System Book separate A/C Book,[SECTION CONTEXT:\nN Verify the following MS C...,e Separate from A/C Book,0.0,0.0
6,log system events System Book,[SECTION CONTEXT:\nBefore Flight\n\n1. Mission...,e Log system events on System Book,1.0,1.0
7,System Book overdue waivers status,[SECTION CONTEXT:\nTable of Contents\n\nBefore...,e Ensure SYSTEM BOOK has no overdue waivers,0.0,0.0
8,comprehend installation configuration,[SECTION CONTEXT:\nBefore Power up\n\nOn UTS s...,e Comprehend the configuration of the installa...,0.0,0.0
9,understand existing system faults,[SECTION CONTEXT:\nTable of Contents\n\nBefore...,e Comprehend the configuration of the installa...,0.0,0.0


In [117]:
from ragas.metrics import AspectCritic

# df_scores
dataset = Dataset.from_pandas(df_keywords_test_scores.drop(['context_recall','context_precision'],axis=1))




# ==========================================
# 4. CUSTOM METRIC (The Semantic Judge)
# ==========================================
print("Step 4: Running Semantic Relevance Evaluation...")

# This prompt comes from Part 3 of your notebook
search_relevance_metric = AspectCritic(
    name="search_relevance",
    definition="""
    Evaluate the search result based on the user query.
    1. If a user searched for this query in a search engine, is this result relevant 
       enough to appear on the first page?
    2. It does NOT need to be the exact ground truth, but it must answer the intent.
    
    Output '1' if relevant, '0' if irrelevant.
    """,
    llm=ragas_llm
)

results = evaluate(
    dataset=dataset,
    metrics=[search_relevance_metric],
    llm=ragas_llm,
    embeddings=ragas_embeddings
)

# ==========================================
# 5. RESULTS
# ==========================================
df_semantic_judge_scores = results.to_pandas()
print("\n=== SEMANTIC EVALUATION REPORT ===")
print(f"Average Relevance Score: {df_semantic_judge_scores['search_relevance'].mean():.2f}")
print("\nTop 5 Successes:")

Step 4: Running Semantic Relevance Evaluation...


Evaluating: 100%|██████████| 40/40 [01:58<00:00,  2.96s/it]



=== SEMANTIC EVALUATION REPORT ===
Average Relevance Score: 0.68

Top 5 Successes:


In [118]:
df_semantic_judge_scores

Unnamed: 0,user_input,retrieved_contexts,reference,search_relevance
0,prepare MPD save removable media,[SECTION CONTEXT:\nBefore Power up\n\nOn UTS s...,Prepare MPD and save it on the removable media,0
1,check removable media free space,[SECTION CONTEXT:\nBefore Power up\n\nOn UTS s...,Check the removable media free space availability,0
2,perform Technical Pre-Flight Procedure SO-5,[SECTION CONTEXT:\nBefore Flight\n\n1. Mission...,Perform Technical Pre-Flight Procedure (SO-5) ...,1
3,Systems Serviceability Pre-Flight Procedure SO-6,[SECTION CONTEXT:\nBefore Flight\n\n1. Mission...,Perform Technical Pre-Flight Procedure (SO-5) ...,1
4,ELTA recommends System Book maintenance,[SECTION CONTEXT:\nBefore Flight\n\n1. Mission...,NOTE: ELTA recommends maintaining a System Book,1
5,System Book separate A/C Book,[SECTION CONTEXT:\nN Verify the following MS C...,e Separate from A/C Book,0
6,log system events System Book,[SECTION CONTEXT:\nBefore Flight\n\n1. Mission...,e Log system events on System Book,1
7,System Book overdue waivers status,[SECTION CONTEXT:\nTable of Contents\n\nBefore...,e Ensure SYSTEM BOOK has no overdue waivers,0
8,comprehend installation configuration,[SECTION CONTEXT:\nBefore Power up\n\nOn UTS s...,e Comprehend the configuration of the installa...,0
9,understand existing system faults,[SECTION CONTEXT:\nTable of Contents\n\nBefore...,e Comprehend the configuration of the installa...,0
