In [1]:
# Install specific versions to avoid conflicts
%pip install langchain langchain-huggingface langchain-community faiss-cpu pypdf sentence-transformers huggingface_hub -q > /dev/null
%pip install -U langchain-google-genai google-generativeai -q > /dev/null
%pip install rank_bm25 -q > /dev/null

In [2]:
%pip install python-dotenv -q > /dev/null

In [3]:
env_content = """
HUGGINGFACEHUB_API_TOKEN="Your_Key_here"
GOOGLE_API_KEY="Your_Key_here"
"""

with open(".env", "w") as f:
    f.write(env_content)

print("'.env' file created successfully. Please edit it with your actual API keys.")

'.env' file created successfully. Please edit it with your actual API keys.


In [4]:
import os
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_classic.chains import RetrievalQA
from langchain_classic.prompts import PromptTemplate

import getpass
from langchain_google_genai import ChatGoogleGenerativeAI

import re
import logging
import getpass
import pandas as pd
import json
from dotenv import load_dotenv

logging.getLogger("pypdf").setLevel(logging.ERROR)

# Load environment variables from .env file
load_dotenv()

os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACEHUB_API_TOKEN")
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")

print("Libraries loaded successfully.")

Libraries loaded successfully.


In [5]:
# --- DEFINING THE CLEANING FUNCTION ---
def clean_text(text):
    """
    Removes common PDF artifacts like page numbers or headers
    that confuse the LLM.
    """
    # Remove lines like "--- PAGE 1 ---" or "Page 1 of 50"
    text = re.sub(r'-+\s*PAGE\s*\d+\s*-+', '', text, flags=re.IGNORECASE)
    text = re.sub(r'Page\s+\d+\s+of\s+\d+', '', text, flags=re.IGNORECASE)
    # Remove excessive newlines (optional, helps with chunking)
    text = re.sub(r'\n{3,}', '\n\n', text)
    return text

def load_and_prep_pdf(file_path):
    print(f"Loading PDF: {file_path}...")
    loader = PyPDFLoader(file_path)
    raw_pages = loader.load()

    # Clean the text in each page
    for i, page in enumerate(raw_pages):
        original_len = len(page.page_content)
        page.page_content = clean_text(page.page_content)

        # Debug print for the first page only
        if i == 0:
            print(f"\n--- Page 1 Cleaning Report ---")
            print(f"Original Length: {original_len} chars")
            print(f"Cleaned Length: {len(page.page_content)} chars")

    return raw_pages

# --- EXECUTION ---
# Ensure the file is uploaded to your environment
pdf_path = "sample-service-manual 1.pdf" # <--- CHECK FILENAME
docs = load_and_prep_pdf(pdf_path)

print(f"\n‚úÖ Successfully loaded {len(docs)} pages.")

# INSPECTION: Show the first 500 characters of Page 5 (usually has actual content)
print("\n--- SAMPLE CONTENT (Page 5) ---")
print(docs[4].page_content[:500])
print("...\n(end of sample)")

Loading PDF: sample-service-manual 1.pdf...

--- Page 1 Cleaning Report ---
Original Length: 1782 chars
Cleaned Length: 1782 chars

‚úÖ Successfully loaded 852 pages.

--- SAMPLE CONTENT (Page 5) ---
from the front or rear 
suspension, occurs more in 
cold ambient temperature 
with or without the 
presence of moisture. More 
noticeable over rough 
roads or when turning 
bar insulators 
z Leaf spring 
bushings 
noise is acceptable. 
z Clunk ‚Äî noise from the 
front suspension, occurs in 
and out of turns 
z Loose front 
suspension 
z INSPECT for loose nuts or 
bolts. TIGHTEN to 
specification. Refer to the 
appropriate section in Group 
204 for the procedure. 
z Clunk ‚Äî noise from the 
rear su
...
(end of sample)


In [6]:
# --- SPLIT DOCUMENTS ---
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1200,       # Characters per chunk
    chunk_overlap=200,    # Overlap to keep context between chunks
    separators=["\n\n", "\n", "(?<=\. )", " ", ""]
)

all_splits = text_splitter.split_documents(docs)

# --- INSPECTION ---
print(f"‚úÖ Split {len(docs)} pages into {len(all_splits)} smaller chunks.\n")

print("--- INSPECTING CHUNK #10 ---")
sample_chunk = all_splits[10]
print(f"Content: \n{sample_chunk.page_content}")
print(f"\nMetadata: {sample_chunk.metadata}")
print("----------------------------")
print("Why check this? Ensure sentences aren't cut in half and metadata (page num) is preserved.")

‚úÖ Split 852 pages into 1202 smaller chunks.

--- INSPECTING CHUNK #10 ---
Content: 
a squeak, creak or rattle 
noise. Occurs mostly over 
bumps or rough roads 
z Front 
suspension 
components 
z Loose or 
damaged shock 
absorber(s) or 
shock absorber 
bushing(s) 
z Damaged 
spring or spring 
mount(s) 
z Damaged or 
worn 
control/radius 
arm bushing(s) 
z Worn or 
damaged 
stabilizer bar 
bushings or link
(s) 
z INSPECT the front 
suspension. INSTALL new 
components as necessary. 
Refer to the appropriate 
section in Group 204 for the 
procedure. 
z Rear suspension noise ‚Äî
a squeak, creak or rattle 
noise. Occurs mostly over 
bumps or rough roads 
z Loose or 
damaged rear 
shock absorber
(s) or shock 
absorber 
bushing(s) 
z Rear leaf spring 
tip isolator or 
bushing 
z Damaged leaf 
spring or leaf 
spring mount(s) 
z Worn or 
z INSPECT the rear 
suspension. INSTALL new 
components as necessary. 
REFER to Section 204-02 . 
Page 5 sur 112014 F-150 Workshop Manual
2014-03-01file:///C:

  separators=["\n\n", "\n", "(?<=\. )", " ", ""]


In [7]:
# --- CREATE VECTOR STORE ---
print("Generating Embeddings (HuggingFace)... this might take a minute...")

embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Create FAISS Index
vectorstore = FAISS.from_documents(all_splits, embedding_model)

# --- INSPECTION: TEST THE SEARCH ---
test_query = "Torque for brake caliper"
print(f"\n--- TESTING RETRIEVAL for '{test_query}' ---")

# Retrieve top 3 most similar chunks
relevant_docs = vectorstore.similarity_search(test_query, k=3)

for i, doc in enumerate(relevant_docs):
    print(f"\nResult {i+1} (Page {doc.metadata.get('page', 0)+1}):")
    # Print just the first line of the chunk to verify relevance
    print(f"'{doc.page_content[:150]}...'")

print("\n‚úÖ Vector Store is ready.")

Generating Embeddings (HuggingFace)... this might take a minute...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.



--- TESTING RETRIEVAL for 'Torque for brake caliper' ---

Result 1 (Page 652):
'General Specifications 
  Torque Specifications 
SECTION 206-04: Rear Disc Brake 2014 F-150 Workshop Manual 
SPECIFICATIONS Procedure revision date: 1...'

Result 2 (Page 672):
'General Specifications 
  Torque Specifications 
SECTION 206-05: Parking Brake and Actuation 2014 F-150 Workshop Manual 
SPECIFICATIONS Procedure revi...'

Result 3 (Page 734):
'Torque Specifications 
a Refer to the procedure in this section. 
SECTION 206-07: Power Brake Actuation 2014 F-150 Workshop Manual 
SPECIFICATIONS Pro...'

‚úÖ Vector Store is ready.


In [8]:
# --- STEP 1: Install & Import ---
# !pip install rank_bm25 -q > /dev/null

from langchain_classic.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever

# --- STEP 2: Create the Hybrid Retriever ---
# We use the same 'all_splits' you created earlier

# 1. Keyword Retriever (BM25) - Finds exact part numbers/terms
bm25_retriever = BM25Retriever.from_documents(all_splits)
bm25_retriever.k = 5  # Fetch top 5 keyword matches

# 2. Semantic Retriever (FAISS) - Finds context/meaning
faiss_retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# 3. Ensemble (Hybrid) - Combines both!
# weights=[0.5, 0.5] means equal importance to keywords and meaning
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, faiss_retriever],
    weights=[0.5, 0.5]
)

print("‚úÖ Hybrid Retriever (BM25 + FAISS) created.")

print("‚úÖ Chain updated to use Hybrid Search.")

‚úÖ Hybrid Retriever (BM25 + FAISS) created.
‚úÖ Chain updated to use Hybrid Search.


In [9]:
def debug_retrieval(query):
    print(f"Query: {query}")
    print("-" * 30)

    # Fetch docs using the new hybrid retriever
    docs = ensemble_retriever.invoke(query)

    for i, doc in enumerate(docs):
        print(f"\n[Document {i+1}] (Page {doc.metadata.get('page', 0)+1})")
        print(doc.page_content[:300] + "...") # Show first 300 chars
        print("-" * 30)

# Run a test
debug_retrieval("Torque for the Lug nuts")

Query: Torque for the Lug nuts
------------------------------

[Document 1] (Page 734)
Torque Specifications 
a Refer to the procedure in this section. 
SECTION 206-07: Power Brake Actuation 2014 F-150 Workshop Manual 
SPECIFICATIONS Procedure revision date: 10/25/2013 
Description Nm lb-ft lb-in 
Brake booster nuts a ‚Äî ‚Äî ‚Äî
Brake master cylinder nuts 25 18 ‚Äî
Brake vacuum pump bolts a ...
------------------------------

[Document 2] (Page 128)
Torque Specifications 
a Refer to the procedure in this section 
SECTION 204-02: Rear Suspension 2014 F-150 Workshop Manual 
SPECIFICATIONS Procedure revision date: 10/25/2013 
Description Nm lb-ft lb-in 
Shock absorber nuts 90 66 ‚Äî
Shock absorber shield bolts (SVT Raptor) 4 ‚Äî 35 
Spring shackle-to-...
------------------------------

[Document 3] (Page 140)
General Specifications 
  Torque Specifications 
a Refer to the procedure in this section. 
SECTION 204-04: Wheels and Tires 2014 F-150 Workshop Manual 
SPECIFICATIONS Procedure rev

In [10]:
# --- SETUP GEMINI ---
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0,
    convert_system_message_to_human=True
)

# --- DEFINE PROMPT ---
prompt_template = """
You are a vehicle specification extraction assistant.
Use the Context below to answer the Question.

Context:
{context}

Question:
{question}

Instructions:
1. Extract the specific value requested.
2. Return ONLY a valid JSON object with keys: "component", "spec_type", "value", "unit".
3. If not found, return {{"error": "not found"}}.
"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

# --- BUILD CHAIN ---
retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={'k': 9})

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=ensemble_retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT}
)

print("‚úÖ Gemini RAG Chain Initialized.")

‚úÖ Gemini RAG Chain Initialized.


In [11]:
# --- EXECUTION FUNCTION WITH LOGGING ---
def extract_spec_debug(query):
    print(f"\nüîé Processing: '{query}'")

    try:
        # Run the chain
        response = qa_chain.invoke({"query": query})
        raw_text = response['result']

        # LOGGING: See what the LLM actually said
        print(f"   ü§ñ Raw LLM Output: {raw_text}")

        # Cleanup
        clean_text = raw_text.replace("```json", "").replace("```", "").strip()

        # Parse
        data = json.loads(clean_text)

        # Add Source info
        if response['source_documents']:
            doc = response['source_documents'][0]
            page_num = doc.metadata.get('page', 0) + 1
            data['source_page'] = page_num

            # ADD THIS: Get the context snippet used
            # We take the first 200 chars of the chunk to show context
            data['context_snippet'] = doc.page_content[:200].replace("\n", " ") + "..."

            print(f"   üìÑ Found on Page: {page_num}")
            print(f"   üìù Context: {data['context_snippet']}")

        return data

    except json.JSONDecodeError:
        print("   ‚ùå Error: LLM did not return valid JSON.")
        return None
    except Exception as e:
        print(f"   ‚ùå Error: {e}")
        return None

# --- RUN QUERIES ---
questions = [
    "What is the torque for the Brake caliper anchor plate bolts?",
    "What is the Front brake disc minimum thickness?",
    "What is the Lower ball joint nut torque?"
]

results = []
for q in questions:
    res = extract_spec_debug(q)
    if res:
        results.append(res)

# --- FINAL TABLE ---
df = pd.DataFrame(results)
print("\n" + "="*40)
print("FINAL EXTRACTED DATA")
print("="*40)
print(df)

# Save
df.to_json("vehicle_specs_final.json", orient="records", indent=4)
print("\nResults saved to 'vehicle_specs_final.json'")


üîé Processing: 'What is the torque for the Brake caliper anchor plate bolts?'
   ü§ñ Raw LLM Output: ```json
{
  "component": "Brake caliper anchor plate bolts",
  "spec_type": "torque",
  "value": "250",
  "unit": "Nm"
}
```
   üìÑ Found on Page: 638
   üìù Context: Front Disc Brake  The front brake disc system consists of the following components:  z Brake pads  z Brake caliper anchor plate  z Brake caliper  z Brake disc  z Brake disc shield  z Brake flexible ho...

üîé Processing: 'What is the Front brake disc minimum thickness?'
   ü§ñ Raw LLM Output: ```json
{
  "component": "Front brake disc",
  "spec_type": "minimum thickness",
  "value": "32",
  "unit": "mm"
}
```
   üìÑ Found on Page: 618
   üìù Context: 11. Measure the brake disc thickness.  z If the measurement is below the minimum specification, install a new brake disc. For additional  information, refer to Section 206-03 for front disc brakes or ...

üîé Processing: 'What is the Lower ball joint nut torque?'
  

In [12]:
df

Unnamed: 0,component,spec_type,value,unit,source_page,context_snippet
0,Brake caliper anchor plate bolts,torque,250,Nm,638,Front Disc Brake The front brake disc system ...
1,Front brake disc,minimum thickness,32,mm,618,11. Measure the brake disc thickness. z If th...
2,Lower ball joint nut,torque,175,Nm,585,9. Remove and discard the tie-rod end nut. z ...


In [13]:
import re
import math
import pandas as pd
from tqdm import tqdm

# -------------------
# Helper functions
# -------------------
def normalize_text(s):
    """Lowercase, strip, remove extra spaces and commas."""
    if s is None:
        return ""
    s = str(s)
    s = s.strip()
    s = s.replace(",", "")           # remove thousand separators
    s = s.replace("\u2013", "-")     # en dash -> hyphen
    s = s.replace("\u2014", "-")     # em dash -> hyphen
    s = re.sub(r'\s+', ' ', s)
    return s

def extract_unit(s):
    """
    Attempt to extract a trailing unit token from a value string like '55 Nm' or '1.5 L'.
    Returns (value_string_without_unit, unit_string or '').
    """
    if not s:
        return s, ""
    s = s.strip()
    # unit is first run of letters (optionally with symbols) at the end
    m = re.search(r'([0-9\.\-\sto‚Äì‚àí]+)\s*([A-Za-z/%¬∞ŒºuŒ©mmHgptqlB]+[A-Za-z0-9%¬∞ŒºuŒ©\/\-\s]*)?$', s)
    if m:
        valpart = m.group(1).strip()
        unitpart = (m.group(2) or "").strip()
        return valpart, unitpart
    # fallback
    return s, ""

def parse_number_or_range(s):
    """
    Parse numeric string s into a (min_val, max_val) tuple (floats).
    Accepts:
      - single numbers: '55' -> (55.0, 55.0)
      - decimals: '0.203' -> (0.203, 0.203)
      - ranges: '35-40', '35 to 40', '0.203‚Äì0.305' -> (35.0, 40.0)
    If no numeric content found, returns (None, None).
    """
    if s is None:
        return None, None
    s = s.strip()
    if s == "":
        return None, None

    # Normalize 'to' and different dash chars into hyphen
    s_norm = s.lower().replace(' to ', '-').replace('‚Äì', '-').replace('‚Äî', '-').replace('‚àí', '-')
    # Remove stray text
    # Find all float-like tokens
    nums = re.findall(r'[-+]?\d*\.\d+|[-+]?\d+', s_norm)
    if not nums:
        return None, None

    # If there's a hyphen in the string (range), try to use first two numbers as range
    if '-' in s_norm and len(nums) >= 2:
        try:
            a = float(nums[0])
            b = float(nums[1])
            return (min(a,b), max(a,b))
        except:
            pass

    # If multiple numeric tokens but no explicit range, choose the first token as the value
    try:
        val = float(nums[0])
        return (val, val)
    except:
        return None, None

def ranges_overlap(exp_min, exp_max, pred_min, pred_max):
    """Return True if the ranges overlap or one contains the other."""
    if None in (exp_min, exp_max, pred_min, pred_max):
        return False
    return not (exp_max < pred_min or pred_max < exp_min)

def numeric_close(a, b, rel_tol=0.02, abs_tol=1e-6):
    """
    Compare numerics with tolerance.
    rel_tol default 2% (can be adjusted). abs_tol small to handle extremely small numbers.
    """
    try:
        return math.isclose(a, b, rel_tol=rel_tol, abs_tol=abs_tol)
    except:
        return False



In [14]:
# -------------------
# Evaluation function (drop-in replacement)
# -------------------
def run_batch_evaluation(file_path="ground_truth.csv", sample_size=5, rel_tol=0.02, abs_tol=1e-6):
    # Load questions
    try:
        df_gt = pd.read_csv(file_path)
    except FileNotFoundError:
        print("‚ùå ground_truth.csv not found. Please run the generation script first.")
        return None

    # sample (handle smaller files)
    df_sample = df_gt.sample(n=min(sample_size, len(df_gt)), random_state=42)
    print(f"üöÄ Starting evaluation on {len(df_sample)} random questions...")

    results = []
    correct_count = 0

    for index, row in tqdm(df_sample.iterrows(), total=len(df_sample)):
        query = normalize_text(row.get('query', ''))
        expected_val_raw = row.get('expected_value', '')
        expected_unit_raw = row.get('expected_unit', '')

        expected_val_raw = "" if pd.isna(expected_val_raw) else str(expected_val_raw)
        expected_unit_raw = "" if pd.isna(expected_unit_raw) else str(expected_unit_raw)

        expected_val_raw = normalize_text(expected_val_raw)
        expected_unit_raw = normalize_text(expected_unit_raw)

        try:
            # Call your RAG pipeline
            prediction = extract_spec_debug(query)  # unchanged call

            # Normalize prediction structure
            pred_val_raw = ""
            pred_unit_raw = ""
            pred_source_page = "N/A"
            if prediction:
                # Prediction can be dict or string, try to handle both
                if isinstance(prediction, dict):
                    pred_val_raw = str(prediction.get("value", "") or "")
                    pred_unit_raw = str(prediction.get("unit", "") or "")
                    pred_source_page = prediction.get("source_page", "N/A")
                else:
                    pred_val_raw = str(prediction)

            pred_val_raw = normalize_text(pred_val_raw)
            pred_unit_raw = normalize_text(pred_unit_raw)

            # Extract unit-aware value parts
            exp_val_part, exp_unit_from_val = extract_unit(expected_val_raw)
            # prefer explicit expected_unit column if present, else from value string
            exp_unit = expected_unit_raw if expected_unit_raw else exp_unit_from_val

            pred_val_part, pred_unit_from_val = extract_unit(pred_val_raw)
            pred_unit = pred_unit_raw if pred_unit_raw else pred_unit_from_val

            # parse numeric ranges
            exp_min, exp_max = parse_number_or_range(exp_val_part)
            pred_min, pred_max = parse_number_or_range(pred_val_part)

            status = "FAIL"
            reason = ""

            # 1) If expected numeric is None, fallback to string contains
            if exp_min is None:
                # fallback textual comparison
                if expected_val_raw and expected_val_raw in pred_val_raw:
                    status = "PASS"
                    reason = "Expected string found in prediction."
                else:
                    reason = "No numeric expected value to compare; textual match not found."
            else:
                # We have numeric expected value(s). Use range containment / overlap / closeness.
                # Case A: predicted is a range and expected number lies within predicted range
                if pred_min is not None and pred_max is not None:
                    if pred_min <= exp_min <= pred_max or ranges_overlap(exp_min, exp_max, pred_min, pred_max):
                        status = "PASS"
                        reason = f"Expected number {exp_min} in predicted range [{pred_min}, {pred_max}]."
                    else:
                        # check closeness to boundaries
                        if numeric_close(exp_min, pred_min, rel_tol=rel_tol, abs_tol=abs_tol) or numeric_close(exp_min, pred_max, rel_tol=rel_tol, abs_tol=abs_tol):
                            status = "PASS"
                            reason = f"Expected {exp_min} close to predicted boundary ({pred_min} or {pred_max})."
                        else:
                            reason = f"Expected {exp_min} not in predicted range [{pred_min}, {pred_max}]."
                # Case B: predicted is single number
                elif pred_min is not None:
                    if exp_min is not None:
                        # compare numerically with tolerance
                        if numeric_close(exp_min, pred_min, rel_tol=rel_tol, abs_tol=abs_tol):
                            status = "PASS"
                            reason = f"Numeric match within tolerance: expected {exp_min}, predicted {pred_min}."
                        else:
                            # if expected is a range and any overlap with predicted single number
                            if exp_min <= pred_min <= (exp_max if exp_max is not None else exp_min):
                                status = "PASS"
                                reason = f"Predicted {pred_min} falls inside expected range [{exp_min}, {exp_max}]."
                            else:
                                reason = f"Numeric mismatch: expected {exp_min}-{exp_max}, predicted {pred_min}."
                    else:
                        reason = "Expected numeric parse failed while predicted numeric available."
                else:
                    # No numeric predicted
                    # last fallback: substring check
                    if str(exp_min) in pred_val_raw:
                        status = "PASS"
                        reason = "Expected numeric appears as substring in prediction."
                    else:
                        reason = "Could not parse numeric from prediction."

            # Units: annotate if unit strings differ (not automatic fail)
            unit_note = ""
            if exp_unit and pred_unit and exp_unit != pred_unit:
                unit_note = f" (unit mismatch: expected '{exp_unit}' vs predicted '{pred_unit}')"

            if status == "PASS":
                correct_count += 1

            results.append({
                "Question": query,
                "Expected": f"{expected_val_raw} {expected_unit_raw}".strip(),
                "Predicted": f"{pred_val_raw} {pred_unit}".strip(),
                "Status": status,
                "Source Page": pred_source_page,
                "Reason": reason + unit_note
            })

        except Exception as e:
            results.append({
                "Question": query,
                "Expected": f"{expected_val_raw} {expected_unit_raw}".strip(),
                "Predicted": f"Error: {e}",
                "Status": "ERROR",
                "Source Page": "N/A",
                "Reason": str(e)
            })

    # Calculate Metrics
    accuracy = (correct_count / len(df_sample)) * 100 if len(df_sample) > 0 else 0.0
    print(f"\nüìä Evaluation Complete!")
    print(f"Final Accuracy: {accuracy:.2f}% ({correct_count}/{len(df_sample)})")

    # Save Results
    results_df = pd.DataFrame(results)
    results_df.to_csv("final_evaluation_results.csv", index=False)
    print("Detailed results saved to 'final_evaluation_results.csv'")

    return results_df


In [16]:
eval_results = run_batch_evaluation(sample_size=5)
display(eval_results)

üöÄ Starting evaluation on 5 random questions...


  0%|          | 0/5 [00:00<?, ?it/s]


üîé Processing: 'What is the The allowable backlash is?'


 20%|‚ñà‚ñà        | 1/5 [00:03<00:14,  3.53s/it]

   ü§ñ Raw LLM Output: ```json
{
  "component": "backlash",
  "spec_type": "allowable",
  "value": "0.203 to 0.305",
  "unit": "mm"
}
```
   üìÑ Found on Page: 571
   üìù Context: 2. Position the Dial Indicator Gauge with Holding Fixture and Clutch Housing Gauge tip centrally  on a drive tooth.  3. Zero the Dial Indicator.  4. Turn the differential ring gear without turning the...

üîé Processing: 'What is the torque for the Shock rod nut?'


 40%|‚ñà‚ñà‚ñà‚ñà      | 2/5 [00:05<00:08,  2.83s/it]

   ü§ñ Raw LLM Output: ```json
{
  "component": "Shock rod nut",
  "spec_type": "torque",
  "value": 55,
  "unit": "Nm"
}
```
   üìÑ Found on Page: 742
   üìù Context: Remove the stoplamp switch. For additional information, refer to Section 417-01 .  9. NOTE: The booster push rod clevis-locking pin is a one-time use only part. Any time the booster push  rod clevis-l...

üîé Processing: 'What is the Separate the clamp heads about?'


 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 3/5 [00:07<00:04,  2.23s/it]

   ü§ñ Raw LLM Output: ```json
{
  "component": "clamp heads",
  "spec_type": "separation distance",
  "value": "13",
  "unit": "mm"
}
```
   üìÑ Found on Page: 233
   üìù Context: 4. If no improvement is noted, rotate the clamps in opposite directions, equal distances from the best  position determined in Step 2. Separate the clamp heads about 13 mm (1/2 in) and recheck for  vi...

üîé Processing: 'What is the torque for the Part of kit?'


 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 4/5 [00:09<00:02,  2.31s/it]

   ü§ñ Raw LLM Output: {"error": "not found"}
   üìÑ Found on Page: 321
   üìù Context: Removal  NOTICE: The color on the rear face of the drive pinion nut is critical to this repair. Use the same  color new drive pinion nut for installation. If a new collapsible spacer must be installed...

üîé Processing: 'What is the torque for the Wheel bearing and wheel hub bolts?'


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5/5 [00:11<00:00,  2.39s/it]

   ü§ñ Raw LLM Output: ```json
{
  "component": "Wheel bearing and wheel hub bolts",
  "spec_type": "torque",
  "value": "175",
  "unit": "Nm"
}
```
   üìÑ Found on Page: 779
   üìù Context: speed sensor.  No  GO to E9 .  E9 CHECK THE WHEEL SPEED SENSOR(S) FOR  DAMAGE  z Ignition OFF.  z With the vehicle in NEUTRAL, position it on a hoist.  Refer to Section 100-02 .  z Remove the wheel sp...

üìä Evaluation Complete!
Final Accuracy: 80.00% (4/5)
Detailed results saved to 'final_evaluation_results.csv'





Unnamed: 0,Question,Expected,Predicted,Status,Source Page,Reason
0,What is the The allowable backlash is?,0.203 mm,0.203 to 0.305 mm,PASS,571,Expected number 0.203 in predicted range [-0.3...
1,What is the torque for the Shock rod nut?,55.0 Nm,55 Nm,PASS,742,"Expected number 55.0 in predicted range [55.0,..."
2,What is the Separate the clamp heads about?,13.0 mm,13 mm,PASS,233,"Expected number 13.0 in predicted range [13.0,..."
3,What is the torque for the Part of kit?,4880.0 Nm,,FAIL,321,Could not parse numeric from prediction.
4,What is the torque for the Wheel bearing and w...,175.0 Nm,175 Nm,PASS,779,Expected number 175.0 in predicted range [175....


In [15]:
import gradio as gr
import json

def gradio_interface(query):
    """
    Processing function for Gradio.
    Returns two outputs: The formatted answer and the source context.
    """
    try:
        # Run the RAG chain
        # Note: This uses the 'qa_chain' you defined in previous cells
        response = qa_chain.invoke({"query": query})

        # 1. Parse the JSON Result
        raw_text = response['result']
        # Clean potential markdown from LLM
        clean_text = raw_text.replace("```json", "").replace("```", "").strip()

        try:
            data = json.loads(clean_text)

            if "error" in data:
                answer_display = "‚ö†Ô∏è **Specification not found in the manual.**"
                evidence_display = "No relevant context found."
            else:
                # Professional Markdown Formatting
                answer_display = (
                    f"### ‚úÖ Extracted Specification\n"
                    f"**Component:** {data.get('component', 'N/A')}\n\n"
                    f"**Spec Type:** {data.get('spec_type', 'N/A')}\n\n"
                    f"# {data.get('value')} {data.get('unit')}"
                )
        except json.JSONDecodeError:
             answer_display = f"‚ö†Ô∏è **Parsing Error**\nThe model returned text that wasn't valid JSON.\n\nRaw Output:\n{clean_text}"
             evidence_display = "N/A"

        # 2. Format the Source Evidence
        evidence_display = ""
        if response.get('source_documents'):
            # Get the top document (most relevant)
            doc = response['source_documents'][0]
            page_num = doc.metadata.get('page', 0) + 1

            # Clean up newlines for display
            content_snippet = doc.page_content.replace("\n", " ")[:400] + "..."

            evidence_display = (
                f"### üìÑ Source Verification\n"
                f"**Page Number:** {page_num}\n\n"
                f"**Context Snippet:**\n> {content_snippet}"
            )
        else:
            evidence_display = "No source documents retrieved."

        return answer_display, evidence_display

    except Exception as e:
        return f"‚ùå **System Error:** {str(e)}", ""

# --- DEFINE THE UI LAYOUT ---
# We use 'Blocks' for a custom, professional layout
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# üîß AutoSpec RAG System")
    gr.Markdown("Extract technical vehicle specifications from the 2014 F-150 Service Manual using Hybrid Search (BM25 + FAISS).")

    with gr.Row():
        with gr.Column(scale=2):
            query_input = gr.Textbox(
                label="Technical Question",
                placeholder="e.g., What is the torque for the lug nuts?",
                lines=1
            )
        with gr.Column(scale=1):
            # A big primary button
            submit_btn = gr.Button("üîç Extract Specification", variant="primary")

    gr.Markdown("---")

    with gr.Row():
        # Left column for the Result
        with gr.Column():
            answer_output = gr.Markdown(label="Extracted Data")

        # Right column for the Evidence
        with gr.Column():
            evidence_output = gr.Markdown(label="Source Context")

    # Connect the logic
    submit_btn.click(
        fn=gradio_interface,
        inputs=query_input,
        outputs=[answer_output, evidence_output]
    )

    # Allow pressing "Enter" to submit
    query_input.submit(
        fn=gradio_interface,
        inputs=query_input,
        outputs=[answer_output, evidence_output]
    )

# --- LAUNCH ---
print("Launching Gradio UI...")
demo.launch(share=True)

  with gr.Blocks(theme=gr.themes.Soft()) as demo:


Launching Gradio UI...
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://955479ed5b7a36ada9.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


