In [1]:
!pip install faiss-cpu


Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp313-cp313-win_amd64.whl.metadata (5.2 kB)
Downloading faiss_cpu-1.12.0-cp313-cp313-win_amd64.whl (18.2 MB)
   ---------------------------------------- 0.0/18.2 MB ? eta -:--:--
   - -------------------------------------- 0.5/18.2 MB 2.6 MB/s eta 0:00:07
   --- ------------------------------------ 1.6/18.2 MB 4.1 MB/s eta 0:00:05
   ----- ---------------------------------- 2.4/18.2 MB 4.2 MB/s eta 0:00:04
   ------ --------------------------------- 3.1/18.2 MB 4.1 MB/s eta 0:00:04
   --------- ------------------------------ 4.2/18.2 MB 4.2 MB/s eta 0:00:04
   ---------- ----------------------------- 5.0/18.2 MB 4.1 MB/s eta 0:00:04
   ------------ --------------------------- 5.8/18.2 MB 4.1 MB/s eta 0:00:03
   --------------- ------------------------ 6.8/18.2 MB 4.1 MB/s eta 0:00:03
   ---------------- ----------------------- 7.6/18.2 MB 4.1 MB/s eta 0:00:03
   ------------------ --------------------- 8.4/18.2 MB 4.1 MB/s eta 0:00:03

In [5]:
import json
import numpy as np
import faiss
import os

# Paths
EMBEDDINGS_FILE = r"E:\Companion-AI\data\vectorstore\embeddings.json"
INDEX_FILE = r"E:\Companion-AI\data\vectorstore\faiss_index.bin"
METADATA_FILE = r"E:\Companion-AI\data\vectorstore\faiss_metadata.json"


# 1. Load embeddings
with open(EMBEDDINGS_FILE, "r", encoding="utf-8") as f:
    data = json.load(f)

print(f"Loaded {len(data)} embeddings")

# 2. Convert to numpy array (float32 for FAISS)
embeddings = np.array([d["embedding"] for d in data]).astype("float32")

# 3. Build FAISS index
dimension = embeddings.shape[1]  # embedding vector size
index = faiss.IndexFlatL2(dimension)  # L2 distance - Euclidean Distance
index.add(embeddings)
print(f"FAISS index built with {index.ntotal} vectors")

# 4. Save index
faiss.write_index(index, INDEX_FILE)

# 5. Save metadata separately (file_name + chunk_id for lookup)
metadata = [{"file_name": d["file_name"], "chunk_id": d["chunk_id"]} for d in data]
with open(METADATA_FILE, "w", encoding="utf-8") as f:
    json.dump(metadata, f)

print(f"✅ Index saved as {INDEX_FILE}")
print(f"✅ Metadata saved as {METADATA_FILE}")


Loaded 422 embeddings
FAISS index built with 422 vectors
✅ Index saved as E:\Companion-AI\data\vectorstore\faiss_index.bin
✅ Metadata saved as E:\Companion-AI\data\vectorstore\faiss_metadata.json


In [6]:
import faiss
import numpy as np
import json
import requests

# --- Paths ---
INDEX_FILE = r"E:\Companion-AI\data\vectorstore\faiss_index.bin"
METADATA_FILE = r"E:\Companion-AI\data\vectorstore\faiss_metadata.json"

# --- Load FAISS index ---
index = faiss.read_index(INDEX_FILE)

# --- Load metadata ---
with open(METADATA_FILE, "r", encoding="utf-8") as f:
    metadata = json.load(f)

print(f"Loaded FAISS index with {index.ntotal} vectors")
print(f"Loaded metadata for {len(metadata)} chunks")

# --- Function: embed query using NIM ---
def embed_query(query_text):
    url = "http://172.16.5.50:8000/v1/embeddings"   # GPU server NIM endpoint
    payload = {
        "model": "nvidia/llama-3.2-nv-embedqa-1b-v2",
        "input": [query_text],
        "input_type": "query"   # ⚠️ important: "query" here
    }
    response = requests.post(url, json=payload)
    response.raise_for_status()
    return np.array(response.json()["data"][0]["embedding"], dtype="float32").reshape(1, -1)

# --- Run a test query ---
query = "Why is my fridge making a buzzing noise?"
query_vec = embed_query(query)

# --- Search FAISS ---
k = 3  # top results
distances, indices = index.search(query_vec, k)

print("\n🔍 Top Matches:")
for rank, idx in enumerate(indices[0]):
    meta = metadata[idx]
    print(f"{rank+1}. File: {meta['file_name']}, Chunk ID: {meta['chunk_id']}, Distance: {distances[0][rank]}")


Loaded FAISS index with 422 vectors
Loaded metadata for 422 chunks

🔍 Top Matches:
1. File: LG_Fridge_2.txt, Chunk ID: 23, Distance: 0.9177261590957642
2. File: LG_Fridge_3.txt, Chunk ID: 35, Distance: 1.0003751516342163
3. File: LG_Fridge_2.txt, Chunk ID: 64, Distance: 1.0956792831420898


In [7]:
# Load full embeddings data (with text) to retrieve content
with open(r"E:\Companion-AI\data\vectorstore\embeddings.json", "r", encoding="utf-8") as f:
    embeddings_data = json.load(f)

# Build a lookup dictionary: (file_name, chunk_id) -> text
chunk_lookup = {
    (item["file_name"], item["chunk_id"]): item.get("text", "")
    for item in embeddings_data
}

print("\n🔍 Top Matches with Text:")
for rank, idx in enumerate(indices[0]):
    meta = metadata[idx]
    text = chunk_lookup.get((meta['file_name'], meta['chunk_id']), "⚠️ Text not found")
    print(f"\n{rank+1}. File: {meta['file_name']}, Chunk ID: {meta['chunk_id']}, Distance: {distances[0][rank]}")
    print(f"   Text Preview: {text[:300]}...")



🔍 Top Matches with Text:

1. File: LG_Fridge_2.txt, Chunk ID: 23, Distance: 0.9177261590957642
   Text Preview: ...

2. File: LG_Fridge_3.txt, Chunk ID: 35, Distance: 1.0003751516342163
   Text Preview: ...

3. File: LG_Fridge_2.txt, Chunk ID: 64, Distance: 1.0956792831420898
   Text Preview: ...


In [8]:
pip install langdetect


Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
     ---------------------------------------- 0.0/981.5 kB ? eta -:--:--
     ---------- ----------------------------- 262.1/981.5 kB ? eta -:--:--
     -------------------------------------- 981.5/981.5 kB 2.4 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: langdetect
  Building wheel for langdetect (setup.py): started
  Building wheel for langdetect (setup.py): finished with status 'done'
  Created wheel for langdetect: filename=langdetect-1.0.9-py3-none-any.whl size=993250 sha256=5294f10188eac84503d3e443102b361a3f7101e63c71fdce5e319d49aac92975
  Stored in directory: c:\users\jaini solanki\appdata\local\pip\cache\wheels\eb\87\25\2dddf1c94e1786054e25022ec5530bfed52bad86d882999c48
Successfully built langdetect
Installing collected packages: langdetect
Successfully installed langdetect-1.0.9
Note: you may need

  DEPRECATION: Building 'langdetect' using the legacy setup.py bdist_wheel mechanism, which will be removed in a future version. pip 25.3 will enforce this behaviour change. A possible replacement is to use the standardized build interface by setting the `--use-pep517` option, (possibly combined with `--no-build-isolation`), or adding a `pyproject.toml` file to the source tree of 'langdetect'. Discussion can be found at https://github.com/pypa/pip/issues/6334


In [10]:
import faiss, json, requests, numpy as np

# --- Paths ---
INDEX_FILE = r"E:\Companion-AI\data\vectorstore\faiss_index.bin"
METADATA_FILE = r"E:\Companion-AI\data\vectorstore\faiss_metadata.json"
EMBEDDINGS_FILE = r"E:\Companion-AI\data\vectorstore\embeddings.json"

# --- Services ---
EMBEDDING_URL = "http://172.16.5.50:8000/v1/embeddings"
EMBED_MODEL = "nvidia/llama-3.2-nv-embedqa-1b-v2"

# --- Load FAISS index + metadata ---
index = faiss.read_index(INDEX_FILE)
with open(METADATA_FILE, "r", encoding="utf-8") as f:
    metadata = json.load(f)
with open(EMBEDDINGS_FILE, "r", encoding="utf-8") as f:
    embeddings_data = json.load(f)
chunk_lookup = {(d["file_name"], d["chunk_id"]): d.get("text", "") for d in embeddings_data}

# --- Helper: embed query ---
def embed_query(query: str):
    payload = {"model": EMBED_MODEL, "input": [query], "input_type": "query"}
    r = requests.post(EMBEDDING_URL, json=payload)
    r.raise_for_status()
    return np.array(r.json()["data"][0]["embedding"], dtype="float32").reshape(1, -1)

# --- Helper: retrieve top-k chunks ---
def retrieve_top_k(query_vec, k=3):
    D, I = index.search(query_vec, k)
    results = []
    for dist, idx in zip(D[0], I[0]):
        meta = metadata[idx]
        text = chunk_lookup.get((meta['file_name'], meta['chunk_id']), "")
        results.append({"file": meta["file_name"], "chunk": meta["chunk_id"], "distance": float(dist), "text": text})
    return results


In [11]:
from langdetect import detect, DetectorFactory
DetectorFactory.seed = 0  # ensures consistent results

def build_prompt(query, retrieved_chunks, max_chars=1200):
    """
    Build a prompt for the LLM using retrieved chunks, with language detection.
    """
    context_parts = []
    total_chars = 0
    
    for r in retrieved_chunks:
        snippet = r["text"].strip()
        if not snippet:
            continue
        
        # Detect language (try-catch to avoid errors on short/garbled text)
        try:
            lang = detect(snippet[:200])  # detect on first 200 chars
        except:
            lang = "unknown"
        
        lang_note = ""
        if lang != "en":
            lang_note = f"\n⚠️ NOTE: This chunk is in {lang.upper()} language."
        
        # Truncate long chunks
        if len(snippet) > max_chars:
            snippet = snippet[:max_chars] + " ... (truncated)"
        
        context_parts.append(f"[SOURCE: {r['file']}#{r['chunk']}]{lang_note}\n{snippet}")
        total_chars += len(snippet)
        
        if total_chars > 5000:  # stop if too large
            break
    
    context_block = "\n\n".join(context_parts)
    
    prompt = f"""
You are a helpful assistant for appliance manuals. 
Use ONLY the information in the context below to answer the user's question.
If the context does not contain the answer, say "I don’t know" and suggest safe next steps.

CONTEXT:
{context_block}

QUESTION:
{query}

INSTRUCTIONS:
- Answer clearly in simple language.
- Provide step-by-step troubleshooting if applicable.
- Always cite sources in this format: [SOURCE: filename#chunkID].
"""
    return prompt.strip()

# --- Example run ---
user_query = "Why is my fridge making a buzzing noise?"

query_vec = embed_query(user_query)
retrieved_chunks = retrieve_top_k(query_vec, k=3)

prompt = build_prompt(user_query, retrieved_chunks)

print("=== FINAL PROMPT TO SEND TO LLM ===\n")
print(prompt[:2000])  # preview first 2000 chars


=== FINAL PROMPT TO SEND TO LLM ===

You are a helpful assistant for appliance manuals. 
Use ONLY the information in the context below to answer the user's question.
If the context does not contain the answer, say "I don’t know" and suggest safe next steps.

CONTEXT:


QUESTION:
Why is my fridge making a buzzing noise?

INSTRUCTIONS:
- Answer clearly in simple language.
- Provide step-by-step troubleshooting if applicable.
- Always cite sources in this format: [SOURCE: filename#chunkID].


In [12]:
# Diagnostic + Prompt Builder - self contained cell
import os, json, requests, numpy as np, faiss
from langdetect import detect, DetectorFactory
DetectorFactory.seed = 0

# ---------- CONFIG: adjust if needed ----------
EMBEDDING_URL = "http://172.16.5.50:8000/v1/embeddings"
EMBED_MODEL = "nvidia/llama-3.2-nv-embedqa-1b-v2"

# Candidate paths (tries these in order)
EMB_JSON_CANDIDATES = [
    r"E:\Companion-AI\data\vectorstore\embeddings.json",
    "../data/vectorstore/embeddings.json",
    "/LAB/data/vectorstore/embeddings.json",
    "/mnt/data/vectorstore/embeddings.json"
]
INDEX_CANDIDATES = [
    r"E:\Companion-AI\data\vectorstore\faiss_index.bin",
    "../data/vectorstore/faiss_index.bin",
    "/LAB/faiss_index.bin",
    "/mnt/data/faiss_index.bin"
]
META_CANDIDATES = [
    r"E:\Companion-AI\data\vectorstore\faiss_metadata.json",
    "../data/vectorstore/faiss_metadata.json",
    "/LAB/faiss_metadata.json",
    "/mnt/data/faiss_metadata.json"
]
CHUNKS_CANDIDATES = [
    r"E:\Companion-AI\data\chunks\chunks.json",
    "../data/chunks/chunks.json",
    "/LAB/data/chunks/chunks.json",
    "/mnt/data/chunks/chunks.json"
]

def find_first_existing(paths):
    for p in paths:
        if os.path.exists(p):
            return p
    return None

EMBEDDINGS_FILE = find_first_existing(EMB_JSON_CANDIDATES)
INDEX_FILE = find_first_existing(INDEX_CANDIDATES)
METADATA_FILE = find_first_existing(META_CANDIDATES)
CHUNKS_FILE = find_first_existing(CHUNKS_CANDIDATES)

print("Paths found:")
print(" embeddings.json ->", EMBEDDINGS_FILE)
print(" faiss_index.bin ->", INDEX_FILE)
print(" faiss_metadata.json ->", METADATA_FILE)
print(" chunks.json (fallback) ->", CHUNKS_FILE)
print("")

# Sanity checks
if INDEX_FILE is None or METADATA_FILE is None:
    raise RuntimeError("FAISS index or metadata file not found. Check INDEX_CANDIDATES/META_CANDIDATES paths.")

# Load index + metadata
index = faiss.read_index(INDEX_FILE)
with open(METADATA_FILE, "r", encoding="utf-8") as f:
    metadata = json.load(f)

print("FAISS index loaded. ntotal vectors =", index.ntotal)
print("Metadata entries =", len(metadata))
if index.ntotal != len(metadata):
    print("⚠️ Warning: index.ntotal and len(metadata) differ. Make sure metadata matches the index used to build it.")

# Try to load embeddings.json (may or may not include text)
embeddings_data = None
if EMBEDDINGS_FILE:
    try:
        with open(EMBEDDINGS_FILE, "r", encoding="utf-8") as f:
            embeddings_data = json.load(f)
        print("Loaded embeddings.json; entries =", len(embeddings_data))
    except Exception as e:
        print("Could not load embeddings.json:", e)
else:
    print("embeddings.json not found in candidate list.")

# Build chunk_lookup: prefer embeddings_data[text] if present, else fallback to chunks.json
chunk_lookup = {}
if embeddings_data and isinstance(embeddings_data, list) and embeddings_data and "text" in embeddings_data[0]:
    print("Using 'text' field from embeddings.json to build chunk_lookup.")
    for d in embeddings_data:
        chunk_lookup[(d["file_name"], d["chunk_id"])] = d.get("text", "")
else:
    # fallback: try chunks.json (original chunks with text)
    if CHUNKS_FILE:
        try:
            with open(CHUNKS_FILE, "r", encoding="utf-8") as f:
                chunks_src = json.load(f)
            print("Loaded chunks.json with", len(chunks_src), "entries.")
            # expected keys: file_name, chunk_id, text
            for d in chunks_src:
                key = (d.get("file_name"), d.get("chunk_id"))
                chunk_lookup[key] = d.get("text", "")
            print("Built chunk_lookup from chunks.json.")
        except Exception as e:
            print("Failed to load chunks.json for fallback:", e)
    else:
        print("No chunks.json fallback found and embeddings.json had no 'text' field.")
        print("=> This is likely the reason your context was empty. You need the original chunk texts.")
        # continue with empty lookup (we'll show diagnostics)

# Quick sample check: show a few keys
sample_keys = list(chunk_lookup.keys())[:5]
print("Sample chunk_lookup keys (up to 5):", sample_keys)
print("Example text length for first sample (if any):", len(chunk_lookup.get(sample_keys[0], "")) if sample_keys else "no data")
print("")

# --- Embedding & retrieval helpers ---
import numpy as np
def embed_query(query: str):
    payload = {"model": EMBED_MODEL, "input": [query], "input_type": "query"}
    try:
        r = requests.post(EMBEDDING_URL, json=payload, timeout=30)
        r.raise_for_status()
        emb = r.json()["data"][0]["embedding"]
        return np.array(emb, dtype="float32").reshape(1, -1)
    except Exception as e:
        print("Embedding call failed:", e)
        return None

def retrieve_top_k(query_vec, k=3):
    if query_vec is None:
        print("No query_vec provided.")
        return []
    D, I = index.search(query_vec, k)
    results = []
    for dist, idx in zip(D[0], I[0]):
        if idx < 0 or idx >= len(metadata):
            print("Invalid index returned by FAISS:", idx)
            continue
        meta = metadata[idx]
        text = chunk_lookup.get((meta['file_name'], meta['chunk_id']), "")
        results.append({"file": meta['file_name'], "chunk": meta['chunk_id'], "distance": float(dist), "text": text})
    return results

# --- Prompt builder with language detection ---
def build_prompt(query, retrieved_chunks, max_chars=1500):
    context_parts = []
    total_chars = 0
    for r in retrieved_chunks:
        snippet = (r.get("text") or "").strip()
        if not snippet:
            # Show where the missing text is coming from
            snippet = ""
            print(f"⚠️ Missing text for {r['file']}#{r['chunk']} (distance {r['distance']:.3f})")
            continue
        try:
            lang = detect(snippet[:200])
        except:
            lang = "unknown"
        lang_note = "" if lang == "en" else f"\n⚠️ NOTE: This chunk is in language '{lang}'"
        if len(snippet) > max_chars:
            snippet = snippet[:max_chars] + " ... (truncated)"
        context_parts.append(f"[SOURCE: {r['file']}#{r['chunk']}] {lang_note}\n{snippet}")
        total_chars += len(snippet)
        if total_chars > 8000:
            break
    context_block = "\n\n".join(context_parts)
    prompt = f"""You are a helpful assistant for appliance manuals.
Use ONLY the information in the context below to answer the user's question.
If the context does not contain the answer, say "I don’t know" and suggest safe next steps.

CONTEXT:
{context_block}

QUESTION:
{query}

INSTRUCTIONS:
- Answer clearly in simple language.
- Provide step-by-step troubleshooting if applicable.
- Always cite sources in this format: [SOURCE: filename#chunkID].
"""
    return prompt.strip()

# ---------- Run example ----------
user_query = "Why is my fridge making a buzzing noise?"
print("\n--- Running retrieval for query:", user_query, "---")
qvec = embed_query(user_query)
if qvec is None:
    print("Embedding failed — check EMBEDDING_URL and server. No retrieval possible.")
else:
    retrieved = retrieve_top_k(qvec, k=4)
    print("Retrieved results count:", len(retrieved))
    for i, r in enumerate(retrieved, 1):
        txt_preview = (r['text'] or "")[:200].replace("\n"," ")
        print(f"{i}. {r['file']}#{r['chunk']} dist={r['distance']:.4f} text_present={bool(r['text'])} preview: {txt_preview!s}")
    # Build prompt
    prompt = build_prompt(user_query, retrieved)
    print("\n=== FINAL PROMPT (preview, first 2000 chars) ===\n")
    print(prompt[:2000])


Paths found:
 embeddings.json -> E:\Companion-AI\data\vectorstore\embeddings.json
 faiss_index.bin -> E:\Companion-AI\data\vectorstore\faiss_index.bin
 faiss_metadata.json -> E:\Companion-AI\data\vectorstore\faiss_metadata.json
 chunks.json (fallback) -> E:\Companion-AI\data\chunks\chunks.json

FAISS index loaded. ntotal vectors = 422
Metadata entries = 422
Loaded embeddings.json; entries = 422
Loaded chunks.json with 422 entries.
Built chunk_lookup from chunks.json.
Sample chunk_lookup keys (up to 5): [('LG_Fridge_1.txt', 1), ('LG_Fridge_1.txt', 2), ('LG_Fridge_1.txt', 3), ('LG_Fridge_1.txt', 4), ('LG_Fridge_1.txt', 5)]
Example text length for first sample (if any): 2906


--- Running retrieval for query: Why is my fridge making a buzzing noise? ---
Retrieved results count: 4
1. LG_Fridge_2.txt#23 dist=0.9177 text_present=True preview: Rattling: Rattling noises may come from the ﬂow of refrigerant, the water line, or items stored on top of the refrigerator. Whooshing: Popping: Contrac