In [0]:
%run ./00_setup_environment.ipynb

In [0]:
from sentence_transformers import SentenceTransformer
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
import os

In [0]:
# Location of the Delta table holding the manual chunks.
chunks_delta_path = '/Volumes/genai_catalog/car_manuals/manual_chunks/chunks_delta'

# Read the table as a Spark DataFrame.
delta_reader = spark.read.format('delta')
chunks_df = delta_reader.load(chunks_delta_path)

# Collect the rows so the following cells can iterate over them in plain Python.
chunks = chunks_df.collect()

In [0]:

# Wrap every chunk row in a Document: the chunk text becomes the page content,
# while its language and source are carried along as metadata.
documents = []
for chunk in chunks:
    chunk_text = chunk["text"]
    chunk_metadata = {"language": chunk["language"], "source": chunk["source"]}
    documents.append(Document(page_content=chunk_text, metadata=chunk_metadata))

In [0]:
# Instantiate the embedding model that is used both to build the index and to
# embed queries against it.
embedding_model_name = "sentence-transformers/distiluse-base-multilingual-cased-v2"
embedding_model = HuggingFaceEmbeddings(model_name=embedding_model_name)


In [0]:
# Target folder for the persisted FAISS index (a Unity Catalog volume path).
index_save_path = "/Volumes/genai_catalog/car_manuals/manual_chunks/faiss_index"

# FAISS.from_documents needs at least one document: it takes the vector
# dimension from the first embedding, so an empty list would otherwise fail
# with an unhelpful IndexError. Fail early with a clear message instead.
if not documents:
    raise ValueError("Cannot build FAISS index: `documents` is empty.")

# Embed every document with `embedding_model` and build the FAISS index.
faiss_index = FAISS.from_documents(documents, embedding_model)

# Persist the index to the volume so it can be reloaded with FAISS.load_local.
faiss_index.save_local(index_save_path)

In [0]:
# Report where the index was written.
saved_message = f"FAISS index saved to: {index_save_path}"
print(saved_message)

In [0]:
# Reload the persisted index and run a sample query against it.
# NOTE: allow_dangerous_deserialization=True should only be enabled when loading
# a FAISS index that you created and trust, which is the case here.
loaded_index = FAISS.load_local(index_save_path, embedding_model, allow_dangerous_deserialization=True)

query = "How do I change a tire?"
top_k = 3
results = loaded_index.similarity_search(query, k=top_k)

# Print each hit's language tag followed by the first 300 characters of its text.
for res in results:
    language_tag = res.metadata['language']
    preview = res.page_content[:300]
    print(f"[{language_tag}] {preview}\n---\n")
