Load preprocessed chunks, create embeddings using HooshvareLab’s BERT-FA model, and build a FAISS vector store.

In [None]:
import os
import pickle
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document

# Load the preprocessed chunks from disk (alternatively, re-compute them by
# invoking Notebook 2's logic).
# NOTE: pickle.load can execute arbitrary code, so only open a chunks file
# that you generated yourself.
chunks_path = os.path.join("..", "data", "chunks.pkl")
with open(chunks_path, "rb") as chunks_file:
    chunks = pickle.load(chunks_file)

print(f"Loaded {len(chunks)} chunks.")

In [None]:
# Embedding backend: the HooshvareLab BERT-FA checkpoint, loaded through the
# HuggingFace embeddings wrapper.
embedding_device = "cpu"  # Use "cuda:0" if GPU embedding is preferred
embeddings = HuggingFaceEmbeddings(
    model_name="HooshvareLab/bert-fa-base-uncased",
    model_kwargs={"device": embedding_device},
)

Note: computing embeddings on the CPU may be slow. If a GPU is available, set `device` to `"cuda:0"` in the embeddings configuration to speed this up.

In [None]:
# Build the FAISS vector store from the loaded chunks using the embedding
# model, then report how many vectors the underlying index holds.
vectorstore = FAISS.from_documents(chunks, embeddings)
vector_count = vectorstore.index.ntotal
print("FAISS index constructed with", vector_count, "vectors.")

In [None]:
# Persist the vector store to disk next to the other data artifacts.
# NOTE(review): FAISS.save_local takes a folder path, so "faiss_index.faiss"
# presumably ends up as a directory holding the index files rather than a
# single file -- confirm before renaming, since loaders must use the same path.
data_dir = os.path.join("..", "data")
index_path = os.path.join(data_dir, "faiss_index.faiss")
vectorstore.save_local(index_path)
print(f"Index saved to {index_path}")