In [None]:
!pip install sentence-transformers faiss-gpu

In [None]:
import json
import numpy
import faiss
from sentence_transformers import SentenceTransformer

In [None]:
# Read the chunk records from disk (a JSON array of objects, each with a "text" field).
with open("egypt_labor_law_chunks_normalized.json", encoding="utf-8") as chunks_file:
    chunks = json.load(chunks_file)

# Split every record into its text and the remaining fields, keeping both
# lists aligned by position so row i of one matches row i of the other.
texts = []
metadatas = []
for chunk in chunks:
    fields = dict(chunk)  # shallow copy so the loaded record is left untouched
    texts.append(fields.pop("text"))
    metadatas.append(fields)

In [None]:
# Load the multilingual E5 sentence encoder.
model = SentenceTransformer("intfloat/multilingual-e5-base")

# E5 models expect every input to start with a role prefix: "passage: " for
# documents being indexed and "query: " for search queries. Encoding raw text
# without a prefix degrades retrieval quality (see the model card).
# NOTE: whatever code searches this index must prefix its queries with "query: ".
PASSAGE_PREFIX = "passage: "
passage_inputs = [PASSAGE_PREFIX + text for text in texts]

# normalize_embeddings=True yields unit-length vectors, so an inner-product
# search over them is equivalent to cosine similarity. FAISS requires float32.
embeddings = model.encode(
    passage_inputs, batch_size=32, normalize_embeddings=True, show_progress_bar=True
).astype("float32")

In [None]:
# Build an exact (brute-force) inner-product index sized to the embedding
# dimensionality, then load every embedding row into it. Row i of the index
# corresponds to position i of the embeddings array.
embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatIP(embedding_dim)
index.add(embeddings)

In [None]:
# Persist the FAISS index to disk.
faiss.write_index(index, "egypt_labor_law.faiss")

# Persist the per-chunk metadata as pretty-printed JSON; ensure_ascii=False
# keeps non-ASCII characters readable instead of escaping them as \uXXXX.
metadata_json = json.dumps(metadatas, ensure_ascii=False, indent=2)
with open("egypt_labor_law_metadata.json", "w", encoding="utf-8") as metadata_file:
    metadata_file.write(metadata_json)

In [None]:
# Persist the chunk texts as pretty-printed JSON; ensure_ascii=False keeps
# non-ASCII characters readable instead of escaping them as \uXXXX.
texts_json = json.dumps(texts, ensure_ascii=False, indent=2)
with open("egypt_labor_law_texts.json", "w", encoding="utf-8") as texts_file:
    texts_file.write(texts_json)