In [None]:
import os
import json
import numpy as np
from pathlib import Path
from openai import OpenAI
import faiss
from dotenv import load_dotenv

# Build a FAISS vector index from the plain-text documents in DOCS_DIR.
#
# Steps: load the API key from .env, read every *.txt file, embed each document
# with OpenAI, store the vectors in a flat L2 index, and write a JSON metadata
# file whose i-th entry describes the i-th vector in the index.
load_dotenv("../.env")

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Input corpus and output artifacts (paths are relative to the working dir).
DOCS_DIR = Path("../data")
INDEX_PATH = Path("../vectorstore/faiss.index")
META_PATH = Path("../vectorstore/metadata.json")

INDEX_PATH.parent.mkdir(parents=True, exist_ok=True)

texts = []
metadata = []

# Sort the matches: glob order depends on the filesystem, and the position of
# a document here becomes its row id in the index and in the metadata file.
for file in sorted(DOCS_DIR.glob("*.txt")):
    text = file.read_text(encoding="utf-8")
    if not text.strip():
        # A blank file has nothing to index, and the embeddings endpoint
        # rejects empty input, which would abort the whole ingestion run.
        print(f"Skipping empty document: {file.name}")
        continue
    texts.append(text)
    metadata.append({"source": file.name})

if not texts:
    raise ValueError(f"No non-empty .txt documents found in {DOCS_DIR.resolve()}")

# One request per document; each response carries a single embedding.
# NOTE(review): documents are embedded whole, so a file longer than the
# model's input limit will still fail here -- consider chunking upstream.
embeddings = [
    client.embeddings.create(
        model="text-embedding-3-small",
        input=text,
    ).data[0].embedding
    for text in texts
]

# FAISS expects a 2-D float32 matrix; build it directly in that dtype instead
# of creating a float64 array and converting it afterwards.
vectors = np.array(embeddings, dtype="float32")

index = faiss.IndexFlatL2(vectors.shape[1])
index.add(vectors)

# Persist the index and its row-aligned metadata side by side.
faiss.write_index(index, str(INDEX_PATH))
META_PATH.write_text(json.dumps(metadata, indent=2), encoding="utf-8")

print("RAG ingestion completed WITHOUT LangChain")
