In [None]:
import faiss
import json
import os
from langchain_community.vectorstores.faiss import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain.schema import Document
from sentence_transformers import SentenceTransformer

# 경로 설정
INDEX_PATH = "./vectordb/dais_vectorstore.index"                # 기존 .index 파일 경로
METADATA_PATH = "./vectordb/dais_vectorstore_meta.jsonl"        # 기존 .jsonl 파일 경로
OUTPUT_FOLDER = "./vectordb/"                                   # 변환 후 저장 폴더

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# 1. FAISS 인덱스 로드
index = faiss.read_index(INDEX_PATH)

# 2. 메타데이터(jsonl) → Document 리스트로 변환
documents = []
with open(METADATA_PATH, "r", encoding="utf-8") as f:
    for line in f:
        meta = json.loads(line)
        doc = Document(
            page_content=meta.get("output", ""),  # 문서 본문 필드명에 맞게 수정
            metadata={k: v for k, v in meta.items() if k != "output"}
        )
        documents.append(doc)

In [3]:
# 3. Docstore 및 인덱스 매핑 생성
docstore = InMemoryDocstore({str(i): doc for i, doc in enumerate(documents)})
index_to_docstore_id = {i: str(i) for i in range(len(documents))}

# 4. 임베딩 함수 준비 (예시: sentence-transformers)
EMBEDDING_MODEL_NAME = "nlpai-lab/KURE-v1"
kure_model = SentenceTransformer(EMBEDDING_MODEL_NAME)
class KURELangChainEmbeddings:
    def __init__(self, model):
        self.model = model
    def embed_documents(self, texts):
        return self.model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
    def embed_query(self, text):
        return self.model.encode([text], convert_to_numpy=True, normalize_embeddings=True)[0]
kure_lc_embeddings = KURELangChainEmbeddings(kure_model)

In [4]:
# 5. LangChain FAISS 객체 생성
vectorstore_rag = FAISS(
    embedding_function=kure_lc_embeddings,
    index=index,
    docstore=docstore,
    index_to_docstore_id=index_to_docstore_id
)

# 6. LangChain 표준 폴더(`index.faiss`, `index.pkl`)로 저장
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
vectorstore_rag.save_local(OUTPUT_FOLDER)
print(f"LangChain FAISS 벡터스토어를 {OUTPUT_FOLDER} 폴더에 저장 완료!")
print(f"- {OUTPUT_FOLDER}/index.faiss")
print(f"- {OUTPUT_FOLDER}/index.pkl")

`embedding_function` is expected to be an Embeddings object, support for passing in a function will soon be removed.


LangChain FAISS 벡터스토어를 ./5e-5/ 폴더에 저장 완료!
- ./5e-5//index.faiss
- ./5e-5//index.pkl
