In [1]:
import os
import sys
import json
import time
import requests

from dotenv import load_dotenv

def setup_env():
    
    env_path = os.path.join(os.getcwd(), '../.env')

    if os.path.exists(env_path):
        load_dotenv(dotenv_path=env_path)
        
        print(f"Loaded environment variables from: \033[94m{env_path}\033[0m")
    else:
            print("\033[91mError: .env file not found. Please create one with your OPENAI_API_KEY.\033[0m")
            sys.exit(1)

setup_env()

Loaded environment variables from: [94m/home/ras/0.agent_ai_ws/src/learn_rag_and_agent/learn_rag_and_agent/../.env[0m


In [2]:
from langchain.storage import LocalFileStore
from langchain_openai import OpenAIEmbeddings
from langchain.embeddings import CacheBackedEmbeddings
from langchain_community.vectorstores.faiss import FAISS

# OpenAI 임베딩을 사용하여 기본 임베딩 설정
embedding = OpenAIEmbeddings()

# 로컬 파일 저장소 설정
store = LocalFileStore("./cache/")

# 캐시를 지원하는 임베딩 생성
cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings=embedding,
    document_embedding_cache=store,
    namespace=embedding.model,  # 기본 임베딩과 저장소를 사용하여 캐시 지원 임베딩을 생성
)

In [3]:
# store에서 키들을 순차적으로 가져옵니다.
list(store.yield_keys())

['text-embedding-ada-002cc824f84-d691-544f-9d9c-ca7e45470bb2',
 'text-embedding-ada-0029db9e1cd-62d8-50fc-94f4-24bef3cacaf5',
 'text-embedding-ada-00241e7391b-b68f-5e9f-bb07-3609bb83c3e2',
 'llm_cache.db',
 'text-embedding-ada-0027494a7c8-3399-52a1-85ef-f4d0a563d31f',
 'text-embedding-ada-0020fd71f95-1342-512d-9d5b-3e3ab3c6bbe0',
 'text-embedding-ada-0022112b0ec-6ade-59c9-b09c-755b33c3d32c',
 'text-embedding-ada-00274ae75af-9058-555e-aefa-082f0b4e0560',
 'hub/models--beomi--llama-2-ko-7b/blobs/6665b3487fa86ca6701a1921b710117e0f78d604',
 'hub/models--beomi--llama-2-ko-7b/blobs/89b127dcb3c85427a3c9224224c60f344d45eacdb729e4352a42224bdbed76ba',
 'hub/models--beomi--llama-2-ko-7b/blobs/342d75efdb4bffcd94f59df101808425c2c7a95e',
 'hub/models--beomi--llama-2-ko-7b/blobs/f6986fbe739e40bcf83ce679c687b98aa9dce2b0621604e70e80f76ad5e13ada',
 'hub/models--beomi--llama-2-ko-7b/blobs/824354aba3909363d96c7958c4d1ef2d46cfd0150ad7dbf4a12ae7e6388dc8ab',
 'hub/models--beomi--llama-2-ko-7b/blobs/ceaedeb4b

In [4]:
from langchain.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter

# 문서 로드
raw_documents = TextLoader("appendix-keywords.txt").load()
# 문자 단위로 텍스트 분할 설정
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
# 문서 분할
documents = text_splitter.split_documents(raw_documents)

In [5]:
# 코드 실행 시간을 측정합니다.
%time db = FAISS.from_documents(documents, cached_embedder)  # 문서로부터 FAISS 데이터베이스 생성

CPU times: user 102 ms, sys: 1.83 ms, total: 104 ms
Wall time: 108 ms


In [6]:
# 캐싱된 임베딩을 사용하여 FAISS 데이터베이스 생성
%time db2 = FAISS.from_documents(documents, cached_embedder)

CPU times: user 1.42 ms, sys: 5.34 ms, total: 6.76 ms
Wall time: 5.34 ms
