In [2]:
pip install faiss-cpu llama-index-vector-stores-faiss

Collecting faiss-cpu
  Using cached faiss_cpu-1.9.0-cp311-cp311-macosx_10_14_x86_64.whl.metadata (4.4 kB)
Collecting llama-index-vector-stores-faiss
  Downloading llama_index_vector_stores_faiss-0.2.1-py3-none-any.whl.metadata (609 bytes)
Using cached faiss_cpu-1.9.0-cp311-cp311-macosx_10_14_x86_64.whl (7.7 MB)
Downloading llama_index_vector_stores_faiss-0.2.1-py3-none-any.whl (3.9 kB)
Installing collected packages: faiss-cpu, llama-index-vector-stores-faiss
Successfully installed faiss-cpu-1.9.0 llama-index-vector-stores-faiss-0.2.1
Note: you may need to restart the kernel to use updated packages.


In [13]:
# Import FAISS and the LlamaIndex packages
import faiss
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.core import StorageContext
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

In [14]:
from dotenv import load_dotenv
import os

# Load environment variables from the .env file
load_dotenv()

# Fail fast with a readable message when the API key is absent.
# Writing os.getenv(...) back into os.environ would raise an opaque
# TypeError (value is None) in exactly that situation, and changes nothing
# when the key is already present.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )

In [7]:
# Configure the LLM (OpenAI) and assign it to Settings.llm
Settings.llm = OpenAI(
    model="gpt-3.5-turbo",  # model name passed to the OpenAI wrapper
    context_window=4096,
    max_tokens=200,
    temperature=0.5,
)

In [9]:
# Load the data: read only the .pdf files found under ../data
documents = SimpleDirectoryReader(
    "../data",
    required_exts=[".pdf"],
).load_data()

Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)


In [17]:
# Create the FAISS index (1536 is the OpenAI embedding dimension).
# NOTE(review): this value presumably has to match the output size of the
# embedding model used when the documents are indexed — confirm if the
# embedding model is ever changed.
dimension = 1536
faiss_index = faiss.IndexFlatL2(dimension)

In [18]:
# Plug FAISS into LlamaIndex's indexing and retrieval pipeline:
# wrap the FAISS index in a vector store and hand it to a storage context.
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)

# Store the document contents in the vector DB by building the index
# on top of that storage context.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

In [19]:
# (Optional) Save the raw FAISS index (vectors only).
faiss.write_index(faiss_index, "faiss.index")

# Also persist the whole storage context. The raw FAISS file holds only the
# vectors; the document store and index metadata that LlamaIndex needs to
# map hits back to text live in the storage context, so without this step
# the index cannot be rebuilt into something queryable.
index.storage_context.persist(persist_dir="./storage")

# (Optional) Reload a queryable index from the persisted storage:
# from llama_index.core import load_index_from_storage
# vector_store = FaissVectorStore.from_persist_dir("./storage")
# storage_context = StorageContext.from_defaults(
#     vector_store=vector_store, persist_dir="./storage"
# )
# index = load_index_from_storage(storage_context=storage_context)

# (Optional) Load only the raw FAISS index (vectors without documents):
# loaded_index = faiss.read_index("faiss.index")

In [20]:
# Question to ask
query = "이 논문에서 제안하는 모델의 장점은 무엇인가요?"

# Create a query engine from the index and run the question through it
query_engine = index.as_query_engine()
response = query_engine.query(query)

# Print the question followed by the answer
print("\n질문:", query)
print("답변:", response)


질문: 이 논문에서 제안하는 모델의 장점은 무엇인가요?
답변: The advantages of the model proposed in this paper include providing accurate information based on reliable sources, utilizing up-to-date information, and being able to cite the sources of the information provided, which enhances the credibility of the answers.
