In [5]:
from langchain.document_loaders import TextLoader

documents = TextLoader("AI.txt").load()

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter


# 문서를 청크로 분할
def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    text_spliter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    docs = text_spliter.split_documents(documents)
    return docs


# docs변수에 분할 문서를 저장
docs = split_docs(documents)

In [None]:
# openAI Imbedding model
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
import os

load_dotenv()

api_key = os.getenv("OPENAI_API_KEY")

embeddings = OpenAIEmbeddings(model="text-embedding-ada-002", api_key=api_key)

from langchain.vectorstores import Chroma

db = Chroma.from_documents(docs, embeddings, persist_directory="data")

In [None]:
from langchain.chat_models import ChatOpenAI

model = "gpt-4o"
llm = ChatOpenAI(model_name=model, api_key=api_key)

# QnA체인을 사용하여 쿼리에 대한 답변 얻기
from langchain.chains.question_answering import load_qa_chain

chain = load_qa_chain(llm, chain_type="stuff", verbose=True)

# 쿼리를 작성하고 유사성 검색을 수행하여 답변을 생성, 따라서 임베딩 한 내용 중 질의해야한다.
query = "AI란?"
matching_docs = db.similarity_search(query)
answer = chain.run(input_documents=matching_docs, question=query)
answer

  llm = ChatOpenAI(model_name=model, api_key=api_key)
stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/docs/how_to/#qa-with-rag
  chain = load_qa_chain(llm, chain_type='stuff', verbose=True)
Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3
  answer = chain.run(input_documents = matching_docs, question=query)




[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
Artificial intelligence (AI) is the intelligence of machines or software, as opposed to the intelligence of humans or animals. It is a field of study in computer science that develops and studies intelligent machines. Such machines may be called AIs.

AI technology is widely used throughout industry, government, and science. Some high-profile applications are: advanced web search engines (e.g., Google Search), recommendation systems (used by YouTube, Amazon, and Netflix), understanding human speech (such as Google Assistant, Siri, and Alexa), self-driving cars (e.g., Waymo), generative and creative tools (ChatGPT and AI art), and superhuman play and analysis in strategy g

'AI, 또는 인공지능은 인간이나 동물의 지능이 아닌 기계나 소프트웨어의 지능을 의미합니다. 컴퓨터 과학의 한 분야로, 인공지능을 갖춘 기계를 개발하고 연구하는 것을 목표로 합니다. AI 기술은 산업, 정부, 과학 분야에서 널리 사용되며, 웹 검색 엔진, 추천 시스템, 음성 인식, 자율주행차, 생성 및 창작 도구, 전략 게임 분석 등 다양한 분야에서 활용되고 있습니다. 인공지능은 1956년에 학문 분야로서 설립되었으며, 최근에는 딥러닝과 트랜스포머 아키텍처의 발전으로 인해 많은 관심과 자금이 집중되고 있습니다. AI 연구는 추론, 지식 표현, 계획, 학습, 자연어 처리, 지각, 로봇공학 지원 등 다양한 목표를 중심으로 이루어지고 있습니다.'