# 시스템 설정
1. 역할: 귀하의 임무는 문서를 분석하고 상세하고 통찰력 있는 답변을 제공하는 인공지능 모델 연구 강연자이십니다. 
2. 참고 문서: {retriever}
3. 강연 대상: 강연 대상은 불특정 다수로, 초대형 언어 모델을 모르는 사람도 존재합니다. 이런 경우를 고려하여 대답해야합니다. 
4. 추가 고려 사항: 질문을 받고 답변을 이해하기 쉽게 답변을 하기 전 부연 설명을 해줘, 이때 참고 문서의 내용을 먼저 고려하고 내용이 부족하다고 판단 되면 추가 구글을 통해 검색을 진행해서 답변을 진행해야합니다. 

In [None]:
system = f"""
1. Your role: Your job is to analyze the document and provide detailed and insightful answers as a speaker on AI model research. 
2. reference document: {retriever}
3. Audience: Your audience is unspecified, and some people may not know about very large language models. You will have to answer considering this case. 
4. additional considerations: When you are asked a question, you should give an explanation before answering to make your answer easy to understand, so you should consider the content of the reference document first, and if you find the content insufficient, you should do additional Google searches before answering. 
"""

In [2]:
# KEY설정
import os
from getpass import getpass

os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key 입력: ")

# 모델로드
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage

model = ChatOpenAI(model = "gpt-4o-mini", temperature = 0.3, max_tokens = 1000)

# 파일 로드
from langchain.document_loaders import PyPDFLoader

pdf_path = "초거대언어모델연구동향.pdf"

loader = PyPDFLoader(pdf_path)

docs = loader.load()

for doc in docs:
    utf8_docs = [doc.page_content.encode('utf-8').decode('utf-8') for doc in docs]

# 문서 청크 나누기 2
from langchain.text_splitter import RecursiveCharacterTextSplitter

recursive_text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=10,
    length_function=len,
    is_separator_regex=False,
)

splits = recursive_text_splitter.split_documents(docs)

    
# 벡터 임베딩 생성
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model = "text-embedding-ada-002")

# 벡터 스토어 생성
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
import faiss

vector_dim = len(embeddings.embed_query("example text")) 
index = faiss.IndexFlatL2(vector_dim)

vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={}
)

# 리트리버 변환

retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 2})

# 템플릿 정의
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# 시스템 설정
system_message = """
1. role: Your job is to analyze the document and provide detailed and insightful answers as a speaker on AI model research. 
2. reference document: {splits}
3. Audience: Your audience is unspecified, and some people may not know about very large language models. You will have to answer considering this case. 
4. additional considerations: When you are asked a question, you should give an explanation before answering to make your answer easy to understand, so you should consider the content of the reference document first, and if you find the content insufficient, you should do additional Google searches before answering. 
"""

contextual_prompt = ChatPromptTemplate.from_messages([
    ("system", system_message),
    ("user", "Context: {context}\n\nQuestion: {question}\n\nAnswer: Please provide a comprehensive response, focusing on the key trends and research findings mentioned in the document.")
])

# 질문 응답 체인 구성
class DebugPassThrough(RunnablePassthrough):
    def invoke(self, *args, **kwargs):
        output = super().invoke(*args, **kwargs)
        print("Debug Output:", output)
        return output

class ContextToText(RunnablePassthrough):
    def invoke(self, inputs, config=None, **kwargs):
        context_text = "\n".join([doc.page_content for doc in inputs["context"]])
        return {"context": context_text, "question": inputs["question"]}

# 질문 반복 처리
rag_chain_debug = {
    "context": retriever,   # 컨텍스트를 가져오는 retriever
    "question": DebugPassThrough()     # 사용자 질문이 그대로 전달되는지 확인하는 passthrough
}  | DebugPassThrough() | ContextToText()| contextual_prompt | model

# 출력
while True:
    print('=====================')
    query = input("질문을 입력하세요: ")

    response = rag_chain_debug.invoke(query)
    
    print("Final Response: ")
    print(response.content)

OpenAI API key 입력: ········
질문을 입력하세요: 초대형 언어모델 주요 연구 동향을 설명해줘
Debug Output: 초대형 언어모델 주요 연구 동향을 설명해줘
Debug Output: {'context': [], 'question': '초대형 언어모델 주요 연구 동향을 설명해줘'}


KeyError: "Input to ChatPromptTemplate is missing variables {'splits'}.  Expected: ['context', 'question', 'splits'] Received: ['context', 'question']\nNote: if you intended {splits} to be part of the string and not a variable, please escape it with double curly braces like: '{{splits}}'.\nFor troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/INVALID_PROMPT_INPUT"