In [10]:
# !pip install numpy
# !pip install pandas
# !pip install torch
# !pip install trl
# !pip install peft
# !pip install tqdm
# !pip install idna
# !pip install attr
# !pip install aiohttp
# !pip install typing
# !pip install dotenv
# !pip install requests
# !pip install wikipedia
# !pip install transformers
# !pip install tokenizer
# !pip install accelerate
# !pip install sentence_transformers
# !pip install scikit-learn
# !pip install scipy
# !pip install joblib
# !pip install langchain==0.3.21
# !pip install langchain-huggingface==0.1.2
# !pip install langchain-community==0.3.20
# !pip install langchain-core==0.3.49
# !pip install langchain-openai==0.3.11
# !pip install pydantic==2.7.4
# !pip install faiss-cpu
# !pip install faiss-gpu
# !pip install --upgrade jupyter
# !pip install --upgrade ipywidgets
# !pip cache purge

In [1]:
from transformers import AutoModelForQuestionAnswering, AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer, DebertaV2TokenizerFast, pipeline
from sentence_transformers import SentenceTransformer
import torch
import numpy as np
import pandas as pd
import re
import os
import faiss
import pydantic
from abc import ABC
from typing import Tuple, List, Dict, Optional, Any
from peft import PeftModel, PeftConfig, PeftModelForQuestionAnswering
from langchain.chains import ConversationalRetrievalChain, StuffDocumentsChain, LLMChain
from langchain.chains.base import Chain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate, BasePromptTemplate, SystemMessagePromptTemplate
from langchain.prompts.base import BasePromptTemplate
from langchain.llms import BaseLLM, HuggingFacePipeline
from langchain.agents import Tool, AgentExecutor, ZeroShotAgent
from langchain.utilities import WikipediaAPIWrapper
from langchain.tools import WikipediaQueryRun
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_core.runnables import Runnable
from langchain_core.outputs import Generation, LLMResult
from langchain_huggingface import HuggingFaceEmbeddings
# from langchain_community.chat_models import ChatOpenAI      # Agent LLM API 활용할 때만 활성화 (비상시)
# from google.colab import userdata                           # Agent LLM API 활용할 때만 활성화 (비상시)
# from dotenv import load_dotenv                              # Agent LLM API 활용할 때만 활성화 (비상시)
# load_dotenv()                                               # Agent LLM API 활용할 때만 활성화 (비상시)

In [2]:
# Silence the HuggingFace tokenizers parallelism warning.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Run on the GPU when torch can see one, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The adapter config records which base checkpoint the LoRA weights belong to.
peft_config = PeftConfig.from_pretrained("./trained_V3_LoRA")

# Base QA model -> attach the trained LoRA adapter -> move to the target device.
loaded_model = PeftModel.from_pretrained(
    AutoModelForQuestionAnswering.from_pretrained(peft_config.base_model_name_or_path),
    "./trained_V3_LoRA",
).to(device)

# The tokenizer is stored alongside the adapter.
loaded_tokenizer = AutoTokenizer.from_pretrained("./trained_V3_LoRA")

# Sentence-embedding model (LangChain wrapper).
# Alternative considered by the author: snunlp/KR-SBERT-V40K-klueNLI-augSTS
embedding_model = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")

# Museum data: keep only Title/Description, drop incomplete rows, and rename
# the columns to the question/answer names used by the rest of the notebook.
data_path = './data/museum_data_rhys_250326.csv'
df = (
    pd.read_csv(data_path)
    .loc[:, ["Title", "Description"]]
    .dropna()
    .rename(columns={"Title": "question", "Description": "answer"})
)

# Sentence splitter: breaks after '.', '?' or '!' when followed by whitespace.
def split_sentences(text):
    """Split ``text`` into sentences.

    The text is stripped first, then split at every run of whitespace that
    directly follows a '.', '?' or '!'. The terminator stays attached to its
    sentence.

    Args:
        text: The string to split.

    Returns:
        A list of the non-empty pieces, in order. Empty or whitespace-only
        input yields an empty list.
    """
    pieces = re.split(r'(?<=[.?!])\s+', text.strip())
    return list(filter(None, pieces))

# One record per sentence of every description. Each text is prefixed with the
# item's title so a sentence stays attributable after it is embedded alone.
documents = [
    {
        "text": f"{title} - {sentence}",
        "metadata": {"title": title},
    }
    for title, description in zip(df["question"], df["answer"])  # Title, Description columns
    for sentence in split_sentences(description)
]

In [3]:
# Build the FAISS vector store (LangChain) from the sentence-level records:
# texts are embedded with `embedding_model`, titles travel along as metadata.
vectorstore = FAISS.from_texts(
    [entry["text"] for entry in documents],
    embedding_model,
    metadatas=[entry["metadata"] for entry in documents],
)

In [None]:
# Retriever view of the vector store that returns the top 3 matches per query.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=3))

# Wikipedia lookup tool, configured with lang="ko".
wiki_api = WikipediaAPIWrapper(lang="ko")
wikipedia_tool = WikipediaQueryRun(api_wrapper=wiki_api)

# Museum-data search function, used as the `func` of the museum Tool.
def search_museum_data(input):
    """Look up museum passages for a query and return them as one string.

    Args:
        input: Free-text query passed to the module-level ``retriever``.
            The parameter name is kept for backward compatibility; it shadows
            the ``input`` builtin only inside this function.

    Returns:
        The ``page_content`` of every retrieved document, separated by a
        blank line. An empty string when nothing is retrieved.
    """
    # `get_relevant_documents` is deprecated in LangChain; `invoke` is the
    # supported Runnable entry point and returns the same list of documents.
    docs = retriever.invoke(input)
    return "\n\n".join(doc.page_content for doc in docs)

# Tools the agent may call: the museum database first, Wikipedia as a fallback.
museum_tool = Tool(
    name="Museum Data Search",
    description="This is useful when you need to answer questions about even the slightest of relevant information from the National Museum of Korea. Try searching first in the museum database. You need to input about even the slightest of relevant information from the museum. If you cannot find the appropriate answer in the database, try searching for Wikipedia. If you did not ask a question about the even the slightest of relevant information from the museum, you should specify to the user that it is not an appropriate question.",
    func=search_museum_data,
)
wiki_tool = Tool(
    name="Wikipedia Search",
    description="This is useful if you have not found an appropriate answer to a user's question about the even the slightest of relevant information in the database. If you did not ask a question about the even the slightest of relevant information from the museum, you should specify to the user that it is not an appropriate question.",
    func=wikipedia_tool.run,
)
tools = [museum_tool, wiki_tool]

# Conversation memory, exposed to prompts under the "chat_history" key as
# message objects.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

In [None]:
# LLM that drives the agent: "ModelSpace/GemmaX2-28-2B-v0.1", loaded through
# AutoModelForCausalLM.
control_model_id = "ModelSpace/GemmaX2-28-2B-v0.1"
control_tokenizer = AutoTokenizer.from_pretrained(control_model_id)
control_model = AutoModelForCausalLM.from_pretrained(control_model_id)

# Wrap the model in a transformers text-generation pipeline and hand that to
# LangChain's HuggingFacePipeline.
#
# `do_sample=True` must be a generation argument of the pipeline itself. When
# a ready-made pipeline is passed in, HuggingFacePipeline's `model_kwargs` is
# not forwarded to generation, so sampling was never switched on and
# `temperature` / `top_p` had no effect.
#
# NOTE(review): `max_length` counts prompt tokens plus generated tokens. The
# agent prompt (tool descriptions, chat history, scratchpad) may leave little
# room for the answer; consider `max_new_tokens` instead. Confirm against real
# prompt lengths before changing.
llm = HuggingFacePipeline(
    pipeline=pipeline(
        "text-generation",
        model=control_model,
        tokenizer=control_tokenizer,
        device=device,
        max_length=1024,
        do_sample=True,
        temperature=0.3,
        top_p=0.7,
        repetition_penalty=1.2,
    ),
)

# ✅ Agent에 사용할 표준 LLM 코드 (gpt-4o-mini 사용)
# openai_api_key = os.environ.get("OPENAI_API_KEY")
# openai_api_key = userdata.get("OPENAI_API_KEY")
# llm = ChatOpenAI(
#             api_key=openai_api_key,
#             model_name="gpt-4o-mini",
#             temperature=0.3,
#             max_tokens=1024
# )

In [None]:
# Agent prompt template.
# The agent can only act through the tools' registered names (`Tool.name`), so
# the instructions refer to those names rather than to the Python variable
# names `museum_tool` / `wiki_tool`, which the LLM cannot use as an action.
prefix = f"""You are a useful agent designed to answer even the slightest of relevant questions about the National Museum of Korea. You have to find the words that match the information in the museum database from the user's questions and generate answers. To do this, you can access the following tools. Carefully review your questions and descriptions of the tools available to determine which tools are most appropriate to use. You have to try using "{museum_tool.name}" first. If you can't find any the slightest of relevant information in "{museum_tool.name}", you should use "{wiki_tool.name}". If you need a tool, clarify which tools you will use and provide accurate inputs for that tool. If you are able to answer questions without using the tool, please provide a direct answer.

Available tools:"""
suffix = """Previous conversation: {chat_history}

Begin!

Question: {input}

{agent_scratchpad}"""

# create_prompt inserts the tool list and format instructions between prefix
# and suffix.
prompt = ZeroShotAgent.create_prompt(
    tools,
    prefix=prefix,
    suffix=suffix,
    input_variables=["input", "chat_history", "agent_scratchpad"]
)

# LLMChain that feeds the agent prompt to the controller LLM.
llm_chain = LLMChain(llm=llm, prompt=prompt)

# The agent itself. `tools=` and `verbose=` are not ZeroShotAgent fields; the
# agent's tool whitelist is `allowed_tools`, and verbosity is configured on
# the executor below.
agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=[tool.name for tool in tools])

# Executor that runs the think/act loop. verbose=True prints the agent's
# intermediate steps; handle_parsing_errors=True feeds malformed LLM output
# back to the agent instead of raising.
agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent, tools=tools, verbose=True, memory=memory, handle_parsing_errors=True
)

In [None]:
# Custom LLM wrapper that plugs the fine-tuned QA model into
# ConversationalRetrievalChain.

# Patterns for the labelled sections of the rendered combine-docs prompt
# ("Context: ... Question: ... Previous conversation: ... Answer:").
# `\s*` after each label accepts both "Label: text" and "Label:\ntext".
_QA_CONTEXT_RE = re.compile(r"Context:\s*(.*?)\s*Question:", re.DOTALL)
_QA_QUESTION_RE = re.compile(
    r"Question:\s*(.*?)\s*(?:Previous conversation:\s*(.*?)\s*)?Answer:",
    re.DOTALL,
)


def _parse_qa_prompt(prompt: str) -> Optional[Tuple[str, str, str]]:
    """Extract (context, question, chat_history) from a rendered prompt.

    Returns None when the Context or Question section cannot be found.
    chat_history is "" when the "Previous conversation:" section is absent
    or empty.
    """
    context_match = _QA_CONTEXT_RE.search(prompt)
    question_match = _QA_QUESTION_RE.search(prompt)
    if not context_match or not question_match:
        return None

    context_part = context_match.group(1).strip()
    question_part = question_match.group(1).strip()
    # Group 2 is None when the optional history section did not take part in
    # the match.
    chat_history_part = (question_match.group(2) or "").strip()
    return context_part, question_part, chat_history_part


class CustomQAmodel(BaseLLM, Runnable):
    """LangChain LLM adapter around an extractive question-answering model.

    For each prompt, the context, question and chat history sections are
    parsed out of the text, tokenized, and passed to ``model``. The answer is
    the token span between the arg-max start and end logits, decoded back to
    text. When a prompt cannot be parsed the generation text is "unknown",
    which the calling loop uses as a signal to fall back to the agent.
    """

    model: PeftModelForQuestionAnswering        # fine-tuned QA model (PEFT-wrapped)
    tokenizer: DebertaV2TokenizerFast           # tokenizer paired with `model`
    device: str                                 # device the encoded inputs are moved to

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses for this LLM type."""
        return "custom_qa_model"

    def _generate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[Any] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Answer every prompt with a span extracted by the QA model.

        Args:
            prompts: Rendered prompts containing "Context:", "Question:",
                optionally "Previous conversation:", and a trailing "Answer:".
            stop: Accepted for interface compatibility; not used.
            run_manager: Accepted for interface compatibility; not used.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            An LLMResult with exactly one Generation per prompt.
        """
        generations = []
        for prompt in prompts:
            parsed = _parse_qa_prompt(prompt)
            if parsed is None:
                # Keyword the main loop detects to trigger the agent fallback.
                generations.append([Generation(text="unknown")])
                continue
            context_part, question_part, chat_history_part = parsed

            # Model input: chat history, then context, then question.
            augmented_input = f"{chat_history_part}\n{context_part}\n{question_part}"

            # A single sequence is encoded, so the pair-only strategy
            # "only_second" cannot be used: the tokenizer rejects it once the
            # input is longer than max_length. Plain truncation is used instead.
            # NOTE(review): padding every request to max_length=3072 is costly;
            # for one sequence at a time the padding can probably be dropped.
            inputs = self.tokenizer(
                augmented_input,
                truncation=True,
                max_length=3072,
                padding="max_length",
                return_tensors="pt"
            ).to(self.device)

            with torch.no_grad():
                outputs = self.model(**inputs)

            # Most likely start/end token positions; the end index is made
            # exclusive for slicing. If end <= start the span is empty and the
            # answer is "", which the main loop also treats as "no answer".
            answer_start = int(torch.argmax(outputs.start_logits))
            answer_end = int(torch.argmax(outputs.end_logits)) + 1
            answer_ids = inputs["input_ids"][0][answer_start:answer_end]

            # skip_special_tokens drops [CLS]/[SEP] and also [PAD], which the
            # span can contain because the input is padded to max_length.
            answer = self.tokenizer.decode(answer_ids, skip_special_tokens=True).strip()

            generations.append([Generation(text=answer)])

        return LLMResult(generations=generations)

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Answer a single prompt and return the answer text."""
        # LLMResult is not subscriptable; the text lives at generations[i][j].
        return self._generate([prompt], stop=stop).generations[0][0].text

In [8]:
# Prompt template (LangChain) for the answer step of ConversationalRetrievalChain.
# CustomQAmodel._generate locates the "Context:", "Question:",
# "Previous conversation:" and "Answer:" labels in the rendered text with
# regular expressions, so these labels must stay in sync with that parser.
prompt_template = """{prompt}

Context: {context}

Question: {question}

Previous conversation: {chat_history}

Answer:"""

PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question", "chat_history", "prompt"])

# System rules substituted for the {prompt} slot of the template (via
# PROMPT.partial below).
system_rules = """

<SYSTEM_RULE>

1. NEVER mention that you're an AI. You are rather going to play a role as a docent of the National Museum of Korea in South Korea. You have to find the words that match the information in the museum's database from the user's questions, generate answers, and politely explain them to the user.

2. Sense which country's language is the language of the user's question and you have to answer it in the language of the same country.

3. Refrain from disclaimers about you not being a professional or expert.

4. Keep responses unique and free of repetition.

5. Answer based on the relevant information learned. If the relevant information is insufficient or does not exist in the file, please refer to RAG (search-based response) for reliable information.

6. Always focus on the key points in user's questions to determine user's intent.

7. Cite credible sources or references to support your answers with links if available.

8. If a mistake is made in a previous response, recognize and correct it.

9. If you don't have enough evidence or are unsure of the information, state “unknown” or “not sure” instead of making it up.

10. Use only the information you know for certain to create a concise answer. If you have to make a guess, say “this is a guess”.

"""

# ConversationalRetrievalChain setup: the fine-tuned QA model produces the
# answer from the retrieved documents, while the controller `llm` is used to
# condense follow-up questions. The same `memory` object is also passed to
# the agent executor.
qa = ConversationalRetrievalChain.from_llm(
    llm=CustomQAmodel(model=loaded_model, tokenizer=loaded_tokenizer, device=device),
    retriever=retriever,
    memory=memory,
    chain_type="stuff",
    condense_question_llm=llm,  # LLM used to rephrase the follow-up question
    combine_docs_chain_kwargs={"prompt": PROMPT.partial(prompt=system_rules)}
)

In [None]:
# Interactive answer loop.
# Each question goes to the retrieval QA chain first. If that answer is empty
# or contains one of the "no answer" markers, the agent (museum search, then
# Wikipedia) is asked instead. Exactly one question/answer pair is printed per
# turn.
#
# NOTE(review): `qa` and `agent_executor` share the same `memory` object, so a
# turn that falls back to the agent is recorded twice (once per chain).
while True:
    question = input("궁금한 점을 질문해주세요 (종료하려면 'exit' 입력): ")
    if question.strip().lower() == 'exit':
        print("도슨트 서비스를 종료합니다. 이용해주셔서 고맙습니다.")
        break
    if not question.strip():
        # Nothing to answer; prompt again without calling the chains.
        continue

    # First attempt: ConversationalRetrievalChain. Its result only carries the
    # chain's own output ("answer"); the values CustomQAmodel parses out of the
    # prompt are not part of it.
    result_qa = qa.invoke({"question": question})
    answer = result_qa["answer"]

    # The museum data did not yield a usable answer when the text is empty or
    # contains one of these markers.
    lowered_answer = answer.lower()
    needs_agent = (
        len(answer.split()) == 0
        or "구체적인 답변을 드리기 위해 검색을 시도합니다." in answer
        or "unknown" in lowered_answer
        or "not sure" in lowered_answer
    )

    if needs_agent:
        print("구체적인 답변은 다음과 같습니다.")
        # The agent's chat_history comes from the shared memory; only the
        # question has to be supplied.
        answer = agent_executor.invoke({"input": question})["output"]

    print(f"질문: {question}")
    print(f"답변: {answer}")

    print("\n")
