# 1.라이브러리 임포트

In [1]:
import os
import torch
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.tools import Tool
from langchain.agents import initialize_agent, AgentType
from langchain.memory import ConversationBufferMemory
from langchain.document_loaders import JSONLoader
from dotenv import load_dotenv

# .env 파일 로드
load_dotenv()

# OpenAI API 키를 환경 변수에서 로드
openai_api_key = os.getenv("OPENAI_API_KEY")

# 2.JSON데이터 로드 및 처리

In [3]:
# JSON 파일 경로 설정
file_path = 'merged_data.json'

# JSONLoader의 jq_schema 설정
jq_schema = """
.[] | {
    "filename": .filename,
    "date": .date,
    "conference_number": .conference_number,
    "question_number": .question_number,
    "meeting_name": .meeting_name,
    "generation_number": .generation_number,
    "committee_name": .committee_name,
    "meeting_number": .meeting_number,
    "session_number": .session_number,
    "agenda": .agenda,
    "law": .law,
    "qna_type": .qna_type,
    "context": .context,
    "context_summary": {
        "summary_q": .context_summary.summary_q,
        "summary_a": .context_summary.summary_a
    },
    "questioner": {
        "name": .questioner_name,
        "affiliation": .questioner_affiliation,
        "position": .questioner_position
    },
    "question": {
        "comment": .question.comment,
        "keyword": .question.keyword
    },
    "answerer": {
        "name": .answerer_name,
        "affiliation": .answerer_affiliation,
        "position": .answerer_position
    },
    "answer": {
        "comment": .answer.comment,
        "keyword": .answer.keyword
    }
}
"""

# JSON 파일 로드
loader = JSONLoader(file_path, jq_schema=jq_schema, text_content=False)
documents = loader.load()

# 3.문서 분할 및 임베딩 생성

In [None]:
# 문서 분할 및 임베딩 생성
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split_docs = text_splitter.split_documents(documents)

# CUDA 사용 설정
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# 임베딩 모델 설정 (GPU 사용)
embedding_model_name = 'sentence-transformers/all-MiniLM-L6-v2'
embedding_model = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
    model_kwargs={'device': device}
)

# FAISS 인덱스 설정
index_path = 'faiss_index'

# VectorStore 생성 또는 로드
if os.path.exists(index_path):
    print("저장된 FAISS 인덱스를 로드합니다...")
    vectorstore = FAISS.load_local(
        index_path,
        embeddings=embedding_model,
        allow_dangerous_deserialization=True
    )
else:
    print("FAISS 인덱스를 생성합니다...")
    vectorstore = FAISS.from_documents(split_docs, embedding_model)
    vectorstore.save_local(index_path)

  embedding_model = HuggingFaceEmbeddings(
  from tqdm.autonotebook import tqdm, trange


저장된 FAISS 인덱스를 로드합니다...


# 4.Retriever 및 도구 정의

In [None]:
# Retriever 생성
retriever = vectorstore.as_retriever(search_kwargs={"k": 6})

# 도구 함수 정의
def json_search_tool(input_text):
    docs = retriever.get_relevant_documents(input_text)
    if docs:
        summaries = [doc.page_content for doc in docs]
        return '\n\n'.join(summaries)
    else:
        return "해당하는 정보를 찾을 수 없습니다."

# 도구 정의
json_search_tool = Tool(
    name="JSONSearch",
    func=json_search_tool,
    description=(
        "Use this tool to search for information about a specific person or issue from the JSON document. "
        "Provides a summary of fields such as conference number, meeting name, generation number, committee name, agenda, law, Q&A type, context, learning context, "
        "context summary, questioner, and question."
    )
)


# 6. 도구 목록 생성

In [None]:
# 도구 목록 생성
tools = [json_search_tool]

# LLM 정의
llm = ChatOpenAI(model_name="gpt-4", temperature=0, openai_api_key=openai_api_key)

# 메모리 설정
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

  llm = ChatOpenAI(model_name="gpt-4", temperature=0, openai_api_key=openai_api_key)
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)


In [None]:
# 에이전트 초기화
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
    verbose=True,
    memory=memory,
)

  agent = initialize_agent(


In [None]:
# 시스템 프롬프트 설정
agent.agent.llm_chain.prompt.messages[0].prompt.template = ("Be sure to answer in Korean"
    "You are an AI assistant that helps users find information about individuals or issues from the National Assembly JSON document. "
    "When a user asks about a specific person or issue, you should use the JSONSearch tool to find meeting information where that person is mentioned. "
    "Provide a summarized response including fields such as conference number, meeting name, generation number, committee name, agenda, law, Q&A type, context, learning context, context summary, questioner, and question. "
    "If you cannot find the information in the JSON document, politely inform the user that the information is not available. "
    "Do not mention the use of tools; just provide the necessary information."
) 

In [None]:
# 에이전트와 대화하는 함수 정의
def chat_with_agent(user_input):
    response = agent.run(input=user_input)
    return response

In [None]:
# 예시 질문
user_input = "김영삼 대통령이 언급된 회의에 대한 정보를 알려주세요.반드시 안건 이름과 날짜 어떤 회의 였는지 몇대 국회였는지 알려주세요. 질문자와 답변자에 대한 정보와 어떤 안건에 대해 이야기가 오고 갔는지 대화문도 알려주세요."
response = chat_with_agent(user_input)
print("Assistant:", response)


  response = agent.run(input=user_input)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "JSONSearch",
    "action_input": "김영삼"
}
```[0m

  docs = retriever.get_relevant_documents(input_text)



Observation: [36;1m[1;3m1000m\ub97c \ucd08\uacfc\ud558\ub294 \uaddc\uc815\uc744 \uc9c0\ubc29\uc790\uce58\ub2e8\uccb4\uc5d0 \uc704\uc784\ud558\ub418, \uad50\uc721\uac10\uc774 \ud544\uc694 \uc2dc \uc870\ub840\ub85c \uacb0\uc815\ud560 \uc218 \uc788\ub3c4\ub85d \ud558\ub294 \uc81c\uc548\uc785\ub2c8\ub2e4."}, "questioner": {"name": "\uae40\ud604\uc544", "affiliation": "", "position": "\uc704\uc6d0"}, "question": {"comment": "\ub300\ud1b5\ub839\ub839\uc73c\ub85c \uc815\ud558\ub294 \uacbd\uc6b0 1000m\ub97c \ucd08\uacfc\ud560 \uc218 \uc788\ub2e4\ub294 \uac83 \uc9c0\ubc29\uc790\uce58\ub2e8\uccb4\uc5d0\ub2e4 \uc704\uc784\ud558\ub294 \uac70\ub77c\ub294 \uac70\uace0\uc694?", "keyword": "\ub300\ud1b5\ub839\ub839, 1000m, \ucd08\uacfc, \uc9c0\ubc29\uc790\uce58\ub2e8\uccb4, \uc704\uc784"}, "answerer": {"name": "\ubc15\ubc31\ubc94", "affiliation": "\uad50\uc721\ubd80", "position": "\ucc28\uad00"}, "answer": {"comment": "\uadf8\ub7ec\ub2c8\uae4c \uc9c0\uae08 \uc548\uc740 \uc2e0\uc124 \uc548\uc778\ub3

In [None]:
# 예시 질문
user_input = "이전의 질문과 상관 없는 새로운 질문입니다. 가장 마지막 회의록에.반드시 안건 이름과 날짜 어떤 회의 였는지 몇대 국회였는지 알려주세요. 질문자와 답변자에 대한 정보와 어떤 안건에 대해 이야기가 오고 갔는지 대화문도 알려주세요."
response = chat_with_agent(user_input)

print("Assistant:", response)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "JSONSearch",
    "action_input": "가장 마지막 회의록"
}
```[0m
Observation: [36;1m[1;3m{"filename": "SRC_20\ub300_\uc81c371\ud68c_\uc81c2\ucc28_\uc18c\uc704\uc6d0\ud68c_\uad6d\ud1a0\uad50\ud1b5\uc704\uc6d0\ud68c-\uc608\uc0b0\uacb0\uc0b0\uae30\uae08\uc2ec\uc0ac\uc18c\uc704\uc6d0\ud68c_0002(049519).xlsx", "date": "2019\ub14410\uc6d431\uc77c(\ubaa9)", "conference_number": "049519", "question_number": "0002", "meeting_name": "\uc18c\uc704\uc6d0\ud68c", "generation_number": "20", "committee_name": "\uad6d\ud1a0\uad50\ud1b5\uc704\uc6d0\ud68c-\uc608\uc0b0\uacb0\uc0b0\uae30\uae08\uc2ec\uc0ac\uc18c\uc704\uc6d0\ud68c", "meeting_number": "\uc81c371\ud68c", "session_number": "\uc81c2\ucc28", "agenda": "1. 2020\ub144\ub3c4 \uc608\uc0b0\uc548(\uacc4\uc18d)\uac00. \uad6d\ud1a0\uad50\ud1b5\ubd80 \uc18c\uad00\ub098. \ud589\uc815\uc911\uc2ec\ubcf5\ud569\ub3c4\uc2dc\uac74\uc124\uccad \uc18c\uad00\ub2e4. \uc0c8\ub9cc\uae08\ua