In [19]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, WebDriverException


from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.agents.agent_toolkits import create_retriever_tool
from langchain.agents.agent_toolkits import create_conversational_retrieval_agent
from langchain.chat_models import ChatOpenAI
from langchain.agents.openai_functions_agent.agent_token_buffer_memory import (
    AgentTokenBufferMemory,
)
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.prompts import MessagesPlaceholder
from langchain.schema.messages import SystemMessage
from langchain.agents import AgentExecutor
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader

from time import sleep
from typing import List, Optional
import time
import requests
import nest_asyncio
import re
import logging

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Apply the nest_asyncio patch once, at import time.
# NOTE(review): presumably needed because the notebook kernel already runs an
# event loop — confirm it is still required.
nest_asyncio.apply()

# Notebook-wide state shared between cells; everything starts out unset.
limited_cdata_texts = final_response = law_name = Answer = None

class wait_for_text_change:
    """Callable wait condition: true once an element's text differs from a reference.

    An instance takes a driver and returns a bool, so it can be handed to
    ``WebDriverWait(...).until``.

    Args:
        locator: Sequence that is unpacked into ``driver.find_element``.
        expected_text: Reference text. Despite the name, the condition is
            satisfied when the element's current text is *not* equal to it.
    """

    def __init__(self, locator, expected_text):
        self.locator, self.expected_text = locator, expected_text

    def __call__(self, driver):
        # Look the element up again on every poll and compare its live text.
        target = driver.find_element(*self.locator)
        current_text = target.text
        return current_text != self.expected_text


class TextLoader(BaseLoader):
    """Expose an in-memory string as a single ``Document``.

    Args:
        text_data: String containing the text data.
        source: Optional source information for the text data; stored under
            the ``"source"`` metadata key of the produced document.
    """

    def __init__(
        self,
        text_data: str,
        source: Optional[str] = None
    ):
        """Initialize with text data."""
        self.text_data = text_data
        self.source = source

    def load(self) -> List[Document]:
        """Return the text wrapped in a one-element list of ``Document``.

        Raises:
            ValueError: If ``text_data`` is not a string (for example ``None``
                because an upstream fetch produced nothing).
        """
        # Fail here with a readable message instead of letting a non-string
        # value reach the Document constructor.
        if not isinstance(self.text_data, str):
            raise ValueError(
                f"text_data must be a str, got {type(self.text_data).__name__}"
            )

        metadata = {"source": self.source}
        return [Document(page_content=self.text_data, metadata=metadata)]

        
# Special keyword conversion rules: maps a substring of the extracted keyword
# to the law name that should be searched instead (consumed by
# generate_law_keyword, which iterates the mapping in insertion order).

conversion_rules = {
    '사생활': '형법',
    '녹음': '형법',
    # NOTE(review): '전자통신사업법' may be a typo for '전기통신사업법' — confirm the intended statute name.
    '명의 도용' : '전자통신사업법',
    '명의' : '형법',
    '도용' : '형법',
    '임대차' : '주택임대차보호법',
    '사기' : '형법',
    '모욕' : '형법'
    # More rules can be added here.
}

# Path for saving the statute text file (commented out, not used below)
# TextFilePath = '/home/user/exercise_j/AIchatbot-Neoul/law_example_2.txt'

# Sample question used as the pipeline input
input_data = "교통사고가 나서 차량이 심하게 손상됐는데 어떤 법적 조치를 취해야하지?"

In [20]:
def generate_law_keyword(input_data, conversion_rules):
    """Ask the chat model for one Korean legal keyword and map it to a law name.

    Args:
        input_data: The user's question; substituted for ``{input}`` in the prompt.
        conversion_rules: Mapping of substring -> law name. Rules are tried in
            mapping order and the first key contained in the keyword wins.

    Returns:
        One of, in order of precedence:
        * a fixed Korean refusal message when the model gave no usable keyword;
        * the law name of the first matching conversion rule;
        * ``'형법'`` when the keyword contains ``'죄'``;
        * the model's keyword unchanged.
    """
    refusal_message = "죄송하지만, 이 주제에 대해서는 법률적 조언을 제공할 수 없습니다. 전문가의 도움을 받으시길 권장합니다."
    refusal_marker = "the most important keyword for a law database would be:"

    def handle_sensitive_response(response):
        # An empty reply, or a reply that stops right after the lead-in
        # sentence with no keyword following it, is treated as the model
        # declining to answer.
        stripped = response.strip()
        if not stripped or stripped.endswith(refusal_marker):
            return refusal_message
        return response

    # Prompt template
    prompt = ChatPromptTemplate.from_template("Given the input, extract most important one keyword for the law database only in Korean. Input: {input}\nKeywords:")

    # No key is passed explicitly: ChatOpenAI falls back to the OPENAI_API_KEY
    # environment variable. The key that used to be hard-coded here was
    # exposed in the notebook and should be revoked.
    model = ChatOpenAI(temperature=0, model="gpt-4-1106-preview")

    # Stop at the first newline so the output is limited to a single keyword line
    chain = prompt | model.bind(stop=["\n"])

    result = chain.invoke({"input": input_data})
    final_response = handle_sensitive_response(result.content)

    # Return the refusal untouched. It begins with '죄송', so letting it fall
    # through to the '죄' check below would silently turn it into '형법'.
    if final_response == refusal_message:
        return final_response

    # First matching rule wins; returning immediately also avoids re-matching
    # later rules against the replacement value.
    for keyword, new_value in conversion_rules.items():
        if keyword in final_response:
            return new_value

    # Keywords naming an offence ('...죄') are looked up in the criminal code.
    if '죄' in final_response:
        return '형법'

    return final_response


In [21]:

def process_law_info(final_response):
    """Find the top statute for a keyword and download its text.

    The keyword is searched on glaw.scourt.go.kr with a headless Chrome
    session. The title of the first result (after sorting by popularity) is
    then sent to the law.go.kr ``lawService.do`` endpoint, and the CDATA
    sections of the XML reply are joined into one string.

    Side effects:
        Sets the module-level ``law_name`` and ``limited_cdata_texts``. Both
        are reset to ``None`` on entry so a failed run never leaves results
        from an earlier run behind.

    Args:
        final_response: Search keyword (typically the value returned by
            ``generate_law_keyword``).

    Returns:
        The statute text limited to 12000 whitespace-separated words, or
        ``None`` if the search, the download, or the CDATA extraction yielded
        nothing.
    """
    global limited_cdata_texts, law_name

    limited_cdata_texts = None
    law_name = None

    def extract_cdata(xml_data):
        """Return the stripped contents of every CDATA section in ``xml_data``."""
        cdata_sections = re.findall(r'<!\[CDATA\[(.*?)\]\]>', xml_data, re.DOTALL)
        return [cdata.strip() for cdata in cdata_sections]

    def limit_tokens(texts, max_tokens=12000):
        """Join ``texts`` keeping at most ``max_tokens`` whitespace-separated words.

        "Tokens" here are plain words produced by ``str.split``, not model tokens.
        """
        words = [word for text in texts for word in text.split()]
        return ' '.join(words[:max_tokens])

    def search_law(response, retries=5):
        """Return the title of the first search hit for ``response``, or ``None``."""
        result_count_locator = (By.CSS_SELECTOR, 'h3.search_result_num')

        for attempt in range(retries):
            # A fresh browser is started per attempt; the ``finally`` clause
            # guarantees it is closed again whether the attempt succeeds or not.
            driver = None
            try:
                options = webdriver.ChromeOptions()
                options.add_argument('--headless')
                options.add_argument('--no-sandbox')
                options.add_argument('--disable-dev-shm-usage')

                driver = webdriver.Chrome(options=options)
                print(f"검색 시도: {attempt + 1}, 검색어: '{response}'")
                driver.get("https://glaw.scourt.go.kr/wsjo/lawod/sjo120.do")

                # Remember the result-count heading so we can tell when the
                # search has replaced it.
                original_text = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located(result_count_locator)
                ).text

                search_box = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.NAME, "srchw"))
                )
                search_box.clear()
                # The value is set through JavaScript and the form submitted with RETURN.
                driver.execute_script("arguments[0].value = arguments[1];", search_box, response)
                search_box.send_keys(Keys.RETURN)

                WebDriverWait(driver, 10).until(
                    wait_for_text_change(result_count_locator, original_text)
                )

                popularity_button = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.CSS_SELECTOR, "a.btn_type_5[name='sort_popularity']"))
                )
                popularity_button.click()

                # Fixed pause after clicking the sort button.
                # NOTE(review): presumably gives the list time to re-render —
                # replace with an explicit wait if a reliable signal exists.
                time.sleep(5)

                first_result = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, 'td a[name="listCont"] strong'))
                )
                print(f"검색 완료, 첫 번째 결과: {first_result.text}")
                # The text is read before ``finally`` closes the browser.
                return first_result.text
            except (TimeoutException, WebDriverException) as e:
                print(f"재시도 {attempt + 1}/{retries}, 오류: {e}")
            finally:
                if driver is not None:
                    try:
                        driver.quit()
                    except WebDriverException:
                        # Best-effort cleanup: a failing quit must not mask
                        # the result or abort the retry loop.
                        pass
        print("검색 실패, 결과를 찾을 수 없음.")
        return None

    def fetch_data(url, params, max_retries=10, delay=1, timeout=10):
        """GET ``url`` and return the body text, retrying up to ``max_retries`` times.

        Returns ``None`` when every attempt failed.
        """
        for attempt in range(max_retries):
            try:
                # Without a timeout a stalled connection would hang forever.
                response = requests.get(url, params=params, timeout=timeout)
                response.raise_for_status()  # raises for 4xx/5xx status codes
                return response.text
            except requests.RequestException as e:
                print(f"요청 실패 (시도 {attempt + 1}/{max_retries}): {e}")
                if attempt + 1 < max_retries:
                    sleep(delay)  # pause before the next attempt
        return None

    law_name = search_law(final_response)
    if not law_name:
        print("정보를 찾을 수 없습니다.")
        return None

    print(f'국가법령정보센터에서 {law_name}에 관한 정보를 불러옵니다.')

    # Base URL of the law.go.kr open API
    base_url = "http://www.law.go.kr/DRF/lawService.do"

    params = {
        'OC': 'cwindy200',    # user ID
        'target': 'law',      # service target
        'LM': law_name,       # law name (the title found by the search above)
        'type': 'XML'         # output format (HTML or XML)
    }

    response_text = fetch_data(base_url, params)
    if not response_text:
        print("모든 요청이 실패했습니다.")
        return None

    limited_text = limit_tokens(extract_cdata(response_text))
    if not limited_text:
        # A reply without any CDATA content cannot be indexed downstream.
        print("법령 본문을 찾을 수 없습니다.")
        return None

    limited_cdata_texts = limited_text
    print(f"법령 텍스트 변환: {limited_cdata_texts}")
    return limited_cdata_texts

In [22]:
def execute_legal_advice_agent(input_data):
    """Answer a legal question with a retrieval agent over the fetched statute text.

    The module-level ``limited_cdata_texts`` is embedded into a FAISS index,
    exposed to an OpenAI-functions agent as a retriever tool, and the agent is
    run once on ``input_data``.

    Side effects:
        Prints the answer and stores it in the module-level ``Answer``.

    Args:
        input_data: The user's question.

    Returns:
        The agent's final answer text.

    Raises:
        ValueError: If no statute text has been loaded yet.
    """
    global Answer

    if not limited_cdata_texts:
        raise ValueError(
            "No statute text is loaded; process_law_info() must succeed first."
        )

    # A new index is built on every call, so there is nothing to delete first.
    # (The former `if 'db' in globals(): db.delete()` referred to the local
    # name `db` before assignment and could only raise.)
    documents = TextLoader(limited_cdata_texts).load()
    text_splitter = CharacterTextSplitter(chunk_size=50000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)

    # No key is passed explicitly: the OpenAI wrappers fall back to the
    # OPENAI_API_KEY environment variable. The key that used to be hard-coded
    # here was exposed in the notebook and should be revoked.
    embeddings = OpenAIEmbeddings()
    db = FAISS.from_documents(texts, embeddings)
    retriever = db.as_retriever()
    tool = create_retriever_tool(
        retriever,
        "search_legal_advice",
        "searches and returns answers regarding legal advice and information from Document",
    )
    tools = [tool]

    llm = ChatOpenAI(temperature=0, model="gpt-4-1106-preview")

    memory_key = "history"
    memory = AgentTokenBufferMemory(memory_key=memory_key, llm=llm)

    system_message = SystemMessage(
        content=(
            "Must not repeat the content found in the Document verbatim."
            "Must use tools to look up relevant information from the Document."
            "Must double-check to ensure the grammar of the Korean answer is correct."
        )
    )

    prompt = OpenAIFunctionsAgent.create_prompt(
        system_message=system_message,
        extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)],
    )

    agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)

    agent_executor = AgentExecutor(
        agent=agent,
        tools=tools,
        memory=memory,
        verbose=True,
        return_intermediate_steps=True,
    )

    result = agent_executor({"input": input_data})

    # Take the executor's final output instead of the first AIMessage in the
    # history: when the agent calls the retriever tool, that first message is
    # the tool-call request, not the answer.
    Answer = result["output"]
    print(Answer)
    return Answer


In [23]:
def integrated_law_process(input_data):
    """Run the whole pipeline: keyword extraction, statute lookup, legal answer.

    Side effects:
        Stores the extracted keyword in the module-level ``final_response`` and
        the agent's answer (or ``None``) in the module-level ``Answer`` so later
        cells can inspect them.

    Args:
        input_data: The user's question.

    Returns:
        The agent's answer, or ``None`` when no statute text could be obtained.
    """
    global final_response, Answer

    # Step 1: derive the law keyword (``conversion_rules`` is only read, so no
    # ``global`` declaration is needed for it).
    final_response = generate_law_keyword(input_data, conversion_rules)

    # Step 2: look up the statute and download its text.
    law_text = process_law_info(final_response)
    if not law_text:
        # The lookup already reported why it failed. Skip the agent rather
        # than running it on missing text or on text left from an earlier run.
        Answer = None
        return None

    # Step 3: run the legal-advice agent on the downloaded text.
    Answer = execute_legal_advice_agent(input_data)
    return Answer

In [24]:
# Run the pipeline on the sample question, then print the three notebook-level
# state variables, one per line.
integrated_law_process(input_data)
print(final_response, law_name, Answer, sep="\n")

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: HTTPSConnectionPool(host='api.openai.com', port=443): Max retries exceeded with url: /v1/chat/completions (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f3821fa7ac0>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution')).
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: You exceeded your current quota, please check your plan and billing details..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: You exceeded your current quota, please check your plan and billing details..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<loca

RateLimitError: You exceeded your current quota, please check your plan and billing details.