# Create RAG Chain

## Library Import

In [7]:
import glob
import hashlib
import os
import re

import fitz
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import CSVLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

from resource.config import api_key

## OpenAI API Authentication Key

In [8]:
# Publish the key as an environment variable; the OpenAI-backed LangChain
# objects created later in this notebook are built without an explicit key.
os.environ.update({'OPENAI_API_KEY': api_key})

## Data Load

In [9]:
# CSV file handed to the chain builder as its tabular source.
csv_path = './data/hospital_loc/hospital_loc.csv'
# Directory that is searched (recursively) for the manual documents.
pdf_path = './data/manual/'

## Define RAG Chainer

In [10]:
class EmergencyRAGChainer:
    """Build a retrieval-augmented QA chain over PDF manuals and a CSV file.

    The pipeline is: extract per-page text from every PDF under ``dir_path``,
    load the CSV rows, clean and chunk the text, embed the chunks into a
    persisted Chroma collection, and wire an MMR retriever into a prompt that
    asks the chat model to answer in Korean from the retrieved context only.
    """

    # Glyphs and control characters removed from extracted PDF text in a
    # single translate() pass: decorations are dropped, separators become
    # spaces (runs of whitespace are collapsed afterwards).
    _PDF_NOISE = str.maketrans({
        '▶': None,
        '·': None,
        '•': None,
        '※': None,
        '\x07': None,
        '\x01': ' ',
        '\n': ' ',
    })
    _WHITESPACE_RUN = re.compile(r'\s+')
    # PDF pages whose cleaned text is this short or shorter are not indexed.
    _MIN_PAGE_CHARS = 10

    def __init__(self, dir_path, csv_path, *,
                 persist_directory='./db/chromadb_1',
                 collection_name='emergency_manual'):
        """Configure loaders, splitter, embedding model, prompt and chat model.

        Args:
            dir_path: Directory searched recursively for ``.pdf`` files.
            csv_path: Path of the CSV file loaded with ``CSVLoader``.
            persist_directory: Directory of the persisted Chroma database.
            collection_name: Name of the Chroma collection to write to.
        """
        self.dir_path = dir_path
        self.csv_path = csv_path
        self.persist_directory = persist_directory
        self.collection_name = collection_name
        self.csv_loader = CSVLoader(self.csv_path)
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=100,
            length_function=len,
        )
        self.embedding_model = OpenAIEmbeddings()
        # Kept byte-identical to the original triple-quoted prompt, including
        # the four-space indentation of the continuation lines.
        template = (
            'Answer the question in korean, based only on the following context:\n'
            '    {context}\n'
            '\n'
            '    Question: {question}\n'
            '    '
        )
        self.prompt = ChatPromptTemplate.from_template(template)
        self.model = ChatOpenAI(
            model='gpt-4o-mini',
            temperature=0,
            max_tokens=500,
        )

    def extract_text_pdf(self, pdf_path):
        """Return one ``{'text': ...}`` dict per page of the PDF at ``pdf_path``.

        The document is opened in a ``with`` block so the handle is released
        even when text extraction of a page raises.
        """
        with fitz.open(pdf_path) as document:
            return [{'text': page.get_text().strip()} for page in document]

    def load_data(self):
        """Load every PDF page under ``dir_path`` and every row of the CSV.

        Only regular files with a ``.pdf`` extension (any letter case) are
        opened; the previous ``*.*`` pattern also matched unrelated files and
        dotted directory names. Paths are sorted so the page order is stable.

        Returns:
            A ``(pdf_docs, csv_docs)`` tuple: a list of per-page text dicts and
            the list of documents returned by the CSV loader.
        """
        candidates = glob.glob(os.path.join(self.dir_path, '**', '*'), recursive=True)
        pdf_files = sorted(
            path for path in candidates
            if os.path.isfile(path) and path.lower().endswith('.pdf')
        )

        pdf_docs = []
        for pdf_file in pdf_files:
            pdf_docs.extend(self.extract_text_pdf(pdf_file))

        csv_docs = self.csv_loader.load()
        return pdf_docs, csv_docs

    def preprocess(self, pdf_docs, csv_docs):
        """Clean PDF page text and CSV row text in place.

        PDF pages: decorative glyphs and control characters are removed,
        whitespace runs are collapsed to one space, and pages that end up
        empty are dropped. CSV rows: the byte-order mark is removed and
        newlines become spaces.

        The original code also ran ``re.sub('^[...]', '', text)``. Because the
        ``^`` sat outside the brackets it was a start-of-string anchor, so the
        call merely deleted the first Korean character (or space) of each page
        and row. That substitution is intentionally gone.

        Returns:
            A ``(cleaned_pdf, csv_docs)`` tuple; ``csv_docs`` is the same list
            object that was passed in.
        """
        cleaned_pdf = []
        for doc in pdf_docs:
            raw = doc['text']
            if not raw:
                continue
            text = self._WHITESPACE_RUN.sub(' ', raw.translate(self._PDF_NOISE)).strip()
            doc['text'] = text
            if text:
                cleaned_pdf.append(doc)

        for doc in csv_docs:
            doc.page_content = doc.page_content.replace('\ufeff', '').replace('\n', ' ')

        return cleaned_pdf, csv_docs

    def text_split(self, pdf_docs, csv_docs):
        """Split cleaned PDF pages and CSV rows into chunks for embedding.

        PDF pages of ``_MIN_PAGE_CHARS`` characters or fewer are skipped; every
        CSV row is split regardless of length.

        Returns:
            A flat list of chunk strings, PDF chunks first, then CSV chunks.
        """
        texts = [doc['text'] for doc in pdf_docs if len(doc['text']) > self._MIN_PAGE_CHARS]
        texts.extend(doc.page_content for doc in csv_docs)

        splits = []
        for text in texts:
            splits.extend(self.text_splitter.split_text(text))
        return splits

    def create_vectorstore(self, splits):
        """Embed ``splits`` into the persisted Chroma collection.

        Chunks are de-duplicated and given ids derived from their content.
        Without explicit ids the store assigns random ones, so re-running this
        against the same persisted directory would append a second copy of
        every chunk.

        Raises:
            ValueError: If there is nothing to index.
        """
        unique_chunks = list(dict.fromkeys(splits))
        if not unique_chunks:
            raise ValueError('No text chunks to index; check dir_path and csv_path.')

        chunk_ids = [
            hashlib.sha256(chunk.encode('utf-8')).hexdigest()
            for chunk in unique_chunks
        ]
        return Chroma.from_texts(
            unique_chunks,
            self.embedding_model,
            ids=chunk_ids,
            collection_name=self.collection_name,
            persist_directory=self.persist_directory,
            collection_metadata={'hnsw:space': 'cosine'},
        )

    def create_retriever(self, vector_store, k=20, fetch_k=60, lambda_mult=0.5):
        """Return an MMR retriever over ``vector_store``.

        Args:
            vector_store: Store exposing ``as_retriever``.
            k: Number of chunks handed to the prompt.
            fetch_k: Size of the candidate pool MMR re-ranks; it has to be
                larger than ``k`` for the diversity step to change anything.
            lambda_mult: MMR trade-off between relevance and diversity. This
                replaces the former ``"alpha"`` key, which is not an MMR
                search parameter.
        """
        return vector_store.as_retriever(
            search_type='mmr',
            search_kwargs={
                'k': k,
                'fetch_k': fetch_k,
                'lambda_mult': lambda_mult,
            },
        )

    def format_docs(self, docs):
        """Join the ``page_content`` of ``docs`` with blank lines in between."""
        return '\n\n'.join(d.page_content for d in docs)

    def create_rag_chain(self):
        """Run the full ingestion pipeline and return the runnable QA chain.

        The returned chain takes the question string as input, fills the
        prompt with the retrieved context and the question, calls the chat
        model and parses the reply to a plain string.
        """
        pdf_docs, csv_docs = self.load_data()
        pdf_docs, csv_docs = self.preprocess(pdf_docs, csv_docs)
        splits = self.text_split(pdf_docs, csv_docs)
        vector_store = self.create_vectorstore(splits)
        retriever = self.create_retriever(vector_store)

        chain_inputs = {
            'context': retriever | self.format_docs,
            'question': RunnablePassthrough(),
        }
        return chain_inputs | self.prompt | self.model | StrOutputParser()

## Create Chain

In [11]:
# Build the helper from the two data locations defined earlier, then run the
# ingestion pipeline once to obtain the runnable chain.
chainer = EmergencyRAGChainer(dir_path=pdf_path, csv_path=csv_path)
chain = chainer.create_rag_chain()

## Invoke Chain

In [12]:
# Korean for "first-aid procedure in a cardiac-arrest situation".
query = '심정지 상황 응급처치 절차'

# Run the chain on the question and show the model's answer.
response = chain.invoke(query)
print(response)

심정지 상황에서의 응급처치 절차는 다음과 같습니다:

1. **안전 확인**: 주변 환경이 안전한지 확인합니다.
2. **반응 확인**: 환자를 가볍게 흔들거나 소리쳐서 반응이 있는지 확인합니다.
3. **응급전화 요청**: 반응이 없으면 즉시 119에 전화하여 도움을 요청합니다.
4. **CPR 시작**: 
   - 환자를 평평한 바닥에 눕히고, 가슴 중앙에 손을 겹쳐 놓습니다.
   - 팔을 곧게 펴고, 체중을 실어 가슴을 약 5~6cm 깊이로 압박합니다.
   - 30회의 압박 후, 기도를 확보하고 인공호흡 2회를 실시합니다.
   - 이 과정을 반복합니다.
5. **자동심장충격기(AED) 사용**: AED가 있다면 즉시 사용하여 지시에 따릅니다.

이 절차를 통해 심정지 환자의 생명을 구할 수 있습니다.
