In [1]:
import logging
from hydra import compose, initialize

# Route INFO-and-above log records to the root handler and create a logger
# named after this module for use in later cells.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Compose the "properties" config from the "./config" directory; the Hydra
# initialization context is only needed while composing, `cfg` outlives it.
with initialize(config_path="./config", version_base=None):
    cfg = compose(config_name="properties")

In [2]:
import base64
import mimetypes
from operator import itemgetter

from langchain.chains.base import Chain
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnableParallel, RunnableSequence, RunnableLambda, RunnablePassthrough, ConfigurableField
from langchain_openai.chat_models import AzureChatOpenAI
from omegaconf import DictConfig
from openai import RateLimitError

def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is opened in binary mode and read in full; the bytes are
    base64-encoded and the result is decoded to ``str`` as UTF-8.
    """
    with open(image_path, "rb") as fh:
        raw_bytes = fh.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

def build_chat_model(cfg: DictConfig):
    """Create the chat model used by the notebook from ``cfg.llm.openai``.

    Every entry under ``cfg.llm.openai`` is forwarded as a keyword argument to
    ``AzureChatOpenAI``. The model is then wrapped with
    ``configurable_alternatives`` on the ``"llm_type"`` field, with ``"openai"``
    as the default key; no alternative models are registered here.
    """
    openai_kwargs = cfg.llm.openai
    base_llm = AzureChatOpenAI(**openai_kwargs)
    llm_selector = ConfigurableField(id="llm_type")
    return base_llm.configurable_alternatives(llm_selector, default_key="openai")

def format_messages(inputs: dict):
    """Build the chat message list for a text question with optional images.

    Args:
        inputs: Mapping with the keys
            * ``"question"`` (required): text placed first in the human message.
            * ``"system_prompt"`` (optional): system message text; a missing or
              falsy value falls back to ``"You are a helpful AI bot."``.
            * ``"images"`` (optional): iterable of image URLs, each appended to
              the human message as an ``image_url`` part. A missing key or
              ``None`` is treated as "no images".

    Returns:
        A two-element list ``[SystemMessage, HumanMessage]``. The human message
        content is a list of parts: one text part followed by one ``image_url``
        part per image, in input order.

    Raises:
        KeyError: If ``"question"`` is absent from ``inputs``.
    """
    system_message = inputs.get("system_prompt") or "You are a helpful AI bot."

    # The text part always comes first, followed by the image parts.
    human_parts = [{"type": "text", "text": inputs["question"]}]

    # `or ()` lets text-only requests through when "images" is missing or None
    # instead of failing with KeyError / TypeError.
    for image_url in inputs.get("images") or ():
        human_parts.append({"type": "image_url", "image_url": {"url": image_url}})

    return [
        SystemMessage(content=system_message),
        HumanMessage(content=human_parts),
    ]

In [None]:
MAX_ATTEMPT = 3  # passed to with_retry() as stop_after_attempt
model = build_chat_model(cfg)


def image_data_url(image_path):
    """Return a base64 ``data:`` URL for the image file at ``image_path``.

    The media type is guessed from the file extension so that, for example, a
    ``.png`` file is labelled ``image/png``. When the extension is unknown or
    does not map to an image type, ``image/jpeg`` is used as the fallback.
    """
    mime_type, _ = mimetypes.guess_type(str(image_path))
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"
    return f"data:{mime_type};base64,{encode_image(image_path)}"


# Pipeline: pick the three input fields (turning image paths into data URLs),
# assemble the system/human messages, then call the model. Only
# RateLimitError triggers a retry, with exponential backoff and jitter.
chain = (
    RunnableParallel(
        {
            "system_prompt": itemgetter("system_prompt"),
            "question": itemgetter("question"),
            "images": itemgetter("images") | RunnableLambda(
                lambda paths: [image_data_url(path) for path in paths]
            )
        }
    )
    | format_messages
    | model.with_retry(
        retry_if_exception_type=(RateLimitError,),
        stop_after_attempt=MAX_ATTEMPT,
        wait_exponential_jitter=True
    )
)

# System prompt that instructs the model to act as an OCR engine: return only
# the extracted text, unmodified and untranslated, formatted as markdown/HTML.
# NOTE(review): the prompt text contains typos ("privide", "intruction",
# "structurized"). It is left byte-identical here because it is runtime text
# sent to the model; correct it deliberately if the wording is revised.
system_prompt = """You are an Optical Character Recognition machine.
You will extract all the characters from the image provided by the user, and you will only privide the extracted text in your response.
As an OCR machine, You can only respond with the extracted text according to the following intruction.
* Do not modify any of the content in the given image.
* Skip the preamble in your answer.
* Format your answer with structurized information such as markdown or html.
* Do not translate any of the content in the given image. Return as it is."""

# Run the OCR chain on a single local image. The question is in Korean and
# asks the model to extract the text in the image exactly as in the original.
ocr_request = {
    "system_prompt": system_prompt,
    "question": "이미지에 있는 텍스트를 원본 그대로 추출해줘.",
    "images": ["./data/table_2.png"],
}
response = chain.invoke(ocr_request)

In [20]:
# print() (rather than a bare expression) so newlines in the extracted text
# are rendered instead of being shown escaped in the string repr.
extracted_text = response.content
print(extracted_text)

| 은행명 | 상품명 | 금리 | 대상 |
| --- | --- | --- | --- |
| KB국민은행 | KB무궁화 신용대출 | 1.25 | 경찰청 협약 |
|  | KB공무원 우대대출 | 1.99 | 공무원연금공단 협약 |
| 신한은행 | 세미래 행복대출 | 1.70 | 국세청 협약 |
|  | 서울메이트 공무원대출 | 2.19 | 서울특별시 협약 |
| 우리은행 | 공무원 PPL | 2.69 |  |
| 하나은행 | 공무원기계차지대출 | 2.21 | 공무원연금공단 협약 |
| NH농협은행 | e-채움 공무원기계자금 *공지사항은인터넷뱅킹 상품 | 1.72 | 공무원연금공단 협약 |
|  | 공무원생활안정자금 | 1.69 | 공무원연금공단·국군재정관리단 협약 |

*2021년 4월 평균금리 기준, 단 NH농협은행은 최저금리 기준 (자료: 김도균 의원실)
