In [19]:
import chromadb
import pandas as pd 
import openai
import os
import getpass
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index import QueryBundle 
from llama_index.vector_stores import ChromaVectorStore
from llama_index.readers.chroma import ChromaReader
from llama_index import StorageContext, load_index_from_storage, load_indices_from_storage
# from transformers import AutoTokenizer, AutoModel
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms import HuggingFaceLLM
from llama_index.node_parser import SentenceSplitter 
from llama_index.schema import MetadataMode
from IPython.display import Markdown, display
from llama_index.retrievers import VectorIndexRetriever 
from llama_index.query_engine import RetrieverQueryEngine 
from llama_index.postprocessor import SimilarityPostprocessor 
from llama_index.postprocessor import KeywordNodePostprocessor 
from llama_index.postprocessor import SimilarityPostprocessor, CohereRerank
from llama_index.tools.query_engine import QueryEngineTool, ToolMetadata
from llama_index.schema import Node, NodeWithScore 

In [20]:
# Root locations of the raw documents and of the persisted local indexes.
data_path = '/rag/data/'
index_path = '/rag/db/local'

In [14]:
# Reuse a key that is already exported in the environment so that a full
# "Restart & Run All" does not block on an interactive prompt; only ask the
# user when the variable is missing or empty. The key is never hard-coded.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")
openai.api_key = os.environ["OPENAI_API_KEY"]

OpenAI API Key: ········


In [21]:
# Hugging Face model id handed to the embedding wrapper.
model_name = "kakaobank/kf-deberta-base"
embed_model = HuggingFaceEmbedding(
    model_name=model_name,
)

In [22]:
# Sentence-based splitter with chunk_size=512 and chunk_overlap=30.
# (Alternative setting kept for reference: chunk_size=1024, chunk_overlap=20.)
parser = SentenceSplitter(
    chunk_size=512,
    chunk_overlap=30,
)

In [23]:
# Service context used for loading and retrieval: custom splitter and embedding
# model, with the LLM explicitly disabled (the library logs that it falls back
# to MockLLM).
service_context = ServiceContext.from_defaults(
    llm=None,
    embed_model=embed_model,
    node_parser=parser,
)

LLM is explicitly disabled. Using MockLLM.


In [24]:
# Each document facet is persisted in its own sub-directory of index_path.
desc_persist_dir = os.path.join(index_path, 'desc')
features_persist_dir = os.path.join(index_path, 'features')
qualification_persist_dir = os.path.join(index_path, 'qualification')

# One storage context per persisted directory.
desc_storage_context = StorageContext.from_defaults(persist_dir=desc_persist_dir)
features_storage_context = StorageContext.from_defaults(persist_dir=features_persist_dir)
qualification_storage_context = StorageContext.from_defaults(persist_dir=qualification_persist_dir)

In [25]:
# NOTE: service_context must be passed when loading; if it is omitted, a
# dimension error occurs at query time.
features_idx = load_index_from_storage(
    features_storage_context,
    index_id='loan_tmp',
    service_context=service_context,
)
# Three description indexes are loaded in one call: card, loan, deposit.
desc_idx = load_indices_from_storage(
    desc_storage_context,
    index_ids=['card_tmp', 'loan_tmp', 'deposit_tmp'],
    service_context=service_context,
)
qualification_idx = load_index_from_storage(
    qualification_storage_context,
    index_id='loan_tmp',
    service_context=service_context,
)

In [26]:
# Build one query engine per loaded index, each retrieving the top-2 most
# similar nodes and sharing the same service context.
#
# desc_idx positions are assumed to follow the index_ids order used when the
# indexes were loaded (card, loan, deposit) — TODO confirm against the loader.
desc_card_engine = desc_idx[0].as_query_engine(
    similarity_top_k=2, service_context=service_context
)
desc_loan_engine = desc_idx[1].as_query_engine(
    similarity_top_k=2, service_context=service_context
)
desc_depo_engine = desc_idx[2].as_query_engine(
    similarity_top_k=2, service_context=service_context
)
features_loan_engine = features_idx.as_query_engine(
    similarity_top_k=2, service_context=service_context
)
# Fixed: stray characters after the closing parenthesis made this cell a
# syntax error.
qualification_loan_engine = qualification_idx.as_query_engine(
    similarity_top_k=2, service_context=service_context
)

In [28]:
# Wrap every query engine as a named tool. The tool name and description are
# the only information available for telling the tools apart, so each name
# must be unique and each description must match the index behind the engine.
query_tool_desc = QueryEngineTool.from_defaults(
    query_engine=desc_card_engine,
    name="desc_card",
    description="카드 상품에 대한 상품 정보를 설명한다",
)
query_tool_loan = QueryEngineTool.from_defaults(
    query_engine=desc_loan_engine,
    name="desc_loan",
    description="대출 상품에 대한 상품 정보를 설명한다",
)
query_tool_deposit = QueryEngineTool.from_defaults(
    query_engine=desc_depo_engine,
    name="desc_depo",
    description="예금 상품에 대한 상품 정보를 설명한다",
)
# Fixed: this tool and the next one previously reused the name "desc_depo" and
# described deposit products, although both engines are built from the loan
# ('loan_tmp') indexes.
query_tool_loan_feat = QueryEngineTool.from_defaults(
    query_engine=features_loan_engine,
    name="features_loan",
    description="대출 상품에 대한 특징을 설명한다",
)
query_tool_loan_qualification = QueryEngineTool.from_defaults(
    query_engine=qualification_loan_engine,
    name="qualification_loan",
    description="대출 상품에 가입하기 위한 자격 요건을 설명한다",
)
query_engine_tools = [
    query_tool_desc,
    query_tool_loan,
    query_tool_deposit,
    query_tool_loan_feat,
    query_tool_loan_qualification,
]

In [29]:
# Display the assembled tool list as a quick check that all five tools exist.
query_engine_tools

[<llama_index.tools.query_engine.QueryEngineTool at 0x7f780e575b10>,
 <llama_index.tools.query_engine.QueryEngineTool at 0x7f780e5768f0>,
 <llama_index.tools.query_engine.QueryEngineTool at 0x7f780e575600>,
 <llama_index.tools.query_engine.QueryEngineTool at 0x7f780e575510>,
 <llama_index.tools.query_engine.QueryEngineTool at 0x7f780e576080>]

In [30]:
import nest_asyncio

# Patch asyncio so event loops can be nested. Presumably needed because the
# notebook kernel already runs a loop and the query engine built below uses
# asyncio internally — TODO confirm.
nest_asyncio.apply()

In [31]:
# Rebuild the service context without llm=None, so an LLM is configured for
# the sub-question engine (presumably the library default backed by the OpenAI
# key set earlier — confirm). This rebinds the name service_context; objects
# that were already handed the previous context are not modified by this line.
service_context = ServiceContext.from_defaults(
    embed_model=embed_model,
    node_parser=parser,
)

In [32]:
from llama_index.query_engine import SubQuestionQueryEngine

# Engine that breaks a question into sub-questions, sends each to one of the
# registered tools, and combines the answers (see the trace printed by the
# query cell).
query_engine = SubQuestionQueryEngine.from_defaults(
    service_context=service_context,
    query_engine_tools=query_engine_tools,
)

In [18]:
import asyncio

# Send a sample question through the sub-question engine and show only the
# final answer text.
# NOTE(review): the query text looks like an unfinished or mistyped question —
# confirm the intended wording.
sample_query = QueryBundle('카드 보조ㅡ ')
query_engine.query(sample_query).response

Generated 2 sub questions.
[1;3;38;2;237;90;200m[desc_card] Q: What are the benefits of the card?
[0m[1;3;38;2;237;90;200m[desc_card] A: Context information is below.
---------------------
This card will make you smile

Special Benefits~ Enjoy 우리V외국인체크카드! 외국인을 위한 실용적인 서비스가 가득
---------------------
Given the context information and not prior knowledge, answer the query.
Query: What are the benefits of the card?
Answer: 
[0m[1;3;38;2;90;149;237m[desc_card] Q: What are the features of the card?
[0m[1;3;38;2;90;149;237m[desc_card] A: Context information is below.
---------------------
Catch me in the moment!

This card will make you smile
---------------------
Given the context information and not prior knowledge, answer the query.
Query: What are the features of the card?
Answer: 
[0m

'The benefits of the card include practical services tailored for foreigners.'