In [1]:
import chromadb
import pandas as pd 
import openai
import os
import getpass
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index import QueryBundle 
from llama_index.vector_stores import ChromaVectorStore
from llama_index.readers.chroma import ChromaReader
from llama_index import StorageContext, load_index_from_storage, load_indices_from_storage
# from transformers import AutoTokenizer, AutoModel
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms import HuggingFaceLLM
from llama_index.node_parser import SentenceSplitter 
from llama_index.schema import MetadataMode
from IPython.display import Markdown, display
from llama_index.retrievers import VectorIndexRetriever 
from llama_index.query_engine import RetrieverQueryEngine 
from llama_index.postprocessor import SimilarityPostprocessor 
from llama_index.postprocessor import KeywordNodePostprocessor 
from llama_index.postprocessor import SimilarityPostprocessor, CohereRerank
from llama_index.tools.query_engine import QueryEngineTool, ToolMetadata
from llama_index.schema import Node, NodeWithScore 

In [4]:
# Filesystem locations used by the notebook.
# os.path.join() with a single argument returns that argument unchanged,
# so the literals are assigned directly.
data_path = '/workspace/data/'      # data directory (not referenced by the visible cells)
index_path = '/workspace/db/local'  # parent directory of the persisted index stores

In [5]:
# Make the OpenAI API key available both through the environment and on the
# openai module.
# Prompt only when the environment does not already provide a non-empty key,
# so a non-interactive "Restart & Run All" is not blocked waiting for input.
# getpass keeps the typed key out of the notebook source and rendered output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")
openai.api_key = os.environ["OPENAI_API_KEY"]

OpenAI API Key: ········


In [3]:
# Checkpoint name handed to HuggingFaceEmbedding; the resulting embed_model is
# what the ServiceContext cells pass as `embed_model`.
# NOTE(review): presumably this must be the same model the persisted indices
# were built with (a later comment mentions a dimension error otherwise) — confirm.
model_name = 'kakaobank/kf-deberta-base'
embed_model = HuggingFaceEmbedding(model_name=model_name)

In [7]:
# Sentence-based node parser used by the service contexts.
# Alternative setting noted by the author:
#   SentenceSplitter(chunk_size=1024, chunk_overlap=20)
parser = SentenceSplitter(
    chunk_size=512,
    chunk_overlap=30,
)

In [8]:
# Service context for loading and querying the persisted indices.
# llm=None turns the LLM off for this context; llama_index then reports
# "LLM is explicitly disabled. Using MockLLM."
service_context = ServiceContext.from_defaults(
    llm=None,
    embed_model=embed_model,
    node_parser=parser,
)

LLM is explicitly disabled. Using MockLLM.


In [10]:
# One StorageContext per persisted store; each store lives in its own
# sub-directory of index_path. Created in the order desc, features, qualification.
desc_storage_context, features_storage_context, qualification_storage_context = (
    StorageContext.from_defaults(persist_dir=os.path.join(index_path, store_dir))
    for store_dir in ('desc', 'features', 'qualification')
)

In [11]:
# If service_context is not passed here, a dimension error occurs at query time.
features_idx = load_index_from_storage(
    features_storage_context,
    index_id='loan_tmp',
    service_context=service_context,
)

# desc_idx is a list; the engine cell indexes it positionally
# (0 = card, 1 = loan, 2 = deposit).
# NOTE(review): this assumes the returned list follows the order of index_ids — confirm.
desc_idx = load_indices_from_storage(
    desc_storage_context,
    index_ids=['card_tmp', 'loan_tmp', 'deposit_tmp'],
    service_context=service_context,
)

qualification_idx = load_index_from_storage(
    qualification_storage_context,
    index_id='loan_tmp',
    service_context=service_context,
)

In [12]:
# Build one query engine per description index, in the positional order
# card (0), loan (1), deposit (2). Each engine retrieves the top 2 matches.
# NOTE(review): if service_context is the LLM-disabled one at this point, these
# engines presumably return the raw prompt instead of a real answer (the
# sub-question output in this notebook shows that pattern) — confirm it is intended.
desc_card_engine, desc_loan_engine, desc_depo_engine = (
    desc_idx[position].as_query_engine(
        similarity_top_k=2,
        service_context=service_context,
    )
    for position in range(3)
)

In [15]:
def _make_desc_tool(engine, tool_name, tool_description):
    """Wrap a description query engine as a named QueryEngineTool."""
    return QueryEngineTool.from_defaults(
        query_engine=engine,
        name=tool_name,
        description=tool_description,
    )


# One tool per product family (card / loan / deposit). The descriptions are
# plain string literals; they contain no placeholders, so no f-string is needed.
query_tool_desc = _make_desc_tool(
    desc_card_engine, "desc_card", "카드 상품에 대한 상품 정보를 설명한다"
)
query_tool_loan = _make_desc_tool(
    desc_loan_engine, "desc_loan", "대출 상품에 대한 상품 정보를 설명한다"
)
query_tool_deposit = _make_desc_tool(
    desc_depo_engine, "desc_depo", "예금 상품에 대한 상품 정보를 설명한다"
)

# Tool list handed to the sub-question query engine.
query_engine_tools = [query_tool_desc, query_tool_loan, query_tool_deposit]

In [16]:
# Display the tool list as the cell output (sanity check: three QueryEngineTool objects).
query_engine_tools

[<llama_index.tools.query_engine.QueryEngineTool at 0x7fb490beaec0>,
 <llama_index.tools.query_engine.QueryEngineTool at 0x7fb490be94e0>,
 <llama_index.tools.query_engine.QueryEngineTool at 0x7fb490be99c0>]

In [17]:
import nest_asyncio

# Patch asyncio so event loops can be nested.
# NOTE(review): presumably required because the sub-question engine drives
# async work while the notebook's own event loop is already running — confirm.
nest_asyncio.apply()

In [28]:
# Rebinds service_context WITHOUT llm=None, so the library's default LLM is used
# from here on (same parser and embedding model as before).
# NOTE(review): this shadows the earlier LLM-disabled context under the same
# name; re-running earlier cells after this one will pick up this context instead.
service_context = ServiceContext.from_defaults(
    embed_model=embed_model,
    node_parser=parser,
)

In [29]:
from llama_index.query_engine import SubQuestionQueryEngine

# Engine that breaks a query into sub-questions (the run output reports
# "Generated 3 sub questions.") and routes them to the registered tools.
query_engine = SubQuestionQueryEngine.from_defaults(
    service_context=service_context,
    query_engine_tools=query_engine_tools,
)

In [30]:
# asyncio is not referenced in this cell; the import is kept in case other cells rely on it.
import asyncio

# NOTE(review): the query text ends with a lone jamo and a trailing space, which
# looks like unfinished input — confirm the intended query.
user_query = QueryBundle('카드 보조ㅡ ')
query_engine.query(user_query)

Generated 3 sub questions.
[1;3;38;2;237;90;200m[desc_card] Q: What are the features of the desc_card tool?
[0m[1;3;38;2;237;90;200m[desc_card] A: Context information is below.
---------------------
This card will make you smile

Catch me in the moment!
---------------------
Given the context information and not prior knowledge, answer the query.
Query: What are the features of the desc_card tool?
Answer: 
[0m[1;3;38;2;90;149;237m[desc_loan] Q: What information can the desc_loan tool provide?
[0m[1;3;38;2;90;149;237m[desc_loan] A: Context information is below.
---------------------
누구나3분이면한도조회OK!

나(MY)를 위한 맞춤 대출!
---------------------
Given the context information and not prior knowledge, answer the query.
Query: What information can the desc_loan tool provide?
Answer: 
[0m[1;3;38;2;11;159;203m[desc_depo] Q: What data does the desc_depo tool offer?
[0m[1;3;38;2;11;159;203m[desc_depo] A: Context information is below.
---------------------
사업자 은행 거래의 시작!

급여이체실적만으로 더 많은혜택을!
-

Response(response='The desc_card tool offers features that can make you smile and catch you in the moment.', source_nodes=[NodeWithScore(node=TextNode(id_='2a4b8400-48e5-43ca-bd87-0ff098e98cf4', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='074f809ed3002467ce0ad7edd110486dc72704cd86b13c3dba29d6bcff3d335e', text='Sub question: What are the features of the desc_card tool?\nResponse: Context information is below.\n---------------------\nThis card will make you smile\n\nCatch me in the moment!\n---------------------\nGiven the context information and not prior knowledge, answer the query.\nQuery: What are the features of the desc_card tool?\nAnswer: ', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=None), NodeWithScore(node=TextNode(id_='10a6bd77-4edd-42d8-b0ef-ec0715e3569a', embedding=None, metadata={}, excluded_em