In [1]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.vector_stores import ChromaVectorStore
from llama_index.readers.chroma import ChromaReader
from llama_index import StorageContext, load_index_from_storage, load_indices_from_storage
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms import HuggingFaceLLM
from llama_index.node_parser import SentenceSplitter 
from llama_index.schema import MetadataMode
from IPython.display import Markdown, display
from llama_index import QueryBundle 
from peft import PeftModel, PeftConfig
from transformers import TextStreamer, GenerationConfig
from transformers import LlamaForCausalLM, LlamaTokenizer, LlamaTokenizerFast, BitsAndBytesConfig
from llama_index.response_synthesizers import (
    ResponseMode,
    get_response_synthesizer,
)
import chromadb
import transformers
import pandas as pd
import json 
import openai
import os
import getpass
import torch

In [2]:
default_path = os.getcwd()
data_path = os.path.join(default_path, '/rag/data')
model_path = os.path.join(default_path, '/rag/models')
config_path = os.path.join(default_path, '/rag/config')

In [23]:
os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")
openai.api_key = os.environ["OPENAI_API_KEY"]

OpenAI API Key: ········


In [3]:
model_dir = os.path.join(model_path, "mistral_origin")

In [4]:
tokenizer = AutoTokenizer.from_pretrained(os.path.join(model_dir, 'tokenizer'))   # LlamaTokenizer (x)  -> LlamaTokenizerFast (o)
model = AutoModelForCausalLM.from_pretrained(model_dir, torch_dtype=torch.float16, low_cpu_mem_usage=True) # , device_map="auto")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
g_device = torch.device("cuda") if torch.cuda.is_available() else "cpu"
model.to(g_device)

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )
    (norm): MistralRMSNorm()
  

In [6]:
generation_config = GenerationConfig(
    temperature=0.8,
    do_sample=True,
    top_p=0.95,
    max_new_tokens=512,
)

In [7]:
col = ['qualification', 'desc', 'features'] 

In [8]:
vector_db = chromadb.HttpClient(host='192.168.0.146')

In [9]:
desc_collection = vector_db.get_or_create_collection("desc")
feature_collection = vector_db.get_or_create_collection("feature")
qualification_collection = vector_db.get_or_create_collection("qualification")

In [10]:
generation_config = GenerationConfig(
    temperature=0.8,
    do_sample=True,
    top_p=0.95,
    max_new_tokens=512,
)

In [11]:
model_name = 'kakaobank/kf-deberta-base'
embed_model = HuggingFaceEmbedding(model_name=model_name)

In [12]:
service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=None)

LLM is explicitly disabled. Using MockLLM.


In [13]:
parser = SentenceSplitter(chunk_size=512, chunk_overlap=30)   # SentenceSplitter(chunk_size=1024, chunk_overlap=20)

In [14]:
embedding_service = ServiceContext.from_defaults(node_parser=parser, embed_model=embed_model, llm=None)

LLM is explicitly disabled. Using MockLLM.


In [15]:
desc_store = ChromaVectorStore(chroma_collection=desc_collection)
feature_store = ChromaVectorStore(chroma_collection=feature_collection)
qualification_store = ChromaVectorStore(chroma_collection=qualification_collection)

In [16]:
# service_context 전달 안해주면 query 시 dimension 오류 발생 
features_idx = VectorStoreIndex.from_vector_store(feature_store, service_context=embedding_service)
desc_idx = VectorStoreIndex.from_vector_store(desc_store, service_context=embedding_service)
qualification_idx = VectorStoreIndex.from_vector_store(qualification_store, service_context=embedding_service)

In [17]:
features_retriever = features_idx.as_retriever()
desc_retriever = desc_idx.as_retriever()
qualification_retriever = qualification_idx.as_retriever()

In [18]:
from llama_index.tools import RetrieverTool

feature_tool = RetrieverTool.from_defaults(
    retriever=features_retriever, 
    llm=None, 
    description=(
        "금융 상품 특징에 대해 물어볼 때 대답해줘"
    ),
)

In [19]:
desc_tool = RetrieverTool.from_defaults(
    retriever=desc_retriever,
    description=(
        "금융 상품 관련해서 설명을 요구할 때 대답해줘"
    ),
)

In [20]:
qualification_tool = RetrieverTool.from_defaults(
    retriever=qualification_retriever,
    description=(
        "금융 상품 관련해서 자격 요건을 물어볼 때 대답해줘"
    ),
)

In [27]:
from llama_index.selectors import LLMSingleSelector, LLMMultiSelector, EmbeddingSingleSelector
'''from llama_index.selectors import (
    PydanticMultiSelector,
    PydanticSingleSelector,
)'''
from llama_index.retrievers import RouterRetriever
from llama_index.response.notebook_utils import display_source_node

In [28]:
retriever = RouterRetriever(
    selector=EmbeddingSingleSelector.from_defaults(embed_model=),
    retriever_tools=[
        feature_tool,
        desc_tool,
        qualification_tool,
    ],
)

TypeError: EmbeddingSingleSelector.from_defaults() got an unexpected keyword argument 'service_context'