In [1]:
import os

from faiss_vector_storage import FaissEmbeddingStorage
from trt_llama_api import TrtLlmAPI

from llama_index import ServiceContext, set_global_service_context
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt

from langchain.embeddings.huggingface import HuggingFaceEmbeddings

from utils import get_model_config, read_config

In [2]:
# Tunable settings for this notebook; the cells below read these names.

# Passed to FaissEmbeddingStorage as `data_dir`.
data_dir = r"dataset"
# Passed to FaissEmbeddingStorage as `dimension`.
# NOTE(review): presumably has to match the embedding model's output size — confirm.
embedded_dimension = 1024
# The next three values are forwarded to `faiss_storage.get_engine(...)`.
streaming = True
is_chat_engine = False
similarity_top_k = 4
# Embedding model identifier. This must be a plain string: a trailing comma
# after the literal would silently turn the value into a 1-tuple.
# NOTE(review): the cell that builds `embed_model` hard-codes this same name
# instead of reading this variable.
embedded_model = "WhereIsAI/UAE-Large-V1"

# Joined with the platform separator so the path resolves on any OS
# (on Windows this still evaluates to 'config\\config.json').
model_config_file = os.path.join("config", "config.json")

In [3]:
# Name of the model entry to look up in the configuration.
selected_model_name = "Llama 2 13B int4"

# Read the configuration file, then extract the settings for the chosen model.
config = read_config(model_config_file)
model_config = get_model_config(config, selected_model_name)

In [5]:
# Every model-specific value comes from the selected entry in `model_config`;
# the prompt formatters are the helpers imported from llama_index.
llm_kwargs = {
    "model_path": model_config["model_path"],
    "tokenizer_dir": model_config["tokenizer_path"],
    "temperature": model_config["temperature"],
    "max_new_tokens": model_config["max_new_tokens"],
    "context_window": model_config["max_input_token"],
    "messages_to_prompt": messages_to_prompt,
    "completion_to_prompt": completion_to_prompt,
    "verbose": False,
}
llm = TrtLlmAPI(**llm_kwargs)

# Embedding model used when the service context is built in a later cell.
embed_model = HuggingFaceEmbeddings(
    model_name="WhereIsAI/UAE-Large-V1",
)

[03/05/2024-23:06:31] No sentence-transformers model found with name C:\Users\user/.cache\torch\sentence_transformers\WhereIsAI_UAE-Large-V1. Creating a new one with MEAN pooling.


In [6]:
# Bundle the LLM, the embedding model and the chunking settings into a single
# ServiceContext, using the same context window that was given to the LLM.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    context_window=model_config["max_input_token"],
    chunk_size=512,
    chunk_overlap=200,
)

# Install it as the global service context.
set_global_service_context(service_context)

In [7]:
# Vector storage over `data_dir`, sized by the configured embedding dimension.
faiss_storage = FaissEmbeddingStorage(data_dir=data_dir, dimension=embedded_dimension)

# NOTE(review): force_rewrite=False presumably reuses a previously persisted
# index when one exists — confirm against FaissEmbeddingStorage.
faiss_storage.initialize_index(force_rewrite=False)

# Engine used to answer queries; its mode, streaming flag and retrieval depth
# all come from the configuration cell.
engine = faiss_storage.get_engine(
    is_chat_engine=is_chat_engine,
    streaming=streaming,
    similarity_top_k=similarity_top_k,
)

Using the persisted value form dataset_vector_embedding


In [8]:
# The question is in Russian: "What is boosting?"
query = "Что такое Бустинг?"
# Send the question to the engine built in the previous cell.
response = engine.query(query)

In [9]:
# Display the object returned by `engine.query(...)` as the cell output.
# NOTE(review): the engine was requested with streaming=True, so this may show
# a streaming response object's repr rather than the generated text — confirm
# whether the stream has to be consumed explicitly to see the answer.
response