In [1]:
!pip install sentence-transformers
!pip install faiss-cpu
!pip install datasets
!pip install openai



In [2]:
import os
from sentence_transformers import SentenceTransformer, util
import logging
import faiss
import numpy as np
import time
from typing import List, Dict, Tuple, Callable, Any
import psutil

class ScalableSemanticSearch:
    """Sentence search over FAISS using sentence-transformers embeddings.

    Sentences are embedded with ``sentence-transformers/all-mpnet-base-v2``.
    Corpora with fewer than ``IVFPQ_MIN_POINTS`` vectors are stored in an exact
    ``IndexFlatL2``; larger corpora use a product-quantized ``IndexIVFPQ``.
    """

    # Smallest corpus size for which the IVFPQ index is built instead of the
    # flat index (IVFPQ needs enough points to train on).
    IVFPQ_MIN_POINTS = 1500
    # Product-quantizer settings handed to faiss.IndexIVFPQ.
    PQ_SUBQUANTIZERS = 8
    PQ_BITS = 4

    def __init__(self, device="cpu"):
        """Load the embedding model and configure file logging.

        Args:
            device: Device string passed to SentenceTransformer (e.g. "cpu").
        """
        self.device = device
        self.model = SentenceTransformer(
            "sentence-transformers/all-mpnet-base-v2", device=self.device
        )
        self.dimension = self.model.get_sentence_embedding_dimension()
        self.quantizer = None
        self.index = None
        self.hashmap_index_sentence = None

        log_directory = "log"
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(log_directory, exist_ok=True)
        log_file_path = os.path.join(log_directory, "scalable_semantic_search.log")

        logging.basicConfig(
            filename=log_file_path,
            level=logging.INFO,
            format="%(asctime)s %(levelname)s: %(message)s",
        )
        logging.info("ScalableSemanticSearch initialized with device: %s", self.device)

    @staticmethod
    def calculate_clusters(n_data_points: int) -> int:
        """Return the number of IVF clusters: floor(sqrt(n)), but at least 2."""
        return max(2, min(n_data_points, int(np.sqrt(n_data_points))))

    def encode(self, data: List[str]) -> np.ndarray:
        """Encode input data using sentence transformer model.

        Side effect: ``self.hashmap_index_sentence`` is replaced with a
        position -> sentence map for ``data``.

        Args:
            data: List of input sentences.

        Returns:
            Numpy array of encoded sentences (float32).
        """
        embeddings = self.model.encode(data)
        self.hashmap_index_sentence = self.index_to_sentence_map(data)
        return embeddings.astype("float32")

    def build_index(self, embeddings: np.ndarray) -> None:
        """Build the index for FAISS search.

        Args:
            embeddings: Numpy array of encoded sentences.
        """
        n_data_points = len(embeddings)
        use_ivfpq = n_data_points >= self.IVFPQ_MIN_POINTS
        if use_ivfpq:
            self.quantizer = faiss.IndexFlatL2(self.dimension)
            n_clusters = self.calculate_clusters(n_data_points)
            self.index = faiss.IndexIVFPQ(
                self.quantizer,
                self.dimension,
                n_clusters,
                self.PQ_SUBQUANTIZERS,
                self.PQ_BITS,
            )
            logging.info("IndexIVFPQ created with %d clusters", n_clusters)
            # Only the IVFPQ index is trained before vectors are added.
            self.index.train(embeddings)
        else:
            self.index = faiss.IndexFlatL2(self.dimension)
            logging.info("IndexFlatL2 created")

        self.index.add(embeddings)
        logging.info("Index built on device: %s", self.device)

    @staticmethod
    def index_to_sentence_map(data: List[str]) -> Dict[int, str]:
        """Create a mapping between index and sentence.

        Args:
            data: List of sentences.

        Returns:
            Dictionary mapping index to the corresponding sentence.
        """
        return dict(enumerate(data))

    @staticmethod
    def get_top_sentences(
        index_map: Dict[int, str], top_indices: np.ndarray
    ) -> List[str]:
        """Get the top sentences based on the indices.

        Negative indices are skipped: FAISS pads a result list with -1 when it
        finds fewer neighbours than were requested, and those padding entries
        have no sentence.

        Args:
            index_map: Dictionary mapping index to the corresponding sentence.
            top_indices: Numpy array of top indices.

        Returns:
            List of top sentences, in the order given by ``top_indices``.
        """
        return [index_map[int(i)] for i in top_indices if int(i) >= 0]

    def search(self, input_sentence: str, top: int) -> Tuple[np.ndarray, np.ndarray]:
        """Find the indexed sentences closest to ``input_sentence``.

        Args:
            input_sentence: The sentence to search for.
            top: The number of results to request from the index.

        Returns:
            A tuple ``(indices, scores)``. ``indices`` are positions in the
            encoded corpus, nearest first. ``scores`` are ``1 - D``, where ``D``
            is the distance reported by the FAISS index, so larger is closer.
            NOTE(review): the L2 indexes used here report squared L2 distance;
            for unit-length embeddings exact cosine similarity would be
            ``1 - D / 2`` - confirm before using the scores numerically.

        Raises:
            AttributeError: If no index has been built or loaded yet.
        """
        if self.index is None:
            raise AttributeError(
                "The index has not been built yet. Build the index using `build_index` method first."
            )
        vectorized_input = self.model.encode(
            [input_sentence], device=self.device
        ).astype("float32")
        distances, indices = self.index.search(vectorized_input, top)
        return indices[0], 1 - distances[0]

    def save_index(self, file_path: str) -> None:
        """Save the FAISS index to disk.

        Args:
            file_path: The path where the index will be saved.

        Raises:
            AttributeError: If no index has been built or loaded yet.
        """
        # __init__ always creates the attribute (as None), so test the value
        # rather than the attribute's existence.
        if getattr(self, "index", None) is None:
            raise AttributeError(
                "The index has not been built yet. Build the index using `build_index` method first."
            )
        faiss.write_index(self.index, file_path)

    def load_index(self, file_path: str) -> None:
        """Load a previously saved FAISS index from disk.

        Only the index is restored; ``hashmap_index_sentence`` is left as is.

        Args:
            file_path: The path where the index is stored.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"The specified file '{file_path}' does not exist.")
        self.index = faiss.read_index(file_path)

    @staticmethod
    def measure_time(func: Callable, *args, **kwargs) -> Tuple[float, Any]:
        """Call ``func(*args, **kwargs)`` and return ``(elapsed_seconds, result)``."""
        start_time = time.time()
        result = func(*args, **kwargs)
        elapsed_time = time.time() - start_time
        return elapsed_time, result

    @staticmethod
    def measure_memory_usage() -> float:
        """Return the resident set size of the current process in MiB."""
        process = psutil.Process(os.getpid())
        return process.memory_info().rss / (1024**2)

    def timed_train(self, data: List[str]) -> Tuple[float, float]:
        """Encode ``data`` and build the index, logging time and memory.

        Returns:
            ``(elapsed_seconds, memory_usage_mib)``.
        """
        start_time = time.time()
        embeddings = self.encode(data)
        self.build_index(embeddings)
        elapsed_time = time.time() - start_time
        memory_usage = self.measure_memory_usage()
        logging.info(
            "Training time: %.2f seconds on device: %s", elapsed_time, self.device
        )
        logging.info("Training memory usage: %.2f MB", memory_usage)
        return elapsed_time, memory_usage

    def timed_infer(self, query: str, top: int) -> Tuple[float, float]:
        """Run one search, logging time and memory; the results are discarded.

        Returns:
            ``(elapsed_seconds, memory_usage_mib)``.
        """
        start_time = time.time()
        self.search(query, top)
        elapsed_time = time.time() - start_time
        memory_usage = self.measure_memory_usage()
        logging.info(
            "Inference time: %.2f seconds on device: %s", elapsed_time, self.device
        )
        logging.info("Inference memory usage: %.2f MB", memory_usage)
        return elapsed_time, memory_usage

    def timed_load_index(self, file_path: str) -> float:
        """Load an index from ``file_path`` and return the elapsed seconds."""
        start_time = time.time()
        self.load_index(file_path)
        elapsed_time = time.time() - start_time
        logging.info(
            "Index loading time: %.2f seconds on device: %s", elapsed_time, self.device
        )
        return elapsed_time

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import json
import os
import pandas as pd
from typing import List, Dict, Tuple, Callable, Any


class DST_Prompter:
    """Collects few-shot material for a dialogue-state-tracking prompt.

    The class loads annotated records from a CSV file (one row per example,
    with the utterance in the ``Text`` column) and offers two views of them:
    sample values per slot (``Belief_instructions``) and the single record
    whose text is closest to the query (``formatting_example``).
    """

    # CSV read when the caller does not supply ``data_path``.
    DEFAULT_DATA_PATH = r"C:\\Users\\Askar\\VS_CODE\\Python\\PROJECT\\AIRPLANE\\Data.csv"

    def __init__(self, query: str, data_path: str = DEFAULT_DATA_PATH):
        """Store the query and load the dataset.

        Args:
            query: Utterance used to retrieve the closest example.
            data_path: Location of the CSV file to load.
        """
        self.preliminary_data = []
        self.query = query
        self.data_path = data_path
        self.data = self._data_load()

    def _data_load(self):
        """
        Load data and convert to dict.

        Returns:
            List of row dictionaries; also stored on ``self.data``.
        """
        self.data = pd.read_csv(self.data_path).to_dict('records')
        return self.data

    def _get_all_text(self):
        """Return the ``Text`` value of every record, in dataset order."""
        return [record["Text"] for record in self.data]

    def Belief_instructions(self, max_values: int = 16):
        """List sample values for every slot (every column except ``Text``).

        Slots are reported in first-seen order so the generated prompt is the
        same on every run. Missing values (``pd.isna``) are skipped.

        Args:
            max_values: Maximum number of values collected per slot.

        Returns:
            A list with one ``{slot_name: [values...]}`` dictionary per slot.
        """
        # dict.fromkeys de-duplicates while keeping insertion order; a set
        # would make the order depend on string hashing.
        slot_names = dict.fromkeys(
            key for record in self.data for key in record if key != "Text"
        )

        belief_list = []
        for slot in slot_names:
            values = []
            for record in self.data:
                if len(values) >= max_values:
                    break
                value = record.get(slot)
                if pd.isna(value):
                    continue
                values.append(value)
            belief_list.append({slot: values})

        return belief_list

    def _semantic_search(self):
        """Return the dataset text closest to ``self.query``.

        Raises:
            ValueError: If the dataset contains no records.
        """
        corpus = self._get_all_text()
        if not corpus:
            raise ValueError("Cannot run semantic search on an empty dataset.")

        semantic_search = ScalableSemanticSearch(device="cpu")
        embeddings = semantic_search.encode(corpus)
        semantic_search.build_index(embeddings)
        # Only the best hit is used, so request a single neighbour.
        top_indices, _ = semantic_search.search(self.query, top=1)
        return semantic_search.hashmap_index_sentence[int(top_indices[0])]

    def formatting_example(self):
        """Return the full record whose ``Text`` is closest to the query.

        If several records share that text, the first one is returned.

        Raises:
            ValueError: If the retrieved sentence is not present in the dataset.
        """
        sentence = self._semantic_search()
        all_text = self._get_all_text()
        try:
            index = all_text.index(sentence)
        except ValueError:
            raise ValueError(
                f"Retrieved sentence {sentence!r} was not found in the dataset."
            ) from None
        return self.data[index]


In [4]:
# Build the prompter for the retrieval query. The constructor calls _data_load(),
# so running this cell reads the dataset CSV from disk.
dst = DST_Prompter("Здравствуйте! Меня зовут Фичиков Олег Сергеевич. Хочу забронировать один билет с багажом из Сочи в Москву 20 июля в 10:00.")

In [5]:
# Full dataset record whose Text is closest to dst.query. This embeds the whole
# corpus and builds a fresh index on every call.
close_sentence = dst.formatting_example()

Batches: 100%|██████████| 3/3 [00:16<00:00,  5.39s/it]
Batches: 100%|██████████| 1/1 [00:00<00:00,  5.91it/s]


In [16]:
# The request that is inserted into the LLM prompt below.
# NOTE(review): this sentence differs from the query passed to DST_Prompter when
# `dst` was created (different surname and departure city), so the retrieved
# example was selected for a different sentence - confirm this is intended.
zapros = "Здравствуйте! Меня зовут Валуев Олег Сергеевич. Хочу забронировать один билет с багажом из Новосибирска в Москву 20 июля в 10:00."

In [20]:
# pip install llama-cpp-python fire
import requests
import fire
from llama_cpp import Llama



# Text of the system message that get_system_tokens() encodes.
SYSTEM_PROMPT = "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."
# Token ids spliced into every encoded message by get_message_tokens().
# NOTE(review): presumably these are the ids of the role names and of "\n" in
# the model's tokenizer - confirm against the model actually loaded.
SYSTEM_TOKEN = 1788
USER_TOKEN = 1404
BOT_TOKEN = 9225
LINEBREAK_TOKEN = 13

# Role name -> role token id, looked up by get_message_tokens().
ROLE_TOKENS = {
    "user": USER_TOKEN,
    "bot": BOT_TOKEN,
    "system": SYSTEM_TOKEN
}


def get_message_tokens(model, role, content):
    """Encode one chat message as a token list.

    The content is tokenized, the role token and a line-break token are placed
    at positions 1 and 2 (right after the first token produced by the
    tokenizer), and the model's end-of-sequence token is appended.

    Args:
        model: Object providing ``tokenize(bytes)`` and ``token_eos()``.
        role: Key of ``ROLE_TOKENS`` ("user", "bot" or "system").
        content: Message text.

    Returns:
        The list of token ids for the message.
    """
    encoded = model.tokenize(content.encode("utf-8"))
    # Splice both header tokens in at once, directly behind the first token.
    encoded[1:1] = [ROLE_TOKENS[role], LINEBREAK_TOKEN]
    encoded.append(model.token_eos())
    return encoded


def get_system_tokens(model):
    """Return the token list for the system message built from SYSTEM_PROMPT."""
    return get_message_tokens(model, role="system", content=SYSTEM_PROMPT)

# Location of the GGUF model file passed to Llama().
# Note: this is a raw string, so every "\\" stays as two backslash characters in
# the resulting path. NOTE(review): absolute, machine-specific path - consider
# making it configurable.
model_path = r"C:\\Users\\Askar\\Desktop\\nlp_api\\model-q8_0.gguf"
#model_path = r"C:\\Users\\Askar\\VS_CODE\\Python\\WORK\\ChatBot\\LLama2\\saiga_checkpoints\\ggml-model-q3_K.gguf"
# Context size passed to Llama(n_ctx=...).
n_ctx = 2000
# Sampling settings forwarded to model.generate().
top_k = 30
top_p = 0.9
temperature = 0.2
repeat_penalty = 1.1

import codecs

# Load the model and evaluate the system message once up front.
model = Llama(model_path=model_path, n_ctx=n_ctx, n_parts=1)

system_tokens = get_system_tokens(model)
# Copy the list: `tokens += ...` below extends in place and would otherwise
# also grow system_tokens.
tokens = list(system_tokens)
model.eval(tokens)


# Prompt: slot value samples, the closest annotated example, then the request.
# NOTE(review): the prompt text contains several typos; it is kept unchanged
# here because editing it changes what the model receives.
user_message = f"""User: Следуя ПРИМЕРАМ, Ивзлеки эти данные из предложения: Фамилия, Имя, Отчество, Откуда, Куда, Дата вылета, Время вылета, Количество взрослых,Количество детей,Класс,Багаж.
                    Эти ПРМИЕРЫ поможгут тебе получить необходимые данные из запроса. Не используйте эти ПРИМЕРЫ самом запросе. ПРИМЕРЫ :{dst.Belief_instructions()}, ПРИМЕР: {close_sentence}.
                    Предложение = '{zapros}'. Напиши только полученные данные! 
                     """
message_tokens = get_message_tokens(model=model, role="user", content=user_message)
# Open the bot turn so that generation continues as the assistant's reply.
role_tokens = [model.token_bos(), BOT_TOKEN, LINEBREAK_TOKEN]
tokens += message_tokens + role_tokens
generator = model.generate(
    tokens,
    top_k=top_k,
    top_p=top_p,
    temp=temperature,
    repeat_penalty=repeat_penalty
)
# A single token may carry only part of a multi-byte UTF-8 character. Decoding
# each token on its own with errors="ignore" drops such characters; the
# incremental decoder buffers partial bytes until the character is complete.
utf8_decoder = codecs.getincrementaldecoder("utf-8")(errors="ignore")
for token in generator:
    tokens.append(token)
    if token == model.token_eos():
        break
    print(utf8_decoder.decode(model.detokenize([token])), end="", flush=True)
# Flush whatever the decoder still holds once generation has stopped.
print(utf8_decoder.decode(b"", final=True), end="", flush=True)




AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


Llama.generate: prefix-match hit


{
    "Имя": ["Валуев"],
    "Фамилия": ["Олег"],
    "Отчество": ["Сергеевич"],
    "Откуда": ["Новосибирск"],
    "Куда": ["Москва"],
    "Дата вылета": ["20.07.2023"],
    "Время вылета": ["10:00"],
    "Количество взрослых": [1],
    "Количество детей": [0.0],
    "Класс": ["Бизнес"],
    "Багажник": ["Да"]
}

In [None]:
"C:\\Users\\Askar\\Desktop\\nlp_api\\model-q8_0.gguf"