# Configuration

In [1]:
import os

from langchain.embeddings import HuggingFaceEmbeddings

from langchain.document_loaders import PDFMinerLoader
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp

from langchain.prompts import ChatPromptTemplate
from langchain.prompts.chat import ChatPromptValue
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# Load and split documents

In [2]:
def load_single_document(file_path: str) -> Document:
    loader = PDFMinerLoader(file_path)
    return loader.load()[0]

In [3]:
data_dir = '../data'
file_paths = os.listdir(data_dir)[:2]

In [4]:
%%time
documents = [load_single_document( os.path.join(data_dir, path)) for path in file_paths]

CPU times: total: 1min 23s
Wall time: 1min 32s


In [5]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500, chunk_overlap=100, add_start_index=True
)
documents = text_splitter.split_documents(documents)

# Load embeddings

In [6]:
retriever_name = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=retriever_name)

  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


In [7]:
%%time
vectorstore = Chroma.from_documents(documents=documents, embedding=embeddings)

CPU times: total: 14min 54s
Wall time: 3min 47s


In [8]:
len(documents)

1926

In [10]:
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})

# Load model

In [11]:
model_url = "https://huggingface.co/IlyaGusev/saiga_mistral_7b_gguf/resolve/main/model-q4_K.gguf"
model_name = "model-q4_K.gguf"

In [18]:
# %%time
# !curl -v -L "{model_url}" -o "{model_name}"

CPU times: total: 93.8 ms
Wall time: 6min 19s


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0*   Trying 18.165.122.30:443...
* Connected to huggingface.co (18.165.122.30) port 443
* schannel: disabled automatic use of client certificate
* ALPN: curl offers http/1.1
* ALPN: server accepted http/1.1
* using HTTP/1.1
> GET /IlyaGusev/saiga_mistral_7b_gguf/resolve/main/model-q4_K.gguf HTTP/1.1

> Host: huggingface.co

> User-Agent: curl/8.4.0

> Accept: */*

> 

* schannel: remote party requests renegotiation
* schannel: renegotiating SSL/TLS connection
* schannel: SSL/TLS connection renegotiated
< HTTP/1.1 302 Found

< Content-Type: text/plain; charset=utf-8

< Content-Length: 1131

< Connection: keep-alive

< Date: Sun, 24 Dec 2023 15:02:48 GMT

< X-Powered-By: huggingface-moon

< X-Request-Id: Root=1-65884818-141ff09e04be0841312dde71

< Access-C

* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
* schannel: fail

* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
{ [49152 bytes data]
* schannel: failed to decrypt data, need more data
{ [65536 bytes data]
* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, need more data
{ [32768 bytes data]
* schannel: failed to decrypt data, need more data
{ [81920 bytes data]
* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, need more data
{ [98304 bytes data]
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
{ [32768 bytes data]
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
{ [49152 bytes data]
* schannel: failed to decrypt data, need more data
{ [49152 bytes data]
* schannel: failed to decrypt data, need more data
{ [45424 bytes data]
* schannel: failed to decrypt data

{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, need more data
{ [22360 bytes data]
* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
{ [14808 bytes data]
* sc

* schannel: failed to decrypt data, need more data
{ [32768 bytes data]
* schannel: failed to decrypt data, need more data
{ [32768 bytes data]
* schannel: failed to decrypt data, need more data
{ [49152 bytes data]
* schannel: failed to decrypt data, need more data
{ [32768 bytes data]
* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, need more data
{ [98304 bytes data]
* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, need more data
{ [65282 bytes data]
* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
{ [65536 bytes data]
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
{ [32768 bytes data]
* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* sc

{ [65536 bytes data]
* schannel: failed to decrypt data, need more data
{ [794 bytes data]
* schannel: failed to decrypt data, need more data
{ [32768 bytes data]
* schannel: failed to decrypt data, need more data
{ [57566 bytes data]
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, need more data
{ [49152 bytes data]
* schannel: failed to decrypt data, need more data
{ [98304 bytes data]
* schannel: failed to decrypt data, need more data
{ [28040 bytes data]
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, need more data
{ [16384 bytes data]
* schannel: failed to decrypt data, need more data
* schannel: failed to decrypt data, 

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [12]:
n_ctx = 2512
max_tokens = 500

In [13]:
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

llm = LlamaCpp(
    model_path=model_name,
    n_ctx=n_ctx,
    max_tokens=max_tokens,
    f16_kv=False,
    callback_manager=callback_manager,
    verbose=True,
    temperature=0,
)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


# QnA

In [20]:
template = "Контекст: {context}\n\nИспользуя контекст, ответь на вопрос: {question}"
prompt = ChatPromptTemplate.from_template(template)

In [21]:
output_parser = StrOutputParser()

In [22]:
setup_and_retrieval = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)
chain = setup_and_retrieval | prompt | llm | output_parser

In [23]:
chain.invoke("Когда операцию замыкания считают удовлетворительной?")



При нормальном перемещении рукоятки контакты полностью замкнутся и аппарат в состоянии пропускать номинальный рабочий ток.

'\n\nПри нормальном перемещении рукоятки контакты полностью замкнутся и аппарат в состоянии пропускать номинальный рабочий ток.'