In [None]:
# Directory that holds the .docx and .pdf documents to index; replace the placeholder with your own path.
rag_path = r"your\path\here"

In [None]:
# Your question for the retrieval step and the LLM
question = "Who is responsible for the mess?"

In [None]:
# Names of the local models: embedder_name is given to HuggingFaceEmbeddings, LLM_name to Ollama.
embedder_name = "deepvk/USER-bge-m3"
LLM_name = "gemma2"

In [None]:
# Prompt template for the LLM; edit the text to change how the model is instructed.
# {context} and {question} are template placeholders and must stay in the string.
# The instructions are written in Russian and say, in short:
#   1. use the context to answer the question at the end;
#   2. answer "I don't know" instead of inventing an answer when unsure;
#   3. answer in as much detail as possible, based on the documents.
prompt = """
1. Используй контекст, чтобы ответить на вопрос в конце.
2. Если ты не знаешь ответа - говори "Я не знаю", не придумывай ответ если не уверена в нем.
3. Старайся отвечать максимально подробно, так, чтобы человек получил исчерпывающий ответ на свой вопрос по документам.  

Контекст: {context}

Вопрос: {question}

Ответ:"""

In [1]:
import os

In [2]:
# What LLM inference you would use?
from langchain_community.llms import Ollama

In [3]:
#What chat functions do you need for demo?
from langchain.chains import RetrievalQA
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.prompts import PromptTemplate

In [4]:
#How would you process pdf?
from langchain_community.document_loaders import PDFPlumberLoader

In [5]:
#How would you process docx files?
from langchain_community.document_loaders import Docx2txtLoader

In [6]:
#What to use for text chunking and embeddings creation
from langchain_experimental.text_splitter import SemanticChunker
from langchain_huggingface import HuggingFaceEmbeddings

In [7]:
#Where to store embeddings?
from langchain_community.vectorstores import FAISS

In [31]:
import torch

In [8]:
# Create the Ollama LLM wrapper for the model named in LLM_name.
# NOTE(review): presumably requires a running Ollama server with that model pulled — confirm.
llm = Ollama(model=LLM_name)

In [9]:
# Root directory that is searched for documents; taken from rag_path set in the configuration cell.
documents_path = rag_path

In [11]:
# Collect the paths of all .docx and .pdf files under documents_path, including subdirectories.
# msdocs_list receives the Word documents, pdfiles_list the PDFs.
msdocs_list = []
pdfiles_list = []
for path, subdirs, files in os.walk(documents_path):
    for name in files:
        # Word keeps "~$<name>.docx" lock files next to open documents; they are not real documents.
        if name.startswith("~$"):
            continue
        # Compare the real extension (case-insensitively) instead of a substring, so that
        # "report.docx.bak" is ignored and "REPORT.PDF" is still picked up.
        extension = os.path.splitext(name)[1].lower()
        if extension == ".docx":
            msdocs_list.append(os.path.join(path, name))
        elif extension == ".pdf":
            pdfiles_list.append(os.path.join(path, name))

In [13]:
# Load the text of every discovered document.
# doc_list gets one entry per file: the list of Document objects returned by that file's loader.
# failed_list gets the path of every file whose loader raised an error.
doc_list = []
failed_list = []

# PDFs are read with PDFPlumberLoader, Word documents with Docx2txtLoader.
# All .docx files are processed (the earlier [1:4] slice silently dropped most of them).
for loader_cls, paths in ((PDFPlumberLoader, pdfiles_list), (Docx2txtLoader, msdocs_list)):
    for doc in paths:
        try:
            doc_list.append(loader_cls(doc).load())
        except Exception as exc:
            # Keep going on a broken file, but report why it failed instead of hiding the error.
            # `except Exception` (not a bare `except`) lets Ctrl+C still interrupt the cell.
            print(f"Failed to load {doc}: {exc!r}")
            failed_list.append(doc)

In [23]:
# Split all loaded text into chunks and collect them in one flat list (chunk_list).
# You can use your preferred chunking method instead of the semantic chunker.

# Use the GPU when one is available, otherwise fall back to the CPU so the cell still runs.
device = "cuda" if torch.cuda.is_available() else "cpu"

text_splitter = SemanticChunker(
    HuggingFaceEmbeddings(model_name=embedder_name, model_kwargs={"device": device}),
    breakpoint_threshold_type="percentile",
    breakpoint_threshold_amount=60,
)

# Each entry of doc_list is the list of Documents loaded from one file.
chunk_list = []
for doc in doc_list:
    chunks = text_splitter.split_documents(doc)
    chunk_list.extend(chunks)

You try to use a model that was created with version 3.0.1, however, your version is 2.7.0. This might cause unexpected behavior or errors. In that case, try to update to the latest version.





In [33]:
# Release unused cached GPU memory held by PyTorch - can help with out-of-memory problems.
# This does not free memory of objects that are still referenced. Comment the line out if not needed.
torch.cuda.empty_cache()

In [35]:
# Initiate the embedding model, which turns our chunks into vectors.
# Use the GPU when one is available, otherwise fall back to the CPU so the cell still runs.
embedder = HuggingFaceEmbeddings(
    model_name=embedder_name,
    model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"},
)

You try to use a model that was created with version 3.0.1, however, your version is 2.7.0. This might cause unexpected behavior or errors. In that case, try to update to the latest version.





In [37]:
# Create the vector storage for embeddings. You can choose whatever you like instead of FAISS.
# Index chunk_list (the chunks of ALL documents); `chunks` only holds the last document's chunks.
vector = FAISS.from_documents(chunk_list, embedder)

In [39]:
# Expose the vector store as a retriever: similarity search with k=5 passed as a search argument.
retriever = vector.as_retriever(
    search_kwargs={"k": 5},
    search_type="similarity",
)

In [85]:
# Assemble the question-answering chain: prompt -> LLM chain -> document stuffing -> retrieval QA.

# Turn the prompt string into a template with {context} and {question} placeholders.
QA_CHAIN_PROMPT = PromptTemplate.from_template(prompt)

# Chain that fills QA_CHAIN_PROMPT and sends it to the LLM.
llm_chain = LLMChain(llm=llm, prompt=QA_CHAIN_PROMPT, callbacks=None, verbose=True)

# How a single retrieved document is rendered before it is placed into the prompt.
document_prompt = PromptTemplate(
    template="Context:\ncontent:{page_content}\nsource:{source}",
    input_variables=["page_content", "source"],
)

# Puts the rendered documents into the "context" variable of llm_chain.
combine_documents_chain = StuffDocumentsChain(
    document_prompt=document_prompt,
    document_variable_name="context",
    llm_chain=llm_chain,
    callbacks=None,
)

# Full pipeline: retrieve documents, then answer with them.
# return_source_documents=True asks the chain to return the retrieved documents with the answer.
qa = RetrievalQA(
    retriever=retriever,
    combine_documents_chain=combine_documents_chain,
    return_source_documents=True,
    verbose=True,
)

In [89]:
# Run the retriever on its own for the question and keep the result in retrieved_docs for inspection.
retrieved_docs = retriever.invoke(question)

In [95]:
# Release unused cached GPU memory held by PyTorch before running the QA chain.
torch.cuda.empty_cache()

In [93]:
# Ask the question: run the retrieval QA chain and print the generated answer.
# Chain.__call__ (qa(question)) is deprecated; invoke() is the supported entry point.
# RetrievalQA reads the question from the "query" key and returns the answer under "result".
answer = qa.invoke({"query": question})
print(answer["result"])



[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
1. Используй контекст, чтобы ответить на вопрос в конце.
2. Если ты не знаешь ответа - говори "Я не знаю", не придумывай ответ если не уверена в нем.
3. Старайся отвечать максимально подробно, так, чтобы человек получил исчерпывающий ответ на свой вопрос по документам.  
4. Если ты заметишь логическое или фактическое противоречие в представленном контексте - обязательно скажи об этом пользователю.

Контекст: Context:
content:Блок-схема модели мониторинга уровня стресса.
source:C:\Users\Data Science\Downloads\218_Программисты\218_Программисты\04 Отчетная документация\2 этап\2 этап_Для загрузки_Версия 2\03.G25.31.0247-2.1.1.3. PZ na model stressa-2.docx

Context:
content:В этом случае возникает необходимость исключения каких-либо параметров, имеющих высокую взаимную коррелированность. Таблица 3.13.Регистрируемые биометрические параметры базовой многопараметричес