In [1]:
# Print the version of the `python` executable found on the shell PATH,
# as a record of the interpreter this notebook was run with.
!python --version

Python 3.10.14


In [2]:
!pip install -r requirements.txt

Collecting chromadb (from -r requirements.txt (line 1))
  Using cached chromadb-0.5.3-py3-none-any.whl.metadata (6.8 kB)
Collecting google.generativeai (from -r requirements.txt (line 2))
  Using cached google_generativeai-0.7.1-py3-none-any.whl.metadata (3.9 kB)
Collecting langchain (from -r requirements.txt (line 3))
  Using cached langchain-0.2.6-py3-none-any.whl.metadata (7.0 kB)
Collecting langchain-google-genai (from -r requirements.txt (line 4))
  Using cached langchain_google_genai-1.0.7-py3-none-any.whl.metadata (3.8 kB)
Collecting langchain_community (from -r requirements.txt (line 5))
  Using cached langchain_community-0.2.6-py3-none-any.whl.metadata (2.5 kB)
Collecting jupyter (from -r requirements.txt (line 6))
  Downloading jupyter-1.0.0-py2.py3-none-any.whl.metadata (995 bytes)
Collecting pypdf (from -r requirements.txt (line 7))
  Using cached pypdf-4.2.0-py3-none-any.whl.metadata (7.4 kB)
Collecting python-dotenv (from -r requirements.txt (line 8))
  Using cached pytho

In [3]:
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.prompts.prompt import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain.vectorstores import Chroma
import google.generativeai as genai
from langchain.text_splitter import CharacterTextSplitter


In [29]:
# Create a template .env file holding the GOOGLE_API_KEY entry.
# Only write it when no .env exists yet: re-running this cell must never
# overwrite a file that already contains a real key.
# NOTE(review): 'api' looks like a placeholder -- replace it with the real key
# by editing .env directly, and keep that file out of version control.
if not os.path.exists('.env'):
    with open('.env', 'w', encoding='utf-8') as f:
        f.write('GOOGLE_API_KEY=api\n')

In [31]:
# Read the local .env file, then look up the Google API key in the
# process environment (None when the variable is not set).
load_dotenv(dotenv_path='.env')
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')

In [32]:
# Chat model used to generate the final answers.
llm = ChatGoogleGenerativeAI(
    model="models/gemini-pro",
)

In [33]:
# Embedding model used to vectorise the document chunks for the vector store.
embedding = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
)

In [34]:
# Load every PDF found in `pdf_directory` and split it into text chunks.
pdf_directory = "data/"

# os.listdir() returns entries in arbitrary order, so sort the names to keep
# the indexing order reproducible between runs. The extension is compared
# case-insensitively so files named "*.PDF" are not silently skipped.
pdf_files = sorted(
    f for f in os.listdir(pdf_directory) if f.lower().endswith('.pdf')
)

# Fail early with a clear message instead of building an empty index later.
if not pdf_files:
    raise FileNotFoundError(f"No PDF files found in {pdf_directory!r}")

# Split on a literal "." (not a regex); chunk size and overlap are measured
# in characters because length_function is len.
text_splitter = CharacterTextSplitter(
    separator=".",
    chunk_size=2500,
    chunk_overlap=150,
    length_function=len,
    is_separator_regex=False,
)

# Accumulates the chunks of ALL PDFs; this is the list to index.
all_pages = []

for pdf_file in pdf_files:
    pdf_path = os.path.join(pdf_directory, pdf_file)
    loader = PyPDFLoader(pdf_path)
    # `pages` holds only the chunks of the file handled in this iteration.
    pages = loader.load_and_split(text_splitter)
    all_pages.extend(pages)

In [35]:
# Build the vector store from the chunks of ALL loaded PDFs.
# `pages` is the leftover loop variable and only contains the chunks of the
# last file processed; indexing it would leave every other document out of
# the store, so `all_pages` is used instead.
db = Chroma.from_documents(all_pages, embedding)
db

<langchain_community.vectorstores.chroma.Chroma at 0x11fafd9c0>

In [36]:
# Retriever over the vector store; `k` is set to 7 in the search arguments.
retriever = db.as_retriever(search_kwargs=dict(k=7))

In [37]:
# Prompt template for the question-answering chain. It has two placeholders:
# {context} and {input}. The instruction text is in Russian and tells the
# model it is an AI assistant that should answer from the provided context.
# The text and its whitespace are sent to the model as-is.
template = """
Вы - помощник с искусственным интеллектом.
Отвечайте, исходя из предоставленного контекста.

context: {context}
input: {input}
answer:
"""

In [38]:
# Assemble the pipeline: prompt -> chain that combines documents with the LLM
# -> retrieval chain that uses the retriever and the combine-documents chain.
prompt = PromptTemplate.from_template(template=template)
combine_docs_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=combine_docs_chain,
)

In [41]:
# Question asked of the retrieval chain (Russian: which test methods exist
# and which GOSTs correspond to them).
# NOTE(review): "испытаций" looks like a typo for "испытаний"; it is left
# unchanged here because the text is passed to the chain verbatim.
question = " Какие существуют методы испытаций и какие им соответствуют ГОСТы?"
response = retrieval_chain.invoke({"input": question})

In [42]:
# Show the text stored under the "answer" key of the response.
answer_text = response["answer"]
print(answer_text)

**Методы испытаний** | **ГОСТы**
------- | --------
Испытание на растяжение | ГОСТ 10006
Испытание на твердость | ГОСТ 9012
Испытание на сплющивание | ГОСТ 8695
Гидравлическое испытание | ГОСТ 3845
Испытание на загиб | ГОСТ 3728
Испытание на раздачу | ГОСТ 8694
Испытание на бортование | ГОСТ 8693
