In [37]:
import os
from getpass import getpass
from pathlib import Path

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
# NOTE: GigaChat is imported twice; the langchain_gigachat import below is the
# one that stays bound, so it must remain after this legacy import.
from langchain.chat_models.gigachat import GigaChat
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_gigachat.chat_models import GigaChat
from langchain_gigachat.embeddings.gigachat import GigaChatEmbeddings

from vectorization.vectorization import *

In [38]:
# GigaChat authorization key.
# It is read from the GIGACHAT_CREDENTIALS environment variable so the secret is
# never stored in the notebook; when the variable is unset (or empty) the user is
# prompted interactively and the typed value is not echoed.
# SECURITY: a real key was previously hardcoded in this cell — it must be treated
# as leaked and revoked/reissued.
API_KEY = os.environ.get("GIGACHAT_CREDENTIALS") or getpass("GigaChat authorization key: ")

In [39]:
# Root of the corpus, built component-wise so the path works on every OS
# (a literal "..\\data\\..." string is a single odd filename on POSIX and matches nothing).
DATA_DIR = Path("..") / "data"
# Only a few files per source are indexed to keep the embedding step cheap.
MAX_FILES_PER_SOURCE = 3

# rglob yields paths in no particular order, so sort before slicing to make the
# selected subset reproducible between runs and machines.
doc_files = sorted((DATA_DIR / "documentation").rglob("*.md"))[:MAX_FILES_PER_SOURCE]
code_files = sorted((DATA_DIR / "code").rglob("*.py"))[:MAX_FILES_PER_SOURCE]

doc_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
# Code is split preferentially at class/function boundaries, then at newlines.
code_splitter = RecursiveCharacterTextSplitter(separators=["\nclass", "\ndef", "\n"], chunk_size=500, chunk_overlap=100)

# One list of text chunks per file; each file is read and split exactly once.
doc_chunks = [doc_splitter.split_text(file.read_text(encoding="utf-8")) for file in doc_files]
code_chunks = [code_splitter.split_text(file.read_text(encoding="utf-8")) for file in code_files]

# Flatten the per-file chunk lists into Documents tagged with their origin.
docs = [Document(page_content=chunk, metadata={"source": "doc"}) for chunks in doc_chunks for chunk in chunks]
code = [Document(page_content=chunk, metadata={"source": "code"}) for chunks in code_chunks for chunk in chunks]

# Combined corpus: documentation chunks first, then code chunks.
documents = docs + code

In [40]:
# Embedding client used both to build the vector store and to reload it later.
# TLS certificate verification is switched off for the GigaChat endpoint.
embedding = GigaChatEmbeddings(credentials=API_KEY, scope="GIGACHAT_API_PERS", verify_ssl_certs=False)

In [41]:
# Build the vector store from the chunked corpus using the GigaChat embedding client.
# NOTE(review): FAISSVectorStore is not imported by name in this notebook — presumably
# it comes from the star import of vectorization.vectorization; confirm.
vector_store = FAISSVectorStore(embedding, vector_store_path="vector_store")
# Populates the store from `documents` (doc + code chunks prepared in an earlier cell).
vector_store.create_vector_store(documents)

In [42]:
# Persist the freshly built store.
# NOTE(review): presumably writes to the "vector_store" path given at construction
# (helper comes from the vectorization star import) — confirm.
save_vector_store(vector_store)

In [43]:
# Reload the store from "vector_store", passing the same embedding client used to build it.
# NOTE(review): load_vector_store is a project helper; its on-disk format is not visible here.
loaded_vector_store = load_vector_store("vector_store", embedding)

In [44]:
# Wrap the reloaded store in a retriever for the retrieval chain built below.
# NOTE(review): search type / number of results are decided inside get_retriever — not visible here.
retriever = get_retriever(loaded_vector_store)

In [45]:
# Chat model that generates the final answer.
# The scope is passed explicitly so the LLM client is configured the same way as
# the embedding client, which authenticates with the same credentials.
# TLS certificate verification is switched off for the GigaChat endpoint.
llm = GigaChat(
    credentials=API_KEY,
    scope="GIGACHAT_API_PERS",
    verify_ssl_certs=False,
)

# Prompt for the "stuff documents" chain.
# Template variables:
#   {context} - the retrieved documentation/code fragments (filled by the documents chain)
#   {input}   - the user's question (forwarded by the retrieval chain)
# The original template placed {context} in the question slot and never referenced
# {input}, so the model never saw the actual question; both are now present and labelled.
prompt = ChatPromptTemplate.from_template('''Ты — технический помощник, работающий с библиотекой LangChain. У тебя есть доступ к документации и к исходному коду библиотеки.

Твоя задача — ответить на вопрос пользователя, используя и документацию, и код. Следуй этим правилам:

---

1. 📄 Если информация взята из документации, укажи, из какого именно документа она была получена. Пример:  
   _"Согласно документации (load_chain.md)..."_

2. 🧩 Если информация взята из исходного кода, укажи, из какого файла она была получена. Пример:  
   _"В коде (loader.py) реализована только цепочка summarize_chain..."_

3. ⚠️ Если документация и код противоречат друг другу, всё равно сформулируй полезный ответ, но обязательно предупреди об этом. Пример:  
   _"Документация (load_chain.md) утверждает, что поддерживаются 3 цепочки, однако в коде (loader.py) реализована только одна. Это потенциальное несоответствие."_

4. ❓ Если ты не уверен, соответствует ли документация коду, также предупреди об этом. Пример:  
   _"Не удалось однозначно проверить, соответствует ли описание в документации (agent_overview.md) текущей реализации кода (agent/base.py). Будьте внимательны."_

5. 💬 Избегай вымышленных деталей — все факты должны быть подтверждены фрагментами из кода или документации.

---

В конце ответа сделай краткое заключение:
- Указан ли источник каждого утверждения?
- Есть ли возможные противоречия?

---

Контекст (фрагменты документации и исходного кода):
{context}

---

Теперь ответь на вопрос:
"{input}"'''
)

In [46]:
# Chain that stuffs all retrieved documents into the prompt and calls the LLM.
document_chain = create_stuff_documents_chain(llm, prompt)

# Full RAG pipeline: retrieve documents for the input, then answer with document_chain.
retrieval_chain = create_retrieval_chain(
    retriever,
    document_chain,
)

In [47]:
# Query: definition lookup (English question).
q1 = 'What is LangSmith?'
resp1 = retrieval_chain.invoke({'input': q1})

# Show the full response returned by the chain.
resp1

{'input': 'What is LangSmith?',
 'context': [Document(id='355ee863-aa5a-494d-abbd-b574f8aebd18', metadata={'source': 'doc'}, page_content='important\n\nLangServe is designed to primarily deploy simple Runnables and work with well-known primitives in langchain-core.\n\nIf you need a deployment option for LangGraph, you should instead be looking at LangGraph Platform (beta) which will be better suited for deploying LangGraph applications.\n\nFor more information, see the [LangServe documentation](/docs/langserve/).\n\n## LangSmith[\u200b](#langsmith "Direct link to LangSmith")\n\nA developer platform that lets you debug, test, evaluate, and monitor LLM applications.\n\nFor more information, see the [LangSmith documentation](https://docs.smith.langchain.com)'),
  Document(id='215b578e-4f52-4b46-821b-e066c99722f2', metadata={'source': 'doc'}, page_content='## langgraph[\u200b](#langgraph "Direct link to langgraph")\n\n`langgraph` is an extension of `langchain` aimed at building robust and 

In [48]:
# Query: how an agent decides what to do.
q1 = 'Как агент принимает решение, что делать?'
resp1 = retrieval_chain.invoke({'input': q1})

# Show the full response returned by the chain.
resp1

{'input': 'Как агент принимает решение, что делать?',
 'context': [Document(id='c58b7b14-012b-4c43-863c-ae14d843391d', metadata={'source': 'code'}, page_content='Override this to take control of how the agent makes and acts on choices.\n        """\n        try:\n            intermediate_steps = self._prepare_intermediate_steps(intermediate_steps)\n\n            # Call the LLM to see what to do.\n            output = await self._action_agent.aplan(\n                intermediate_steps,\n                callbacks=run_manager.get_child() if run_manager else None,\n                **inputs,\n            )\n        except OutputParserException as e:'),
  Document(id='382e7e90-008e-45b5-a01f-3655b2ae10d9', metadata={'source': 'code'}, page_content='callbacks: Callbacks = None,\n        **kwargs: Any,\n    ) -> Union[\n        list[AgentAction],\n        AgentFinish,\n    ]:\n        """Based on past history and current inputs, decide what to do.\n\n        Args:\n            intermediate_ste

In [49]:
# Query: which vector databases LangChain supports.
q1 = 'Какие векторные базы поддерживает LangChain?'
resp1 = retrieval_chain.invoke({'input': q1})

# Show the full response returned by the chain.
resp1

{'input': 'Какие векторные базы поддерживает LangChain?',
 'context': [Document(id='d00dd8d5-9679-4075-a35d-b44246555166', metadata={'source': 'doc'}, page_content='# Architecture\n\nLangChain is a framework that consists of a number of packages.\n\n![Diagram outlining the hierarchical organization of the LangChain framework, displaying the interconnected parts across multiple layers.](/svg/langchain_stack_112024.svg "LangChain Framework Overview")\n\n## langchain-core[\u200b](#langchain-core "Direct link to langchain-core")\n\nThis package contains base abstractions for different components and ways to compose them together.\nThe interfaces for core components like chat models, vector stores, tools and more are defined here.\nNo third-party integrations are defined here.\nThe dependencies are very lightweight.\n\n## langchain[\u200b](#langchain "Direct link to langchain")\n\nThe main `langchain` package contains chains and retrieval strategies that make up an application\'s cognitive 

In [50]:
# Query: behaviour of load_chain when given an unknown chain.
q1 = 'Что произойдет, если передать неизвестную цепочку в load_chain?'
resp1 = retrieval_chain.invoke({'input': q1})

# Show the full response returned by the chain.
resp1

{'input': 'Что произойдет, если передать неизвестную цепочку в load_chain?',
 'context': [Document(id='1623dd95-810b-44f2-aab1-4821b12535f5', metadata={'source': 'code'}, page_content='"""Chain that takes in an input and produces an action and action input."""\n\nfrom __future__ import annotations\n\nimport asyncio\nimport builtins\nimport json\nimport logging\nimport time\nfrom abc import abstractmethod\nfrom collections.abc import AsyncIterator, Iterator, Sequence\nfrom pathlib import Path\nfrom typing import (\n    Any,\n    Callable,\n    Optional,\n    Union,\n    cast,\n)\n\nimport yaml\nfrom langchain_core._api import deprecated'),
  Document(id='d00dd8d5-9679-4075-a35d-b44246555166', metadata={'source': 'doc'}, page_content='# Architecture\n\nLangChain is a framework that consists of a number of packages.\n\n![Diagram outlining the hierarchical organization of the LangChain framework, displaying the interconnected parts across multiple layers.](/svg/langchain_stack_112024.svg "

In [51]:
# Query: how to implement a custom chain.
q1 = 'Как реализовать собственную цепочку?'
resp1 = retrieval_chain.invoke({'input': q1})

# Show the full response returned by the chain.
resp1

{'input': 'Как реализовать собственную цепочку?',
 'context': [Document(id='d00dd8d5-9679-4075-a35d-b44246555166', metadata={'source': 'doc'}, page_content='# Architecture\n\nLangChain is a framework that consists of a number of packages.\n\n![Diagram outlining the hierarchical organization of the LangChain framework, displaying the interconnected parts across multiple layers.](/svg/langchain_stack_112024.svg "LangChain Framework Overview")\n\n## langchain-core[\u200b](#langchain-core "Direct link to langchain-core")\n\nThis package contains base abstractions for different components and ways to compose them together.\nThe interfaces for core components like chat models, vector stores, tools and more are defined here.\nNo third-party integrations are defined here.\nThe dependencies are very lightweight.\n\n## langchain[\u200b](#langchain "Direct link to langchain")\n\nThe main `langchain` package contains chains and retrieval strategies that make up an application\'s cognitive architec

In [52]:
# Query: how to use PromptTemplate.
q1 = 'Как использовать PromptTemplate?'
resp1 = retrieval_chain.invoke({'input': q1})

# Show the full response returned by the chain.
resp1

{'input': 'Как использовать PromptTemplate?',
 'context': [Document(id='07c35582-93ab-4941-b020-f41bcdf21f07', metadata={'source': 'code'}, page_content='"""Prefix to append the LLM call with."""\n\n    @classmethod\n    @abstractmethod\n    def create_prompt(cls, tools: Sequence[BaseTool]) -> BasePromptTemplate:\n        """Create a prompt for this class.\n\n        Args:\n            tools: Tools to use.\n\n        Returns:\n            BasePromptTemplate: Prompt template.\n        """\n\n    @classmethod\n    def _validate_tools(cls, tools: Sequence[BaseTool]) -> None:\n        """Validate that appropriate tools are passed in.\n\n        Args:'),
  Document(id='8e2eb17f-9bce-420e-ab98-2b080d312f33', metadata={'source': 'code'}, page_content='elif isinstance(prompt, FewShotPromptTemplate):\n                prompt.suffix += "\\n{agent_scratchpad}"\n            else:\n                raise ValueError(f"Got unexpected prompt type {type(prompt)}")\n        return self\n\n    @property\n 

In [53]:
# Query: example of loading files from a directory.
q1 = 'Приведи пример загрузки файлов из директории'
resp1 = retrieval_chain.invoke({'input': q1})

# Show the full response returned by the chain.
resp1

{'input': 'Приведи пример загрузки файлов из директории',
 'context': [Document(id='e485cdf7-3193-45c7-843e-2093ac11aacd', metadata={'source': 'code'}, page_content='input,\n            config.get("callbacks"),\n            tags=config.get("tags"),\n            metadata=config.get("metadata"),\n            run_name=config.get("run_name"),\n            run_id=config.get("run_id"),\n            yield_actions=True,\n            **kwargs,\n        )\n        async for step in iterator:\n            yield step'),
  Document(id='477d1545-2068-4414-baaf-13b528c41e99', metadata={'source': 'doc'}, page_content='* **[Output parsers](/docs/concepts/output_parsers/)**: Responsible for taking the output of a model and transforming it into a more suitable format for downstream tasks. Output parsers were primarily useful prior to the general availability of [tool calling](/docs/concepts/tool_calling/) and [structured outputs](/docs/concepts/structured_outputs/).\n* **[Few-shot prompting](/docs/concep

In [54]:
# Query: which errors occur in LangChain and when.
q1 = 'Какие ошибки бывают в LangChain и когда они возникают?'
resp1 = retrieval_chain.invoke({'input': q1})

# Show the full response returned by the chain.
resp1

{'input': 'Какие ошибки бывают в LangChain и когда они возникают?',
 'context': [Document(id='d00dd8d5-9679-4075-a35d-b44246555166', metadata={'source': 'doc'}, page_content='# Architecture\n\nLangChain is a framework that consists of a number of packages.\n\n![Diagram outlining the hierarchical organization of the LangChain framework, displaying the interconnected parts across multiple layers.](/svg/langchain_stack_112024.svg "LangChain Framework Overview")\n\n## langchain-core[\u200b](#langchain-core "Direct link to langchain-core")\n\nThis package contains base abstractions for different components and ways to compose them together.\nThe interfaces for core components like chat models, vector stores, tools and more are defined here.\nNo third-party integrations are defined here.\nThe dependencies are very lightweight.\n\n## langchain[\u200b](#langchain "Direct link to langchain")\n\nThe main `langchain` package contains chains and retrieval strategies that make up an application\'s 