In [37]:
from langchain.vectorstores.faiss import FAISS
from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chat_models.gigachat import GigaChat
from langchain.chains import create_retrieval_chain
from langchain_gigachat.chat_models import GigaChat
from langchain_gigachat.embeddings.gigachat import GigaChatEmbeddings
from pathlib import Path
from vectorization.vectorization import *

In [38]:
API_KEY = "YjllY2FhYjgtNGRlMC00MDA4LWIwZmYtNjdlNjY0ZmI5OTc4OmRkMjZhOWFjLThhNTctNGM3ZC1iZjFkLWQ3NGY1NmRjNTQzMQ=="

In [39]:
doc_files = list(Path("..\\data\\documentation").rglob("*.md"))[:3]
code_files = list(Path("..\\data\\code").rglob("*.py"))[:3]

doc_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
code_splitter = RecursiveCharacterTextSplitter(separators=["\nclass", "\ndef", "\n"], chunk_size=500, chunk_overlap=100)

doc_chunks = [doc_splitter.split_text(Path(file).read_text(encoding="utf-8")) for file in doc_files]
code_chunks = [code_splitter.split_text(Path(file).read_text(encoding="utf-8")) for file in code_files]

docs = [Document(page_content=chunk, metadata={"source": "doc"}) for file in doc_files for chunk in doc_splitter.split_text(Path(file).read_text(encoding="utf-8"))]
code = [Document(page_content=chunk, metadata={"source": "code"}) for file in code_files for chunk in code_splitter.split_text(Path(file).read_text(encoding="utf-8"))]

documents = docs + code

In [40]:
embedding=GigaChatEmbeddings(
        credentials=API_KEY,
        scope="GIGACHAT_API_PERS",
        verify_ssl_certs=False,
)

In [41]:
vector_store = FAISSVectorStore(embedding, vector_store_path="vector_store")
vector_store.create_vector_store(documents)

In [42]:
save_vector_store(vector_store)

In [43]:
loaded_vector_store = load_vector_store("vector_store", embedding)

In [44]:
retriever = get_retriever(loaded_vector_store)

In [45]:
llm = GigaChat(
    credentials=API_KEY,
    verify_ssl_certs=False,
)

prompt = ChatPromptTemplate.from_template('''–¢—ã ‚Äî —Ç–µ—Ö–Ω–∏—á–µ—Å–∫–∏–π –ø–æ–º–æ—â–Ω–∏–∫, —Ä–∞–±–æ—Ç–∞—é—â–∏–π —Å –±–∏–±–ª–∏–æ—Ç–µ–∫–æ–π LangChain. –£ —Ç–µ–±—è –µ—Å—Ç—å –¥–æ—Å—Ç—É–ø –∫ –¥–æ–∫—É–º–µ–Ω—Ç–∞—Ü–∏–∏ –∏ –∫ –∏—Å—Ö–æ–¥–Ω–æ–º—É –∫–æ–¥—É –±–∏–±–ª–∏–æ—Ç–µ–∫–∏.

–¢–≤–æ—è –∑–∞–¥–∞—á–∞ ‚Äî –æ—Ç–≤–µ—Ç–∏—Ç—å –Ω–∞ –≤–æ–ø—Ä–æ—Å –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—è, –∏—Å–ø–æ–ª—å–∑—É—è –∏ –¥–æ–∫—É–º–µ–Ω—Ç–∞—Ü–∏—é, –∏ –∫–æ–¥. –°–ª–µ–¥—É–π —ç—Ç–∏–º –ø—Ä–∞–≤–∏–ª–∞–º:

---

1. üìÑ –ï—Å–ª–∏ –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—è –≤–∑—è—Ç–∞ –∏–∑ –¥–æ–∫—É–º–µ–Ω—Ç–∞—Ü–∏–∏, —É–∫–∞–∂–∏, –∏–∑ –∫–∞–∫–æ–≥–æ –∏–º–µ–Ω–Ω–æ –¥–æ–∫—É–º–µ–Ω—Ç–∞ –æ–Ω–∞ –±—ã–ª–∞ –ø–æ–ª—É—á–µ–Ω–∞. –ü—Ä–∏–º–µ—Ä:  
   _"–°–æ–≥–ª–∞—Å–Ω–æ –¥–æ–∫—É–º–µ–Ω—Ç–∞—Ü–∏–∏ (load_chain.md)..."_

2. üß© –ï—Å–ª–∏ –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—è –≤–∑—è—Ç–∞ –∏–∑ –∏—Å—Ö–æ–¥–Ω–æ–≥–æ –∫–æ–¥–∞, —É–∫–∞–∂–∏, –∏–∑ –∫–∞–∫–æ–≥–æ —Ñ–∞–π–ª–∞ –æ–Ω–∞ –±—ã–ª–∞ –ø–æ–ª—É—á–µ–Ω–∞. –ü—Ä–∏–º–µ—Ä:  
   _"–í –∫–æ–¥–µ (loader.py) —Ä–µ–∞–ª–∏–∑–æ–≤–∞–Ω–∞ —Ç–æ–ª—å–∫–æ —Ü–µ–ø–æ—á–∫–∞ summarize_chain..."_

3. ‚ö†Ô∏è –ï—Å–ª–∏ –¥–æ–∫—É–º–µ–Ω—Ç–∞—Ü–∏—è –∏ –∫–æ–¥ –ø—Ä–æ—Ç–∏–≤–æ—Ä–µ—á–∞—Ç –¥—Ä—É–≥ –¥—Ä—É–≥—É, –≤—Å—ë —Ä–∞–≤–Ω–æ —Å—Ñ–æ—Ä–º—É–ª–∏—Ä—É–π –ø–æ–ª–µ–∑–Ω—ã–π –æ—Ç–≤–µ—Ç, –Ω–æ –æ–±—è–∑–∞—Ç–µ–ª—å–Ω–æ –ø—Ä–µ–¥—É–ø—Ä–µ–¥–∏ –æ–± —ç—Ç–æ–º. –ü—Ä–∏–º–µ—Ä:  
   _"–î–æ–∫—É–º–µ–Ω—Ç–∞—Ü–∏—è (load_chain.md) —É—Ç–≤–µ—Ä–∂–¥–∞–µ—Ç, —á—Ç–æ –ø–æ–¥–¥–µ—Ä–∂–∏–≤–∞—é—Ç—Å—è 3 —Ü–µ–ø–æ—á–∫–∏, –æ–¥–Ω–∞–∫–æ –≤ –∫–æ–¥–µ (loader.py) —Ä–µ–∞–ª–∏–∑–æ–≤–∞–Ω–∞ —Ç–æ–ª—å–∫–æ –æ–¥–Ω–∞. –≠—Ç–æ –ø–æ—Ç–µ–Ω—Ü–∏–∞–ª—å–Ω–æ–µ –Ω–µ—Å–æ–æ—Ç–≤–µ—Ç—Å—Ç–≤–∏–µ."_

4. ‚ùì –ï—Å–ª–∏ —Ç—ã –Ω–µ —É–≤–µ—Ä–µ–Ω, —Å–æ–æ—Ç–≤–µ—Ç—Å—Ç–≤—É–µ—Ç –ª–∏ –¥–æ–∫—É–º–µ–Ω—Ç–∞—Ü–∏—è –∫–æ–¥—É, —Ç–∞–∫–∂–µ –ø—Ä–µ–¥—É–ø—Ä–µ–¥–∏ –æ–± —ç—Ç–æ–º. –ü—Ä–∏–º–µ—Ä:  
   _"–ù–µ —É–¥–∞–ª–æ—Å—å –æ–¥–Ω–æ–∑–Ω–∞—á–Ω–æ –ø—Ä–æ–≤–µ—Ä–∏—Ç—å, —Å–æ–æ—Ç–≤–µ—Ç—Å—Ç–≤—É–µ—Ç –ª–∏ –æ–ø–∏—Å–∞–Ω–∏–µ –≤ –¥–æ–∫—É–º–µ–Ω—Ç–∞—Ü–∏–∏ (agent_overview.md) —Ç–µ–∫—É—â–µ–π —Ä–µ–∞–ª–∏–∑–∞—Ü–∏–∏ –∫–æ–¥–∞ (agent/base.py). –ë—É–¥—å—Ç–µ –≤–Ω–∏–º–∞—Ç–µ–ª—å–Ω—ã."_

5. üí¨ –ò–∑–±–µ–≥–∞–π –≤—ã–º—ã—à–ª–µ–Ω–Ω—ã—Ö –¥–µ—Ç–∞–ª–µ–π ‚Äî –≤—Å–µ —Ñ–∞–∫—Ç—ã –¥–æ–ª–∂–Ω—ã –±—ã—Ç—å –ø–æ–¥—Ç–≤–µ—Ä–∂–¥–µ–Ω—ã —Ñ—Ä–∞–≥–º–µ–Ω—Ç–∞–º–∏ –∏–∑ –∫–æ–¥–∞ –∏–ª–∏ –¥–æ–∫—É–º–µ–Ω—Ç–∞—Ü–∏–∏.

---

–í –∫–æ–Ω—Ü–µ –æ—Ç–≤–µ—Ç–∞ —Å–¥–µ–ª–∞–π –∫—Ä–∞—Ç–∫–æ–µ –∑–∞–∫–ª—é—á–µ–Ω–∏–µ:
- –£–∫–∞–∑–∞–Ω –ª–∏ –∏—Å—Ç–æ—á–Ω–∏–∫ –∫–∞–∂–¥–æ–≥–æ —É—Ç–≤–µ—Ä–∂–¥–µ–Ω–∏—è?
- –ï—Å—Ç—å –ª–∏ –≤–æ–∑–º–æ–∂–Ω—ã–µ –ø—Ä–æ—Ç–∏–≤–æ—Ä–µ—á–∏—è?

---

–¢–µ–ø–µ—Ä—å –æ—Ç–≤–µ—Ç—å –Ω–∞ –≤–æ–ø—Ä–æ—Å:
"{context}"'''
)

In [46]:
document_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt
)

retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [47]:
q1 = 'What is LangSmith?'

resp1 = retrieval_chain.invoke(
    {'input': q1}
)

resp1

{'input': 'What is LangSmith?',
 'context': [Document(id='355ee863-aa5a-494d-abbd-b574f8aebd18', metadata={'source': 'doc'}, page_content='important\n\nLangServe is designed to primarily deploy simple Runnables and work with well-known primitives in langchain-core.\n\nIf you need a deployment option for LangGraph, you should instead be looking at LangGraph Platform (beta) which will be better suited for deploying LangGraph applications.\n\nFor more information, see the [LangServe documentation](/docs/langserve/).\n\n## LangSmith[\u200b](#langsmith "Direct link to LangSmith")\n\nA developer platform that lets you debug, test, evaluate, and monitor LLM applications.\n\nFor more information, see the [LangSmith documentation](https://docs.smith.langchain.com)'),
  Document(id='215b578e-4f52-4b46-821b-e066c99722f2', metadata={'source': 'doc'}, page_content='## langgraph[\u200b](#langgraph "Direct link to langgraph")\n\n`langgraph` is an extension of `langchain` aimed at building robust and 

In [48]:
q1 = '–ö–∞–∫ –∞–≥–µ–Ω—Ç –ø—Ä–∏–Ω–∏–º–∞–µ—Ç —Ä–µ—à–µ–Ω–∏–µ, —á—Ç–æ –¥–µ–ª–∞—Ç—å?'

resp1 = retrieval_chain.invoke(
    {'input': q1}
)

resp1

{'input': '–ö–∞–∫ –∞–≥–µ–Ω—Ç –ø—Ä–∏–Ω–∏–º–∞–µ—Ç —Ä–µ—à–µ–Ω–∏–µ, —á—Ç–æ –¥–µ–ª–∞—Ç—å?',
 'context': [Document(id='c58b7b14-012b-4c43-863c-ae14d843391d', metadata={'source': 'code'}, page_content='Override this to take control of how the agent makes and acts on choices.\n        """\n        try:\n            intermediate_steps = self._prepare_intermediate_steps(intermediate_steps)\n\n            # Call the LLM to see what to do.\n            output = await self._action_agent.aplan(\n                intermediate_steps,\n                callbacks=run_manager.get_child() if run_manager else None,\n                **inputs,\n            )\n        except OutputParserException as e:'),
  Document(id='382e7e90-008e-45b5-a01f-3655b2ae10d9', metadata={'source': 'code'}, page_content='callbacks: Callbacks = None,\n        **kwargs: Any,\n    ) -> Union[\n        list[AgentAction],\n        AgentFinish,\n    ]:\n        """Based on past history and current inputs, decide what to do.\n\n        Ar

In [49]:
q1 = '–ö–∞–∫–∏–µ –≤–µ–∫—Ç–æ—Ä–Ω—ã–µ –±–∞–∑—ã –ø–æ–¥–¥–µ—Ä–∂–∏–≤–∞–µ—Ç LangChain?'

resp1 = retrieval_chain.invoke(
    {'input': q1}
)

resp1

{'input': '–ö–∞–∫–∏–µ –≤–µ–∫—Ç–æ—Ä–Ω—ã–µ –±–∞–∑—ã –ø–æ–¥–¥–µ—Ä–∂–∏–≤–∞–µ—Ç LangChain?',
 'context': [Document(id='d00dd8d5-9679-4075-a35d-b44246555166', metadata={'source': 'doc'}, page_content='# Architecture\n\nLangChain is a framework that consists of a number of packages.\n\n![Diagram outlining the hierarchical organization of the LangChain framework, displaying the interconnected parts across multiple layers.](/svg/langchain_stack_112024.svg "LangChain Framework Overview")\n\n## langchain-core[\u200b](#langchain-core "Direct link to langchain-core")\n\nThis package contains base abstractions for different components and ways to compose them together.\nThe interfaces for core components like chat models, vector stores, tools and more are defined here.\nNo third-party integrations are defined here.\nThe dependencies are very lightweight.\n\n## langchain[\u200b](#langchain "Direct link to langchain")\n\nThe main `langchain` package contains chains and retrieval strategies that make u

In [50]:
q1 = '–ß—Ç–æ –ø—Ä–æ–∏–∑–æ–π–¥–µ—Ç, –µ—Å–ª–∏ –ø–µ—Ä–µ–¥–∞—Ç—å –Ω–µ–∏–∑–≤–µ—Å—Ç–Ω—É—é —Ü–µ–ø–æ—á–∫—É –≤ load_chain?'

resp1 = retrieval_chain.invoke(
    {'input': q1}
)

resp1

{'input': '–ß—Ç–æ –ø—Ä–æ–∏–∑–æ–π–¥–µ—Ç, –µ—Å–ª–∏ –ø–µ—Ä–µ–¥–∞—Ç—å –Ω–µ–∏–∑–≤–µ—Å—Ç–Ω—É—é —Ü–µ–ø–æ—á–∫—É –≤ load_chain?',
 'context': [Document(id='1623dd95-810b-44f2-aab1-4821b12535f5', metadata={'source': 'code'}, page_content='"""Chain that takes in an input and produces an action and action input."""\n\nfrom __future__ import annotations\n\nimport asyncio\nimport builtins\nimport json\nimport logging\nimport time\nfrom abc import abstractmethod\nfrom collections.abc import AsyncIterator, Iterator, Sequence\nfrom pathlib import Path\nfrom typing import (\n    Any,\n    Callable,\n    Optional,\n    Union,\n    cast,\n)\n\nimport yaml\nfrom langchain_core._api import deprecated'),
  Document(id='d00dd8d5-9679-4075-a35d-b44246555166', metadata={'source': 'doc'}, page_content='# Architecture\n\nLangChain is a framework that consists of a number of packages.\n\n![Diagram outlining the hierarchical organization of the LangChain framework, displaying the interconnected parts across multipl

In [51]:
q1 = '–ö–∞–∫ —Ä–µ–∞–ª–∏–∑–æ–≤–∞—Ç—å —Å–æ–±—Å—Ç–≤–µ–Ω–Ω—É—é —Ü–µ–ø–æ—á–∫—É?'

resp1 = retrieval_chain.invoke(
    {'input': q1}
)

resp1

{'input': '–ö–∞–∫ —Ä–µ–∞–ª–∏–∑–æ–≤–∞—Ç—å —Å–æ–±—Å—Ç–≤–µ–Ω–Ω—É—é —Ü–µ–ø–æ—á–∫—É?',
 'context': [Document(id='d00dd8d5-9679-4075-a35d-b44246555166', metadata={'source': 'doc'}, page_content='# Architecture\n\nLangChain is a framework that consists of a number of packages.\n\n![Diagram outlining the hierarchical organization of the LangChain framework, displaying the interconnected parts across multiple layers.](/svg/langchain_stack_112024.svg "LangChain Framework Overview")\n\n## langchain-core[\u200b](#langchain-core "Direct link to langchain-core")\n\nThis package contains base abstractions for different components and ways to compose them together.\nThe interfaces for core components like chat models, vector stores, tools and more are defined here.\nNo third-party integrations are defined here.\nThe dependencies are very lightweight.\n\n## langchain[\u200b](#langchain "Direct link to langchain")\n\nThe main `langchain` package contains chains and retrieval strategies that make up an a

In [52]:
q1 = '–ö–∞–∫ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å PromptTemplate?'

resp1 = retrieval_chain.invoke(
    {'input': q1}
)

resp1

{'input': '–ö–∞–∫ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å PromptTemplate?',
 'context': [Document(id='07c35582-93ab-4941-b020-f41bcdf21f07', metadata={'source': 'code'}, page_content='"""Prefix to append the LLM call with."""\n\n    @classmethod\n    @abstractmethod\n    def create_prompt(cls, tools: Sequence[BaseTool]) -> BasePromptTemplate:\n        """Create a prompt for this class.\n\n        Args:\n            tools: Tools to use.\n\n        Returns:\n            BasePromptTemplate: Prompt template.\n        """\n\n    @classmethod\n    def _validate_tools(cls, tools: Sequence[BaseTool]) -> None:\n        """Validate that appropriate tools are passed in.\n\n        Args:'),
  Document(id='8e2eb17f-9bce-420e-ab98-2b080d312f33', metadata={'source': 'code'}, page_content='elif isinstance(prompt, FewShotPromptTemplate):\n                prompt.suffix += "\\n{agent_scratchpad}"\n            else:\n                raise ValueError(f"Got unexpected prompt type {type(prompt)}")\n        return self\n\n 

In [53]:
q1 = '–ü—Ä–∏–≤–µ–¥–∏ –ø—Ä–∏–º–µ—Ä –∑–∞–≥—Ä—É–∑–∫–∏ —Ñ–∞–π–ª–æ–≤ –∏–∑ –¥–∏—Ä–µ–∫—Ç–æ—Ä–∏–∏'

resp1 = retrieval_chain.invoke(
    {'input': q1}
)

resp1

{'input': '–ü—Ä–∏–≤–µ–¥–∏ –ø—Ä–∏–º–µ—Ä –∑–∞–≥—Ä—É–∑–∫–∏ —Ñ–∞–π–ª–æ–≤ –∏–∑ –¥–∏—Ä–µ–∫—Ç–æ—Ä–∏–∏',
 'context': [Document(id='e485cdf7-3193-45c7-843e-2093ac11aacd', metadata={'source': 'code'}, page_content='input,\n            config.get("callbacks"),\n            tags=config.get("tags"),\n            metadata=config.get("metadata"),\n            run_name=config.get("run_name"),\n            run_id=config.get("run_id"),\n            yield_actions=True,\n            **kwargs,\n        )\n        async for step in iterator:\n            yield step'),
  Document(id='477d1545-2068-4414-baaf-13b528c41e99', metadata={'source': 'doc'}, page_content='* **[Output parsers](/docs/concepts/output_parsers/)**: Responsible for taking the output of a model and transforming it into a more suitable format for downstream tasks. Output parsers were primarily useful prior to the general availability of [tool calling](/docs/concepts/tool_calling/) and [structured outputs](/docs/concepts/structured_outputs/).

In [54]:
q1 = '–ö–∞–∫–∏–µ –æ—à–∏–±–∫–∏ –±—ã–≤–∞—é—Ç –≤ LangChain –∏ –∫–æ–≥–¥–∞ –æ–Ω–∏ –≤–æ–∑–Ω–∏–∫–∞—é—Ç?'

resp1 = retrieval_chain.invoke(
    {'input': q1}
)

resp1

{'input': '–ö–∞–∫–∏–µ –æ—à–∏–±–∫–∏ –±—ã–≤–∞—é—Ç –≤ LangChain –∏ –∫–æ–≥–¥–∞ –æ–Ω–∏ –≤–æ–∑–Ω–∏–∫–∞—é—Ç?',
 'context': [Document(id='d00dd8d5-9679-4075-a35d-b44246555166', metadata={'source': 'doc'}, page_content='# Architecture\n\nLangChain is a framework that consists of a number of packages.\n\n![Diagram outlining the hierarchical organization of the LangChain framework, displaying the interconnected parts across multiple layers.](/svg/langchain_stack_112024.svg "LangChain Framework Overview")\n\n## langchain-core[\u200b](#langchain-core "Direct link to langchain-core")\n\nThis package contains base abstractions for different components and ways to compose them together.\nThe interfaces for core components like chat models, vector stores, tools and more are defined here.\nNo third-party integrations are defined here.\nThe dependencies are very lightweight.\n\n## langchain[\u200b](#langchain "Direct link to langchain")\n\nThe main `langchain` package contains chains and retrieval strate