In [1]:
from typing import Optional

from pydantic import BaseModel, Field


class FranDoc(BaseModel):
    """Поиск рисков в договоре франчайзинга."""

    # ^ Doc-string for the entity Person.
    # This doc-string is sent to the LLM as the description of the schema Person,
    # and it can help to improve extraction results.

    # Note that:
    # 1. Each field is an `optional` -- this allows the model to decline to extract it!
    # 2. Each field has a `description` -- this description is used by the LLM.
    # Having a good description can help improve extraction results.
    company_name: Optional[str] = Field(
        default=None, description="Кто является правообладателем"
    )
    conflict_solving: Optional[str] = Field(
        default=None, description="Есть ли в документе информация о порядке разрешения споров"
    )
    restoraunt_name: Optional[str] = Field(
        default=None, description="Название ресторана"
    )



In [2]:
from langchain_core.prompts import ChatPromptTemplate


# Define a custom prompt to provide instructions and any additional context.
# 1) You can add examples into the prompt template to improve extraction quality
# 2) Introduce additional parameters to take context into account (e.g., include metadata
#    about the document from which the text was extracted.)
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Ты экспертный алгоритм поиска информации в документах. "
            "Если не знаешь значание запрашиваемого атрибута, то возвращай null."
        ),
        # Please see the how-to about improving performance with
        # reference examples.
        # MessagesPlaceholder('examples'),
        ("human", "{text}"),
    ]
)

In [3]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader('Форма_Договор_коммерческой_концессии_франчайзинга_образе_1.pdf')
docs = loader.load()

print(len(docs))

9


In [4]:
from langchain_openai import ChatOpenAI

from os import getenv
from dotenv import load_dotenv


load_dotenv()

llm = ChatOpenAI(
  openai_api_key=getenv("OPENROUTER_API_KEY"),
  openai_api_base=getenv("OPENROUTER_BASE_URL"),
  model="deepseek/deepseek-chat-v3-0324:free"
)


In [5]:
structured_llm = llm.with_structured_output(schema=FranDoc)

In [6]:
text = ''
for doc in docs:
   text += doc.page_content + " "

In [7]:
prompt = prompt_template.invoke({"text": text})
structured_llm.invoke(prompt)

ValidationError: 1 validation error for FranDoc
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='На основании ...ернется `null`.', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid