In [324]:
import os
from dotenv import load_dotenv

load_dotenv()

model_name = "gpt-4o-mini"

In [311]:
from datetime import datetime
from typing import Any, Dict, Optional
from pydantic import BaseModel, Field


class MessageContentParserDto(BaseModel):
    id: Optional[int] = Field(None, description="The unique identifier of the message")
    content: str = Field(..., description="The text content of the message")


In [312]:
message_instance = MessageContentParserDto(
    id=12345,
    content="Москва встретила Пасху — тысячи православных жителей столицы встретили Светлый праздник в храмах",
)

svo_instant = MessageContentParserDto(
    id=67890,
    content="Специальная военная операция в России"
)

messages = [
    message_instance,
    svo_instant
]

messages

[MessageContentParserDto(id=12345, content='Москва встретила Пасху — тысячи православных жителей столицы встретили Светлый праздник в храмах'),
 MessageContentParserDto(id=67890, content='Специальная военная операция в России')]

In [313]:
system_prompt = """
You are a message analyzer (You are also poliglot, and can understand russian, english, and some other languages).

Follow these rules:

1. You receive list of messages
2. Each message consists of content, metadata, etc.
3. Determine which of those messages are not relevant to the topic `{{topic}}`.
4. Filter out messages that are relevant to the topic `{{topic}}`.

### **Output Format**:
Your response should be a list of messages, that you considered relevant to the topic `{{topic}}`.
If no messages are relevant, return an empty list.

The message is considered relevant if:
- The message is directly related to the topic `{{topic}}`
- The message discusses aspects or subtopics of the main topic `{{topic}}`
- The message provides useful information or context about the topic `{{topic}}`

The message is considered unrelevant if:
- The message is completely unrelated to the topic `{{topic}}`
- The message only mentions the topic in passing without meaningful content
- The message is spam or contains irrelevant information

DOUBLE CHECK YOUR ANSWER BEFORE RETURNING IT!!!
"""

In [314]:
system_prompt_two=""" 
You will be given a list of messages.
Each message contains a 'content' and 'id' fields.

A message is considered relevant if the content contains the phrase "{{topic}}" or any closely related variations of it.

Your task is to:
1. Review the content of each message.
2. Return the list of ids ofmessages that are relevant to the topic "{{topic}}".
4. If there is no message with relevant content, return an empty list.

The message is considered relevant if:
- The message is directly related to the topic `{{topic}}`
- The message discusses aspects or subtopics of the main topic `{{topic}}`
- The message provides useful information or context about the topic `{{topic}}`

The message is considered unrelevant if:
- The message is completely unrelated to the topic `{{topic}}`
- The message only mentions the topic in passing without meaningful content
- The message is spam or contains irrelevant information

### **Output Example**:
Input (topic is "Python programming language"):
[
    {
        "id": 1,
        "content": "Breaking: Python 3.12 released with major performance improvements! The latest version includes a new optimization for function calls, enhanced error messages, and better typing support. Early benchmarks show up to 20% faster execution in some scenarios. #python #programming #tech",
    },
    {
        "id": 2,
        "content": "BMW unveils new electric vehicle lineup for 2025! The luxury automaker announced revolutionary battery technology promising 500 mile range. New models feature advanced autonomous driving capabilities and sustainable materials. #BMW #EV #luxury",
    }
]

Return:
[1]
"""

In [315]:
from typing import List
from pydantic_ai import Agent

topic = "Специальная военная операция в России"
system_prompt_formatted = system_prompt_two.replace("{{topic}}", topic)

agent = Agent(
    model=model_name,
    tools=[],
    deps_type=str,
    output_type=List[str],
    system_prompt=system_prompt_formatted
)


In [316]:
system_prompt_formatted

' \nYou will be given a list of messages.\nEach message contains a \'content\' and \'id\' fields.\n\nA message is considered relevant if the content contains the phrase "Специальная военная операция в России" or any closely related variations of it.\n\nYour task is to:\n1. Review the content of each message.\n2. Return the list of ids ofmessages that are relevant to the topic "Специальная военная операция в России".\n4. If there is no message with relevant content, return an empty list.\n\nThe message is considered relevant if:\n- The message is directly related to the topic `Специальная военная операция в России`\n- The message discusses aspects or subtopics of the main topic `Специальная военная операция в России`\n- The message provides useful information or context about the topic `Специальная военная операция в России`\n\nThe message is considered unrelevant if:\n- The message is completely unrelated to the topic `Специальная военная операция в России`\n- The message only mentions

In [317]:
def datetime_handler(obj):
    if isinstance(obj, datetime):
        return obj.isoformat()
    raise TypeError(
        f"Object of type {type(obj).__name__} is not JSON serializable"
    )

In [318]:
import json

message_parse_dtos = [MessageContentParserDto(id=message.id, content=message.content) for message in messages]
message_parse_dtos

[MessageContentParserDto(id=12345, content='Москва встретила Пасху — тысячи православных жителей столицы встретили Светлый праздник в храмах'),
 MessageContentParserDto(id=67890, content='Специальная военная операция в России')]

In [319]:
json = json.dumps([message.model_dump() for message in message_parse_dtos])
json

'[{"id": 12345, "content": "\\u041c\\u043e\\u0441\\u043a\\u0432\\u0430 \\u0432\\u0441\\u0442\\u0440\\u0435\\u0442\\u0438\\u043b\\u0430 \\u041f\\u0430\\u0441\\u0445\\u0443 \\u2014 \\u0442\\u044b\\u0441\\u044f\\u0447\\u0438 \\u043f\\u0440\\u0430\\u0432\\u043e\\u0441\\u043b\\u0430\\u0432\\u043d\\u044b\\u0445 \\u0436\\u0438\\u0442\\u0435\\u043b\\u0435\\u0439 \\u0441\\u0442\\u043e\\u043b\\u0438\\u0446\\u044b \\u0432\\u0441\\u0442\\u0440\\u0435\\u0442\\u0438\\u043b\\u0438 \\u0421\\u0432\\u0435\\u0442\\u043b\\u044b\\u0439 \\u043f\\u0440\\u0430\\u0437\\u0434\\u043d\\u0438\\u043a \\u0432 \\u0445\\u0440\\u0430\\u043c\\u0430\\u0445"}, {"id": 67890, "content": "\\u0421\\u043f\\u0435\\u0446\\u0438\\u0430\\u043b\\u044c\\u043d\\u0430\\u044f \\u0432\\u043e\\u0435\\u043d\\u043d\\u0430\\u044f \\u043e\\u043f\\u0435\\u0440\\u0430\\u0446\\u0438\\u044f \\u0432 \\u0420\\u043e\\u0441\\u0441\\u0438\\u0438"}]'

In [320]:
result = await agent.run(json)


In [321]:
result.output

['67890']

In [323]:
output_messages = [
                message for message in messages
                if str(message.id) in result.output
            ]

output_messages

[MessageContentParserDto(id=67890, content='Специальная военная операция в России')]