In [24]:
import os
from dotenv import load_dotenv

# Load settings (e.g. OPENAI_API_KEY) from a local .env file into os.environ.
load_dotenv()

model_name = "gpt-4o-mini"

# The agent cell passes `api_key` to the OpenAI client. It was never defined,
# so a fresh-kernel "Run All" failed with NameError. Read it from the
# environment rather than hard-coding a secret in the notebook.
api_key = os.getenv("OPENAI_API_KEY")

In [2]:
import chromadb


# Chroma client backed by the local "db/chromadb" directory.
chroma_db = chromadb.PersistentClient(
    path="db/chromadb",
)

In [3]:
from typing import List
from pydantic import BaseModel, Field


class SummaryDto(BaseModel):
    """Represents a summary with the title, content, and metadata.

    All three fields are required. `examples` is used instead of the former
    `example=` keyword: passing arbitrary extra keywords to `Field` is
    deprecated in pydantic v2 (which this notebook relies on via
    `model_dump`), while `examples` is the supported way to attach sample
    values to the generated JSON schema.
    """

    # Short headline of the summary.
    title: str = Field(
        ...,
        description="Title of the summary",
        examples=["Rosneft announced dividend payment"],
    )
    # Body text of the summary.
    content: str = Field(
        ...,
        description="Main content of the summary, providing a concise overview",
        examples=[
            "A robotic dog with a rabbit and a minigun is a new word in home security. A Chinese enthusiast has created an unusual hybrid, where a cute animal has become part of the combat system. Now this mecha-hare not only looks threatening, but also effectively performs its task, monitoring the yard."
        ],
    )
    # Free-form key/value pairs; the notebook stores the source URL here.
    metadata: dict = Field(
        ...,
        description="Additional metadata related to the summary, such as the source",
        examples=[{"source": "https://www.google.com"}],
    )


In [4]:
# First batch of summaries. These are turned into TextNodes and indexed below,
# so they play the role of notifications the user has already received.
# Seven of these entries reappear verbatim in `second_stream_summaries`.
summaries: List[SummaryDto] = [
    SummaryDto(
        title="Rosneft Announces Dividend Payment",
        content="Rosneft has declared a payment of dividends to its shareholders, highlighting its strong financial performance in the last quarter. The company's strategy to maintain stable earnings despite fluctuating oil prices has been praised by investors.",
        metadata={"source": "https://www.rosneft.com/press/releases/2023/rosneft-announces-dividend-payment"}
    ),
    SummaryDto(
        title="AI Revolutionizes Home Security with Robotic Dogs",
        content="A Chinese innovator has unveiled a robotic dog equipped with advanced surveillance capabilities and a combat system. This 'mecha-hare' is designed to offer both home security and a futuristic twist to traditional surveillance methods.",
        metadata={"source": "https://www.techcrunch.com/ai-robotic-dog-home-security"}
    ),
    SummaryDto(
        title="New Vaccination Guidelines Issued Amid COVID-19 Surge",
        content="The World Health Organization has updated its COVID-19 vaccination guidelines, recommending booster shots for high-risk groups. The new guidelines come in response to a spike in cases in several countries.",
        metadata={"source": "https://www.who.int/news-room/press-releases/2023/new-vaccination-guidelines"}
    ),
    SummaryDto(
        title="Tesla to Launch Full Self-Driving Cars by 2025",
        content="Tesla has announced plans to roll out fully autonomous vehicles by 2025. The new technology is expected to revolutionize the automotive industry and potentially reduce road accidents caused by human error.",
        metadata={"source": "https://www.cnbc.com/2023/09/tesla-full-self-driving-launch"}
    ),
    SummaryDto(
        title="NASA Discovers Water on Mars",
        content="NASA's latest mission has confirmed the presence of liquid water beneath the surface of Mars. This discovery is a significant step toward understanding the potential for life on the Red Planet.",
        metadata={"source": "https://www.nasa.gov/press-release/nasa-discovers-water-on-mars"}
    ),
    SummaryDto(
        title="Microsoft Acquires GitHub for $7.5 Billion",
        content="Microsoft has completed its acquisition of GitHub, a popular code hosting platform. This deal is part of Microsoft's strategy to strengthen its developer tools and cloud services offerings.",
        metadata={"source": "https://www.microsoft.com/en-us/news/press/2023/microsoft-acquires-github"}
    ),
    SummaryDto(
        title="Apple Introduces New AR Glasses at WWDC 2023",
        content="Apple has unveiled its latest product, AR Glasses, during the Worldwide Developers Conference (WWDC) 2023. The glasses are designed to enhance the augmented reality experience and are expected to be a game-changer in wearable technology.",
        metadata={"source": "https://www.apple.com/newsroom/2023/wwdc-2023-ar-glasses"}
    ),
    SummaryDto(
        title="Global Food Crisis Expected to Worsen in 2024",
        content="The United Nations has warned that the global food crisis is likely to worsen in 2024 due to ongoing conflicts, climate change, and supply chain disruptions. The organization is calling for urgent action to address the rising hunger levels.",
        metadata={"source": "https://www.un.org/en/food-crisis-2024-warning"}
    ),
    SummaryDto(
        title="Amazon Launches Drone Delivery Service",
        content="Amazon has officially launched its drone delivery service, allowing customers to receive packages within hours of ordering. The new service is expected to transform the logistics industry and reduce delivery times dramatically.",
        metadata={"source": "https://www.amazon.com/press-release/drone-delivery-launch"}
    ),
    SummaryDto(
        title="China's New Space Station to Host International Research",
        content="China's space station, Tiangong, is now open to international scientists for research opportunities. The station is expected to be a key player in advancing space exploration and scientific discovery.",
        metadata={"source": "https://www.space.com/china-new-space-station-research"}
    ),
]

In [5]:
import uuid
from llama_index.core import Document
from llama_index.core.schema import TextNode

bot_id = "12sdfsdfsdf123123213213"

# One TextNode per summary: the node text is "<title>\n\n<content>", and the
# node metadata is the title merged with the summary's own metadata (keys from
# the summary win on conflict).
text_nodes = [
    TextNode(
        text=f"{summary.title}\n\n{summary.content}",
        metadata={"title": summary.title, **summary.metadata},
    )
    for summary in summaries
]
text_nodes

[TextNode(id_='9048c60f-9d13-4c44-b7a8-6fc1d4783081', embedding=None, metadata={'title': 'Rosneft Announces Dividend Payment', 'source': 'https://www.rosneft.com/press/releases/2023/rosneft-announces-dividend-payment'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text="Rosneft Announces Dividend Payment\n\nRosneft has declared a payment of dividends to its shareholders, highlighting its strong financial performance in the last quarter. The company's strategy to maintain stable earnings despite fluctuating oil prices has been praised by investors.", mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'),
 TextNode(id_='81aec44d-d89d-4190-92b5-fbc5125089a6', embedding=None, metadata={'title': 'AI Revolutionizes Home Security with Robotic Dogs', 'source': 'https://www.techcrunch.com/ai-robotic-dog-home-security'}, 

In [6]:
from llama_index.core import StorageContext, VectorStoreIndex

chroma_db.delete_collection(bot_id)
vector_store = chroma_db.get_or_create_collection(bot_id)



In [7]:
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [8]:
import nest_asyncio
nest_asyncio.apply()

In [9]:
index = VectorStoreIndex(text_nodes, storage_context)


In [10]:
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.query_engine import RetrieverQueryEngine

# Retriever: fetch the 5 nearest nodes from the index for each query.
retriever = VectorIndexRetriever(
    similarity_top_k=5,
    verbose=True,
    index=index,
)

# Query engine: retrieved nodes scoring below 0.7 are dropped before the
# answer is synthesized.
query_engine = RetrieverQueryEngine(
    node_postprocessors=[
        SimilarityPostprocessor(similarity_cutoff=0.7),
    ],
    retriever=retriever,
)




In [11]:
# Ad-hoc check of the query engine against the indexed summaries. The answer
# is stored in `response`; this cell does not display it.
response = query_engine.query("What id doing Amazon with drone Services?")

In [12]:
# Second batch of summaries, i.e. the input handed to the sanitizing agent.
# The first three entries are new; the remaining seven repeat entries of
# `summaries` verbatim, so they exercise the duplicate-removal path.
second_stream_summaries: List[SummaryDto] = [
    SummaryDto(
        title="Discovery of New Exoplanet in the Goldilocks Zone",
        content="Astronomers have discovered a new exoplanet located in the Goldilocks Zone of its star, where conditions are just right for liquid water to exist. The planet, named Kepler-452b, is 1.5 times the size of Earth and orbits a star similar to our Sun. This discovery raises hopes for finding potentially habitable planets beyond our solar system.",
        metadata={"source": "https://www.space.com/new-exoplanet-goldilocks-zone"}
    ),
    SummaryDto(
        title="Tesla Acquires SolarCity to Expand Clean Energy Portfolio",
        content="Tesla has completed its acquisition of SolarCity, a leading solar energy company, for $2.6 billion. This deal marks a significant step for Tesla in its efforts to integrate solar energy and storage solutions into its electric vehicle ecosystem. The acquisition will help Tesla expand its renewable energy services and reduce the reliance on fossil fuels.",
        metadata={"source": "https://www.bbc.com/tesla-acquires-solarcity"}
    ),
    SummaryDto(
        title="UN Holds Emergency Meeting to Address Climate Change Crisis",
        content="The United Nations convened an emergency meeting in response to the worsening climate change crisis, with global temperatures rising at an alarming rate. The meeting focused on immediate actions needed to reduce carbon emissions, including a shift towards renewable energy sources and the implementation of stricter environmental regulations. Countries around the world are expected to commit to more ambitious climate goals in the coming years.",
        metadata={"source": "https://www.un.org/climate-change-emergency-meeting"}
    ),
    SummaryDto(
        title="Tesla to Launch Full Self-Driving Cars by 2025",
        content="Tesla has announced plans to roll out fully autonomous vehicles by 2025. The new technology is expected to revolutionize the automotive industry and potentially reduce road accidents caused by human error.",
        metadata={"source": "https://www.cnbc.com/2023/09/tesla-full-self-driving-launch"}
    ),
    SummaryDto(
        title="NASA Discovers Water on Mars",
        content="NASA's latest mission has confirmed the presence of liquid water beneath the surface of Mars. This discovery is a significant step toward understanding the potential for life on the Red Planet.",
        metadata={"source": "https://www.nasa.gov/press-release/nasa-discovers-water-on-mars"}
    ),
    SummaryDto(
        title="Microsoft Acquires GitHub for $7.5 Billion",
        content="Microsoft has completed its acquisition of GitHub, a popular code hosting platform. This deal is part of Microsoft's strategy to strengthen its developer tools and cloud services offerings.",
        metadata={"source": "https://www.microsoft.com/en-us/news/press/2023/microsoft-acquires-github"}
    ),
    SummaryDto(
        title="Apple Introduces New AR Glasses at WWDC 2023",
        content="Apple has unveiled its latest product, AR Glasses, during the Worldwide Developers Conference (WWDC) 2023. The glasses are designed to enhance the augmented reality experience and are expected to be a game-changer in wearable technology.",
        metadata={"source": "https://www.apple.com/newsroom/2023/wwdc-2023-ar-glasses"}
    ),
    SummaryDto(
        title="Global Food Crisis Expected to Worsen in 2024",
        content="The United Nations has warned that the global food crisis is likely to worsen in 2024 due to ongoing conflicts, climate change, and supply chain disruptions. The organization is calling for urgent action to address the rising hunger levels.",
        metadata={"source": "https://www.un.org/en/food-crisis-2024-warning"}
    ),
    SummaryDto(
        title="Amazon Launches Drone Delivery Service",
        content="Amazon has officially launched its drone delivery service, allowing customers to receive packages within hours of ordering. The new service is expected to transform the logistics industry and reduce delivery times dramatically.",
        metadata={"source": "https://www.amazon.com/press-release/drone-delivery-launch"}
    ),
    SummaryDto(
        title="China's New Space Station to Host International Research",
        content="China's space station, Tiangong, is now open to international scientists for research opportunities. The station is expected to be a key player in advancing space exploration and scientific discovery.",
        metadata={"source": "https://www.space.com/china-new-space-station-research"}
    ),
]

In [13]:
from llama_index.core.tools import QueryEngineTool


def _tool_name_from_title(title: str, max_len: int = 64) -> str:
    """Turn a summary title into a function-name-safe tool name.

    Runs of characters outside [0-9A-Za-z] collapse to a single underscore,
    the result is lower-cased, prefixed with "search_" and truncated to
    `max_len` characters (64 is the limit OpenAI documents for function
    names). An empty slug falls back to "search_summary".
    """
    import re  # local import keeps this cell self-contained

    slug = re.sub(r"[^0-9A-Za-z]+", "_", title).strip("_").lower()
    return f"search_{slug or 'summary'}"[:max_len].rstrip("_")


def create_engine_tool(summary: SummaryDto) -> QueryEngineTool:
    """
    Create a query engine tool for a summary.

    The tool wraps the notebook-level `query_engine`. It gets a name derived
    from the summary title: without an explicit name every tool falls back to
    the same default, so the agent would be offered several tools it cannot
    tell apart.

    Args:
        summary: Summary whose title is used for the tool name and description.

    Returns:
        A QueryEngineTool bound to `query_engine`.
    """
    return QueryEngineTool.from_defaults(
        query_engine=query_engine,
        name=_tool_name_from_title(summary.title),
        description=f"Provides information about possible existing summaries in db about - {summary.title}.",
    )


In [14]:
# System prompt for the sanitizing agent.
# The previous text described two conflicting output schemas: one without
# "metadata" and one about "Combined Summary" objects inside an unclosed code
# fence. The parsing cell reads "title", "content" and "metadata" from every
# object, so the prompt now specifies exactly that single schema.
system_prompt = """
You are a specialized AI agent tasked with sanitizing user notifications to avoid sending duplicate or redundant information.
Your role is to process new summaries of notifications and remove content that has already been sent to the user, using a semantic search tool.

Follow these rules:

1. For each new summary:
    - Use the query engine tools to retrieve past notifications that are semantically similar.
    - Compare the new summary with the retrieved content.
    - Remove or rewrite any parts of the new summary that duplicate information the user already knows.
    - Preserve only novel and valuable content.
    - If nothing novel remains, leave that summary out of the output.

2. Tool usage:
    - Always use the query engine tool that corresponds to the summary's topic.
    - The tool will return past notifications that the user has already received.

3. Output format:
    - Return a JSON array of objects, where each object represents one sanitized summary.
    - Each JSON object must have exactly the following structure:
      {
        "title": "original title",
        "content": "sanitized content",
        "metadata": {"source": "original source"}
      }
    - Copy "title" and "metadata" from the input summary without modification.
    - Ensure the sanitized summary is concise, clear, and informative.
    - Do not fabricate new information — preserve the original intent.
    - Return only the raw JSON array: no explanations and no Markdown code fences.

4. General rules:
    - Maintain original tone and structure where possible.
    - Avoid repeating facts or messages already present in past notifications.
    - Only return cleaned summaries — do not include analysis or reasoning in the final output.

CHECK YOUR ANSWER BEFORE RETURNING IT!!!.
"""


In [15]:
import json
from llama_index.core.agent.workflow import FunctionAgent
from llama_index.llms.openai import OpenAI


# One query-engine tool per incoming summary, in input order.
tools = list(map(create_engine_tool, second_stream_summaries))




In [16]:
# OpenAI chat model that drives the agent.
llm = OpenAI(api_key=api_key, model=model_name)

# Function-calling agent: the sanitizing instructions plus the per-summary tools.
agent = FunctionAgent(
    system_prompt=system_prompt,
    tools=tools,
    llm=llm,
)

In [17]:
# Serialize the second batch to a JSON string; this is the agent's user message.
summaries_json = json.dumps(
    [dto.model_dump() for dto in second_stream_summaries]
)

summaries_json




In [18]:
# Start the agent on the serialized summaries. The call is not awaited here;
# the returned handler is consumed by the event-streaming cell below.
handler = agent.run(summaries_json)


In [19]:
from llama_index.core.agent.workflow import AgentStream
from llama_index.core.agent.workflow import AgentOutput, ToolCallResult, ToolCall

result = None
async for event in handler.stream_events():
    if (hasattr(event, "current_agent_name")):
        print(f"agent: {event.current_agent_name}")
    elif isinstance(event, AgentOutput):
        if event.response.content:
            print("📤 Output:", event.response.content)
            result = event.response.content
        if event.tool_calls:
            print(
                "🛠️  Planning to use tools:",
                [call.tool_name for call in event.tool_calls],
            )
    elif isinstance(event, ToolCallResult):
        print(f"🔧 Tool Result ({event.tool_name}):")
        print(f"  Arguments: {event.tool_kwargs}")
        print(f"  Output: {event.tool_output}")
    elif isinstance(event, ToolCall):
        print(f"🔨 Calling Tool: {event.tool_name}")
        print(f"  With arguments: {event.tool_kwargs}")

agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent
agent: Agent

In [20]:
result2 = await agent.run(summaries_json)

In [21]:
# Show the text of the agent's final response (expected to be a JSON array).
print(result2.response.content)


[
  {
    "title": "Discovery of New Exoplanet in the Goldilocks Zone",
    "content": "Astronomers have recently discovered a new exoplanet located within the habitable zone of its star, often referred to as the Goldilocks Zone. This finding is significant as it raises the possibility of potential conditions suitable for life to exist on this distant planet.",
    "metadata": {
      "source": "https://www.space.com/new-exoplanet-goldilocks-zone"
    }
  },
  {
    "title": "Tesla Acquires SolarCity to Expand Clean Energy Portfolio",
    "content": "Tesla has completed its acquisition of SolarCity, a leading solar energy company, for $2.6 billion. This deal marks a significant step for Tesla in its efforts to integrate solar energy and storage solutions into its electric vehicle ecosystem.",
    "metadata": {
      "source": "https://www.bbc.com/tesla-acquires-solarcity"
    }
  },
  {
    "title": "UN Holds Emergency Meeting to Address Climate Change Crisis",
    "content": "The Unit

In [22]:
# Parse the agent's reply into SummaryDto objects.
# LLM replies may arrive wrapped in a Markdown code fence; remove the opening
# fence line (with its optional language tag) and the closing fence before
# handing the text to json.loads.
raw_output = result2.response.content.strip()
if raw_output.startswith("```"):
    raw_output = raw_output.partition("\n")[2]
    raw_output = raw_output.rsplit("```", 1)[0]
json_parsed = json.loads(raw_output)

# NOTE: this rebinds `summaries` (originally the first batch) to the sanitized
# results; the name is kept because the cell that follows iterates over it.
# "metadata" defaults to {} so one object without it does not abort the batch.
summaries = [
    SummaryDto(
        title=item["title"],
        content=item["content"],
        metadata=item.get("metadata", {}),
    )
    for item in json_parsed
]
print(summaries)




In [23]:
# Print each summary as title / content / metadata, followed by a separator line.
for summary in summaries:
    print(
        summary.title,
        summary.content,
        summary.metadata,
        "--------------------------------",
        sep="\n",
    )



Discovery of New Exoplanet in the Goldilocks Zone
Astronomers have recently discovered a new exoplanet located within the habitable zone of its star, often referred to as the Goldilocks Zone. This finding is significant as it raises the possibility of potential conditions suitable for life to exist on this distant planet.
{'source': 'https://www.space.com/new-exoplanet-goldilocks-zone'}
--------------------------------
Tesla Acquires SolarCity to Expand Clean Energy Portfolio
Tesla has completed its acquisition of SolarCity, a leading solar energy company, for $2.6 billion. This deal marks a significant step for Tesla in its efforts to integrate solar energy and storage solutions into its electric vehicle ecosystem.
{'source': 'https://www.bbc.com/tesla-acquires-solarcity'}
--------------------------------
UN Holds Emergency Meeting to Address Climate Change Crisis
The United Nations convened an emergency meeting in response to the worsening climate change crisis, focusing on immediate