In [2]:
import os
from dotenv import load_dotenv

# Read key=value pairs from a local .env file into the process environment.
load_dotenv()

# Model identifier string; not referenced by any of the cells visible here.
model_name = "openai:gpt-4o-mini"


In [2]:
import chromadb


# Chroma client backed by the relative directory "db/chromadb"
# (relative to the notebook's working directory).
chroma_db = chromadb.PersistentClient(path="db/chromadb")

In [3]:
from typing import List
from pydantic import BaseModel, Field


class SummaryDto(BaseModel):
    """A summary made of a title, body text, and free-form metadata.

    All three fields are required.

    Attributes:
        title: Title of the summary.
        content: Main content of the summary, providing a concise overview.
        metadata: Additional data related to the summary, such as the source.
    """

    # Sample values go through the `examples` list parameter of Field.
    # Passing an arbitrary keyword such as `example=` is deprecated in
    # Pydantic v2 and triggers a deprecation warning at class creation.
    title: str = Field(
        ...,
        description="Title of the summary",
        examples=["Rosneft announced dividend payment"],
    )
    content: str = Field(
        ...,
        description="Main content of the summary, providing a concise overview",
        examples=[
            "A robotic dog with a rabbit and a minigun is a new word in home security. A Chinese enthusiast has created an unusual hybrid, where a cute animal has become part of the combat system. Now this mecha-hare not only looks threatening, but also effectively performs its task, monitoring the yard."
        ],
    )
    metadata: dict = Field(
        ...,
        description="Additional metadata related to the summary, such as the source",
        examples=[{"source": "https://www.google.com"}],
    )


In [4]:
# Sample corpus as (title, content, source URL) rows; each row becomes one SummaryDto below.
_summary_rows = [
    (
        "Rosneft Announces Dividend Payment",
        "Rosneft has declared a payment of dividends to its shareholders, highlighting its strong financial performance in the last quarter. The company's strategy to maintain stable earnings despite fluctuating oil prices has been praised by investors.",
        "https://www.rosneft.com/press/releases/2023/rosneft-announces-dividend-payment",
    ),
    (
        "AI Revolutionizes Home Security with Robotic Dogs",
        "A Chinese innovator has unveiled a robotic dog equipped with advanced surveillance capabilities and a combat system. This 'mecha-hare' is designed to offer both home security and a futuristic twist to traditional surveillance methods.",
        "https://www.techcrunch.com/ai-robotic-dog-home-security",
    ),
    (
        "New Vaccination Guidelines Issued Amid COVID-19 Surge",
        "The World Health Organization has updated its COVID-19 vaccination guidelines, recommending booster shots for high-risk groups. The new guidelines come in response to a spike in cases in several countries.",
        "https://www.who.int/news-room/press-releases/2023/new-vaccination-guidelines",
    ),
    (
        "Tesla to Launch Full Self-Driving Cars by 2025",
        "Tesla has announced plans to roll out fully autonomous vehicles by 2025. The new technology is expected to revolutionize the automotive industry and potentially reduce road accidents caused by human error.",
        "https://www.cnbc.com/2023/09/tesla-full-self-driving-launch",
    ),
    (
        "NASA Discovers Water on Mars",
        "NASA's latest mission has confirmed the presence of liquid water beneath the surface of Mars. This discovery is a significant step toward understanding the potential for life on the Red Planet.",
        "https://www.nasa.gov/press-release/nasa-discovers-water-on-mars",
    ),
    (
        "Microsoft Acquires GitHub for $7.5 Billion",
        "Microsoft has completed its acquisition of GitHub, a popular code hosting platform. This deal is part of Microsoft's strategy to strengthen its developer tools and cloud services offerings.",
        "https://www.microsoft.com/en-us/news/press/2023/microsoft-acquires-github",
    ),
    (
        "Apple Introduces New AR Glasses at WWDC 2023",
        "Apple has unveiled its latest product, AR Glasses, during the Worldwide Developers Conference (WWDC) 2023. The glasses are designed to enhance the augmented reality experience and are expected to be a game-changer in wearable technology.",
        "https://www.apple.com/newsroom/2023/wwdc-2023-ar-glasses",
    ),
    (
        "Global Food Crisis Expected to Worsen in 2024",
        "The United Nations has warned that the global food crisis is likely to worsen in 2024 due to ongoing conflicts, climate change, and supply chain disruptions. The organization is calling for urgent action to address the rising hunger levels.",
        "https://www.un.org/en/food-crisis-2024-warning",
    ),
    (
        "Amazon Launches Drone Delivery Service",
        "Amazon has officially launched its drone delivery service, allowing customers to receive packages within hours of ordering. The new service is expected to transform the logistics industry and reduce delivery times dramatically.",
        "https://www.amazon.com/press-release/drone-delivery-launch",
    ),
    (
        "China's New Space Station to Host International Research",
        "China's space station, Tiangong, is now open to international scientists for research opportunities. The station is expected to be a key player in advancing space exploration and scientific discovery.",
        "https://www.space.com/china-new-space-station-research",
    ),
]

# Build the DTOs in row order; each one carries its URL under the "source" metadata key.
summaries: List[SummaryDto] = [
    SummaryDto(title=row_title, content=row_content, metadata={"source": row_source})
    for row_title, row_content, row_source in _summary_rows
]

In [5]:
import uuid
from llama_index.core import Document
from llama_index.core.schema import TextNode

# Identifier later used as the Chroma collection name.
bot_id = "12sdfsdfsdf123123213213"

# One TextNode per summary: the text is "<title>\n\n<content>", and the metadata
# starts with the title and is then extended/overridden by the summary's own metadata.
text_nodes = [
    TextNode(
        text=f"{item.title}\n\n{item.content}",
        metadata={"title": item.title, **item.metadata},
    )
    for item in summaries
]
text_nodes

[TextNode(id_='e938f6cb-6e93-499d-bf04-e31fabcbfcf9', embedding=None, metadata={'title': 'Rosneft Announces Dividend Payment', 'source': 'https://www.rosneft.com/press/releases/2023/rosneft-announces-dividend-payment'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text="Rosneft Announces Dividend Payment\n\nRosneft has declared a payment of dividends to its shareholders, highlighting its strong financial performance in the last quarter. The company's strategy to maintain stable earnings despite fluctuating oil prices has been praised by investors.", mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'),
 TextNode(id_='4d1a060f-885d-4400-87d4-d6d4de5b967d', embedding=None, metadata={'title': 'AI Revolutionizes Home Security with Robotic Dogs', 'source': 'https://www.techcrunch.com/ai-robotic-dog-home-security'}, 

In [6]:
from llama_index.core import StorageContext, VectorStoreIndex

from llama_index.vector_stores.chroma import ChromaVectorStore

# get_or_create_collection returns a raw Chroma collection, which is not a
# LlamaIndex vector store. Wrap it in ChromaVectorStore so that
# StorageContext.from_defaults(vector_store=...) receives an object that
# implements the vector-store interface.
# Requires the `llama-index-vector-stores-chroma` integration package.
chroma_collection = chroma_db.get_or_create_collection(bot_id)
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)



In [7]:
# Storage context configured with `vector_store` as its vector store;
# every other store is left at the from_defaults() default.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [9]:
import nest_asyncio
# Patch asyncio so that an event loop can be re-entered from inside an
# already-running loop (the notebook kernel runs one).
nest_asyncio.apply()

In [10]:
# Build the index over the prepared nodes.
# storage_context must be passed by keyword: the second positional parameter of
# VectorStoreIndex is `use_async`, so a positional storage context is taken as
# that flag, and the index silently falls back to its default in-memory storage.
# NOTE: the storage context has to wrap a LlamaIndex vector store object
# (e.g. ChromaVectorStore), not a raw Chroma collection.
index = VectorStoreIndex(text_nodes, storage_context=storage_context)
