In [1]:
from openai import OpenAI

In [2]:
from dotenv import load_dotenv

In [4]:
load_dotenv()

True

In [5]:
import os

In [6]:
# OpenAI client; the API key is read from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [7]:
from datetime import datetime

In [8]:
import requests

In [9]:
from supabase import create_client, Client

In [10]:
supabase_url = os.environ.get("SUPABASE_URL")

In [11]:
# Supabase API key. "SUPBASE_KEY" (sic) is the variable name this notebook has
# always read, so it is still checked first; the conventionally spelled
# "SUPABASE_KEY" is accepted as a fallback so either spelling works.
supabase_api_key = os.environ.get("SUPBASE_KEY") or os.environ.get("SUPABASE_KEY")

In [12]:
supabase: Client = create_client(supabase_url, supabase_api_key)

In [13]:
# Query the 'webhooks' table: gte filter on news_date, ordered by created_at
# with desc=False.
result = (
    supabase.table('webhooks')
    .select('id, news_output,source_urls, news_date')
    .gte('news_date', '2026-02-08')
    .order('created_at', desc=False)
    .execute()
)

result.data

[{'id': 282,
  'news_output': 'Crypto.com CEO Kris Marszalek is launching ai.com, a consumer-focused autonomous/agentic AI platform that allows users to create agents to perform tasks on their behalf. Reports say the ai.com domain was purchased for about $70 million and the platform will debut with a Super Bowl LX advertisement; the launch is reported to go live Feb 8, 2026, with free and paid subscription tiers and plans for features like financial integrations, agent marketplaces, and social components.',
  'source_urls': ['https://www.pcmag.com/news/aicom-domain-name-sells-for-70-million-set-to-launch-at-super-bowl-lx'],
  'news_date': '2026-02-08'},
 {'id': 283,
  'news_output': 'Crypto.com CEO Kris Marszalek is launching ai.com, a consumer-focused autonomous/agentic AI platform that allows users to create agents to perform tasks on their behalf. Reports say the ai.com domain was purchased for about $70 million and the platform will debut with a Super Bowl LX advertisement; the lau

In [21]:
# Working set of news records (here a single record, id 299). Both
# prompt_generator_research and openai_research_v2 iterate over this list.
news_data = [{'id': 299,
  'news_output': 'Additional reports expand the OpenAI earbuds rumor with new naming details: outlets cite a code-name "Sweetpea" and say the earbuds may be sold under a consumer name "Dime." Coverage emphasizes these are leaks/rumors (no official OpenAI confirmation) and mentions possible product scaling/backing concerns (e.g., memory costs).',
  'source_urls': ['https://wccftech.com/openais-sweetpea-earbuds-to-be-sold-under-the-consumer-name-dime-now-expected-to-chronically-underdeliver-on-the-hype/'],
  'news_date': '2026-02-08'}]


news_data[0]

{'id': 299,
 'news_output': 'Additional reports expand the OpenAI earbuds rumor with new naming details: outlets cite a code-name "Sweetpea" and say the earbuds may be sold under a consumer name "Dime." Coverage emphasizes these are leaks/rumors (no official OpenAI confirmation) and mentions possible product scaling/backing concerns (e.g., memory costs).',
 'source_urls': ['https://wccftech.com/openais-sweetpea-earbuds-to-be-sold-under-the-consumer-name-dime-now-expected-to-chronically-underdeliver-on-the-hype/'],
 'news_date': '2026-02-08'}

In [22]:
def prompt_generator_research(items=None):
    """Build one deep-research prompt per news item.

    Args:
        items: Iterable of news records, each a dict with the keys ``id``,
            ``news_date``, ``news_output`` and ``source_urls``. When omitted,
            the notebook-level ``news_data`` list is used.

    Returns:
        A list of dicts, one per item, with the keys ``event_id``
        (``"<id>_<news_date>"``) and ``prompt``.
    """
    if items is None:
        items = news_data

    formatted_prompt = []

    for item in items:
        event_id = f"{item['id']}_{item['news_date']}"

        # Guard against a missing or empty source list so one record without a
        # URL does not abort the whole batch with an IndexError.
        source_urls = item.get('source_urls') or []
        primary_source = source_urls[0] if source_urls else 'not provided'

        prompt = f"""You are given news summary: {item['news_output']} and source: {primary_source}, you are supposed to run a deep research and read other sources as well. And then give a well structured ouput in the form of points and sources."""

        formatted_prompt.append({
            'event_id': event_id,
            'prompt': prompt
        })

    return formatted_prompt

In [23]:
prompt_generator_research()

[{'event_id': '299_2026-02-08',
  'prompt': 'You are given news summary: Additional reports expand the OpenAI earbuds rumor with new naming details: outlets cite a code-name "Sweetpea" and say the earbuds may be sold under a consumer name "Dime." Coverage emphasizes these are leaks/rumors (no official OpenAI confirmation) and mentions possible product scaling/backing concerns (e.g., memory costs). and source: https://wccftech.com/openais-sweetpea-earbuds-to-be-sold-under-the-consumer-name-dime-now-expected-to-chronically-underdeliver-on-the-hype/, you are supposed to run a deep research and read other sources as well. And then give a well structured ouput in the form of points and sources.'}]

In [33]:
# Accumulator for research results. openai_research_v2 appends to this list on
# every call, so re-run this cell to start from an empty list.
research_output = []

In [53]:
def openai_research_v2():
    """Run web-search-backed research for every item in ``news_data``.

    For each news item the research prompt is sent to the OpenAI Responses API
    with the ``web_search`` tool enabled. The model's text output is printed,
    recorded once in the notebook-level ``research_output`` list, and persisted
    through ``save_research``.

    Returns:
        The notebook-level ``research_output`` list. It is shared across calls,
        so it also contains entries appended by earlier calls unless it has
        been reset in between.
    """
    for news_item in news_data:
        response = client.responses.create(
            model = "gpt-5-nano",
            tools = [{
                "type": "web_search"
            }],
            include = ["web_search_call.action.sources"],
            input = f"""You are an AI research analyst for Krux.

            Your job is to expand a given AI news summary into a structured, fact-checked research brief.

            CRITICAL REQUIREMENTS:
            - You must ONLY include factual information.
            - DO NOT include opinions, interpretations, predictions, or analysis.
            - Every fact MUST be traceable to a credible published source.
            - If a fact cannot be verified from a reliable source, DO NOT include it.
            - Never assume or infer missing information.

            TASK:
            1. Validate the claims made in the summary.
            2. Expand with additional confirmed facts.
            3. Provide supporting citations for every statement.

            OUTPUT FORMAT REQUIRED:
            - Make sure you're just talking about the news and sources
            - Getting straight to the news sources without any background information
            - Make sure the output is in points


            Here are the details of the news that you need to research on:

            {news_item['news_output']}
            """,
        )

        output = response.output_text
        print(output)

        final_dictionary = {
            'event_id': f"{news_item['id']}_{news_item['news_date']}",
            'news_date': news_item['news_date'],
            'output': output,
            'model_provider': 'openai'
        }

        # Record the result exactly once (it used to be appended both before
        # and after the save, duplicating every entry). Appending before the
        # database write keeps the model output in memory even if the insert
        # fails.
        research_output.append(final_dictionary)
        print(final_dictionary)

        save_research(final_dictionary)

    return research_output

In [54]:
openai_research_v2()

- OpenAI publicly indicated at Davos that a new AI hardware device is in development and that the “most likely” timeline is the second half of 2026. ([axios.com](https://www.axios.com/2026/01/23/openai-device-earbuds-sweetpea-altman))

- Multiple outlets report that the upcoming device is expected to be a pair of earbuds, with codename Sweetpea. ([axios.com](https://www.axios.com/2026/01/23/openai-device-earbuds-sweetpea-altman))

- The rumored design places hardware behind the ear (behind-the-ear form factor) rather than traditional in-ear buds. ([axios.com](https://www.axios.com/2026/01/23/openai-device-earbuds-sweetpea-altman))

- A 2-nanometer processor and on-device AI processing have been flagged in coverage as possible specs for the Sweetpea earbuds; some reports also say most AI workloads could run in the cloud. ([techcrunch.com](https://techcrunch.com/2026/01/21/openai-aims-to-ship-its-first-device-in-2026-and-it-could-be-earbuds/))

- A first-year shipment goal of 40–50 milli

[{'event_id': '299_2026-02-08',
  'news_date': '2026-02-08',
  'output': '- Verified: The earbuds rumor centers on an internal code name for OpenAI’s first consumer hardware device being Sweetpea. Multiple credible outlets report this codename as part of the leak cycle about the device. ([techcrunch.com](https://techcrunch.com/2026/01/21/openai-aims-to-ship-its-first-device-in-2026-and-it-could-be-earbuds/))\n\n- Verified: Several credible outlets note that the project is not officially confirmed and remains a leak/rumor at this time. TechCrunch explicitly describes the details as coming from leaks and “tipsters,” not an official OpenAI confirmation; Axios likewise characterizes the information as rumors shared at Davos. ([techcrunch.com](https://techcrunch.com/2026/01/21/openai-aims-to-ship-its-first-device-in-2026-and-it-could-be-earbuds/))\n\n- Verified: The same reporting that mentions Sweetpea also references a potential second consumer device codenamed Gumdrop in the rumor mill. 

In [50]:
def save_research(research_json):
    """Insert one research result into the ``research_assistant`` table.

    Args:
        research_json: Mapping that provides the keys ``event_id``,
            ``model_provider``, ``news_date`` and ``output``; only these four
            values are written.
    """
    columns = ('event_id', 'model_provider', 'news_date', 'output')
    row = {column: research_json[column] for column in columns}
    supabase.table('research_assistant').insert(row).execute()