In [1]:
from dotenv import load_dotenv
load_dotenv()
import os
from supabase import create_client, Client
# Supabase connection settings come from the process environment
# (load_dotenv() is called earlier in this cell).
supabase_url = os.environ.get("SUPABASE_URL")
# "SUPBASE_KEY" is the misspelled name this notebook has always read, so it is
# still honoured first; the correctly spelled "SUPABASE_KEY" is a fallback.
supabase_api_key = os.environ.get("SUPBASE_KEY") or os.environ.get("SUPABASE_KEY")

# Fail early with a message that names the variables to set.
if not supabase_url or not supabase_api_key:
    raise RuntimeError(
        "Missing Supabase configuration: set SUPABASE_URL and SUPABASE_KEY "
        "(or the legacy SUPBASE_KEY) in the environment or .env file."
    )

# Shared client used by the query/insert helpers defined in later cells.
supabase: Client = create_client(supabase_url, supabase_api_key)

In [2]:
from openai import OpenAI
# OpenAI client shared by the research workflow; the key is read from the
# OPENAI_API_KEY environment variable.
openai_api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=openai_api_key)

In [5]:
def report_research(date, topic='Report'):
    """Fetch curated items for one news date and topic.

    Queries the ``curation_selected_items`` table through the module-level
    ``supabase`` client, filtering on ``news_date`` and ``topic`` and ordering
    by ``created_at`` ascending.

    Args:
        date: Value matched against the ``news_date`` column
            (the notebook passes a string such as ``'2026-02-21'``).
        topic: Value matched against the ``topic`` column. Defaults to
            ``'Report'``, which is the value that was previously hard-coded.

    Returns:
        The ``data`` attribute of the executed query result; each row carries
        the columns ``event_id``, ``output``, ``sources``, ``news_date`` and
        ``topic``.
    """
    # One column per argument instead of a comma-packed string plus a stray
    # second argument; the parenthesised chain replaces backslash continuations.
    query = (
        supabase.table('curation_selected_items')
        .select('event_id', 'output', 'sources', 'news_date', 'topic')
        .eq('news_date', date)
        .eq('topic', topic)
        .order('created_at', desc=False)
    )
    return query.execute().data

In [7]:
# News date whose curated items are pulled for the deep-dive research run.
REPORT_DATE = '2026-02-21'
report_deep_dive = report_research(REPORT_DATE)

In [8]:
# Display the fetched rows to check them before they are sent to the model.
report_deep_dive

[{'event_id': '1302_2026-02-21',
  'output': 'India\'s Information Technology (Intermediary Guidelines and Digital Media Ethics Code) Amendment Rules, 2026 (IT Rules 2026) — recognized and regulated "Synthetically Generated Information" (SGI). Key requirements: mandatory visible labelling of AI-generated content and audio disclosures; embedding persistent digital identifiers/metadata for traceability (tampering prohibited); much faster removal timelines (3-hour takedown for government/court-declared unlawful content; 2-hour removal for highly sensitive deepfake intimate material); shorter complaint-resolution timelines; stricter pre-publication duties and technical verification requirements for Significant Social Media Intermediaries (SSMIs, platforms >5M users); failure to comply risks loss of safe-harbour protections. Rules brought into force from 20 Feb 2026.',
  'sources': [{'url': 'https://www.lexology.com/library/detail.aspx?g=4db8f74b-7cec-4fdf-806f-ec24e856cd15',
    'name': 'L

In [10]:
def _build_research_prompt(item):
    """Render the research prompt for one curated item.

    Interpolates ``item['output']`` and ``item['sources']`` into the fixed
    instruction text. Raises ``KeyError`` if either key is missing.
    """
    # The whitespace inside this literal is part of the text sent to the model,
    # so it is kept exactly as it was originally written.
    return f"""
            You are a senior research analyst for Krux.

            TASK:
              Create a deep, fact-checked brief for ONE AI report/paper/benchmark release.
            
            GOAL:
              Extract the highest-value findings and implementation guidance so a later 100-word summary can capture most practical signal.
            
            OUTPUT FORMAT:
              - 12 to 16 bullet points.
              - Each bullet must include inline citations:
                [Source: <publisher>, <url>]
              - No text before or after bullets.
            
            MANDATORY STRUCTURE:
              1) REPORT SNAPSHOT
              - What was published, by whom, and when.
              - Scope: geography, sectors, sample size, time window, method type.
            
              2) CORE FINDINGS (MOST IMPORTANT)
              - 4 to 6 bullets with the strongest quantified findings.
              - Include exact numbers, not vague language.
            
              3) WHAT THIS ACTUALLY MEANS
              - 2 to 3 bullets translating findings into plain English implications for real teams.
            
              4) IMPLEMENTATION PLAYBOOK
              - 3 to 4 bullets: what organizations should do in the next quarter.
              - Include sequencing (e.g., data readiness -> pilot -> measurement -> rollout).
            
              5) METRICS TO TRACK
              - 1 or 2 bullets on KPIs teams should monitor to apply the report in practice.
            
              6) LIMITATIONS / CAVEATS
              - 1 or 2 bullets on methodology limits, sample bias, missing data, or uncertainty.
            
              7) DECISION TAKEAWAY
              - 1 bullet: “If a team only does one thing based on this report, it should be X.”
            
              STRICT RULES:
              - No hype, no opinion, no generic AI commentary.
              - Every factual claim must be source-backed.
              - If a key detail is missing, write: "Not disclosed in cited sources."
              - Prefer primary sources (original report/paper) over secondary articles.
              - If secondary coverage conflicts with primary report, prioritize primary and note conflict.
            
              QUALITY CHECK:
              - Must contain quantified findings.
              - Must contain actionable implementation steps.
              - Must clearly separate findings from interpretation.
              - Must include limitations.
            
              Report/event to research:
              Report summary: {item['output']}
              Source: {item['sources']}
            """


def openai_research_workflow(research_input, model="gpt-5-nano"):
    """Generate and persist a research brief for each curated item.

    For every item, a prompt is built from its ``output`` and ``sources``, sent
    to the module-level OpenAI ``client`` with the ``web_search`` tool enabled,
    and the returned text is printed and handed to ``save_research``.

    Args:
        research_input: Iterable of dicts providing the keys ``event_id``,
            ``news_date``, ``output``, ``sources`` and ``topic``.
        model: Model name passed to ``client.responses.create``. Defaults to
            ``"gpt-5-nano"``, the value that was previously hard-coded.

    Returns:
        None. Results are printed and stored via ``save_research``.

    Raises:
        KeyError: If an item lacks one of the required keys. This is raised
            before any request is made for that item.
    """
    for item in research_input:
        # Read everything needed from the item up front, so a malformed row
        # fails here rather than after a paid API request has been spent.
        prompt = _build_research_prompt(item)
        event_id = item['event_id']
        news_date = item['news_date']
        topic = item['topic']

        response = client.responses.create(
            model=model,
            tools=[{
                "type": "web_search"
            }],
            include=["web_search_call.action.sources"],
            input=prompt,
        )

        output = response.output_text
        # Print the brief once; the saved record is not printed again because
        # it would repeat the same text in full.
        print(output)

        save_research({
            'event_id': event_id,
            'news_date': news_date,
            'output': output,
            'model_provider': 'openai',
            'topic': topic,
        })

        print("✅ Done")

In [13]:
# Run the research workflow over the fetched items.
# NOTE: openai_research_workflow calls save_research, which is defined in a
# later cell of this notebook; that cell must be run before this one.
openai_research_workflow(report_deep_dive)

- REPORT SNAPSHOT: Publication of the Information Technology (Intermediary Guidelines and Digital Media Ethics Code) Amendment Rules, 2026 (IT Rules 2026) by the Government of India, published in the Gazette of India on February 10, 2026, effective February 20, 2026. [Source: The Gazette of India, https://egazette.gov.in/WriteReadData/2026/269993.pdf]

- REPORT SNAPSHOT: Scope and scope irritants include intermediaries and Significant Social Media Intermediaries (SSMIs) under the IT Rules framework; key definitions and compliance timelines come into force on February 20, 2026. [Source: The Gazette of India, https://egazette.gov.in/WriteReadData/2026/269993.pdf]

- CORE FINDINGS: The amendment defines “synthetically generated information” (SGI) as audio, visual, or audio-visual content created or altered by a computer resource that appears real, with explicit exemptions for routine or good-faith editing, or the creation/presentation of documents or educational materials that do not prod

In [12]:
def save_research(research_json):
    """Insert one research record into the ``research_assistant`` table.

    Only the keys ``event_id``, ``model_provider``, ``news_date``, ``output``
    and ``topic`` are copied from ``research_json``; a missing key raises
    ``KeyError``. Uses the module-level ``supabase`` client and returns None.
    """
    target_table = supabase.table('research_assistant')

    # Copy exactly the fields that are persisted.
    persisted_fields = ('event_id', 'model_provider', 'news_date', 'output', 'topic')
    row = {field: research_json[field] for field in persisted_fields}

    insert_request = target_table.insert(row)
    insert_request.execute()