In [1]:
from supabase import create_client
from langchain_community.embeddings import OpenAIEmbeddings
import os
from dotenv import load_dotenv

# Load settings from a local .env file (python-dotenv) before reading them below.
load_dotenv()

# Build the shared Supabase client from the SUPABASE_URL / SUPABASE_KEY
# environment variables. os.getenv returns None for a variable that is unset,
# so both must be defined (in the environment or in .env) before this cell runs.
url = os.getenv("SUPABASE_URL")
key = os.getenv("SUPABASE_KEY")
supabase_client = create_client(url, key)

In [62]:
import requests
import json
import os
import sys
import time
import asyncio
from datetime import datetime, timedelta
import logging
from typing import List, Dict, Optional
from dataclasses import dataclass
import hashlib
import openai

# sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
# from utils.supabase_client import supabase_client
from bse_data_collection import get_all_data_from_name
# from data_collection.bse_data_collection import get_all_data_from_name, download_pdf_from_link, generate_summary
from utils.database.retrival_utils import get_company_name
from utils.queue_processor import AnnouncementQueue, QueueItem

# BSE endpoint queried by fetch_new_announcements().
BASE_URL = "https://api.bseindia.com/BseIndiaAPI/api/AnnSubCategoryGetData/w"

# HTTP headers sent with every request to BASE_URL: a bseindia.com referer and
# a desktop-browser user agent string.
headers = {
    "referer": "https://www.bseindia.com/",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
}

def _get_date_params() -> tuple:
        """Get today's date parameters in required format"""
        today = datetime.now()
        return (today.strftime("%Y%m%d"), today.strftime("%Y%m%d"))

def _generate_hash(content: str) -> str:
        """Generate hash for announcement content"""
        return hashlib.sha256(content.encode('utf-8')).hexdigest()

def _check_existing_announcement(pdf_name: str) -> bool:
        """Check if announcement already exists in database"""
        try:
            response = supabase_client.table('recent_announcements_2')\
                .select('content')\
                .eq('content', pdf_name)\
                .execute()
            return len(response.data) > 0
        except Exception as e:
            print(f"Error checking existing announcement: {e}")
            return False
        
def remove_duplicate_scrips(data_list):
    """
    Collapse a list of dictionaries to one entry per SCRIP_CD value.

    The first entry seen for each SCRIP_CD is kept and later ones are dropped;
    the relative order of the kept entries is preserved. Entries without a
    SCRIP_CD key all share the value None, so only the first of them survives.

    Parameters:
    data_list (list): List of dictionaries, each read via ``.get('SCRIP_CD')``

    Returns:
    list: The surviving dictionaries (same objects, not copies)
    """
    first_by_code = {}
    for record in data_list:
        # setdefault stores the record only when the code is new, so the
        # earliest occurrence wins; dicts keep insertion order.
        first_by_code.setdefault(record.get('SCRIP_CD'), record)
    return list(first_by_code.values())

def fetch_new_announcements(max_pages: int = 1) -> List[Dict]:
    """Fetch today's announcements from the BSE API, one entry per SCRIP_CD.

    Args:
        max_pages: Upper bound on the number of result pages to request.
            The default of 1 requests only the first page. Values below 1
            are treated as 1. Paging stops early when a page has no rows or
            when the ``TotalPageCnt`` reported by the API has been reached.

    Returns:
        The rows of the response's ``"Table"`` list(s), de-duplicated with
        ``remove_duplicate_scrips``. An empty list is returned when the first
        request fails, the body is not valid JSON, or no rows are present.
        If a later page fails, the rows gathered so far are still returned.
    """
    from_date, to_date = _get_date_params()

    params = {
        "pageno": "1",
        "strCat": "-1",  # All categories
        "strPrevDate": from_date,
        "strScrip": "",  # All scripts
        "strSearch": "P",
        "strToDate": to_date,
        "strType": "C",
        "subcategory": "-1"
    }

    rows: List[Dict] = []
    for page in range(1, max(1, max_pages) + 1):
        params["pageno"] = str(page)
        try:
            response = requests.get(
                BASE_URL,
                params=params,
                headers=headers,
                timeout=30
            )
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON (e.g. an HTML
            # error page); older requests versions raise it outside the
            # RequestException hierarchy.
            print(f"Error fetching announcements: {e}")
            break

        # Guard against a non-dict payload and against "Table": null.
        table = data.get("Table") if isinstance(data, dict) else None
        if not table:
            if page == 1:
                print("No announcements found in response")
            break
        rows.extend(table)

        # Each row appears to carry the total page count; stop once it is
        # reached. NOTE(review): confirm TotalPageCnt semantics against the API.
        total_pages = table[0].get("TotalPageCnt") if isinstance(table[0], dict) else None
        if isinstance(total_pages, int) and page >= total_pages:
            break

    return remove_duplicate_scrips(rows)

In [63]:
# Live call to the BSE API: fetch today's announcements (first-seen entry per SCRIP_CD).
res = fetch_new_announcements()

In [64]:
# Display the fetched rows to inspect the field names the API returns.
res

[{'NEWSID': 'ddec7e08-d794-46bc-8d98-b54c70bfd5fb',
  'SCRIP_CD': 532015,
  'XML_NAME': 'ANN_532015_DDEC7E08-D794-46BC-8D98-B54C70BFD5FB',
  'NEWSSUB': 'GRAVITY (INDIA) LTD.-$ - 532015 - Disclosures under Reg. 29(2) of SEBI (SAST) Regulations, 2011',
  'DT_TM': '2024-12-19T14:44:12.283',
  'NEWS_DT': '2024-12-19T14:44:12.283',
  'CRITICALNEWS': 0,
  'ANNOUNCEMENT_TYPE': 'A',
  'QUARTER_ID': None,
  'FILESTATUS': 'N    ',
  'ATTACHMENTNAME': 'DDEC7E08_D794_46BC_8D98_B54C70BFD5FB_144406.pdf',
  'MORE': '',
  'HEADLINE': 'The Exchange has received the disclosure under Regulation 29(2) of SEBI (Substantial Acquisition of Shares & Takeovers) Regulations, 2011 for Sanjeev Lunkad & PACs',
  'CATEGORYNAME': 'Insider Trading / SAST',
  'OLD': 1,
  'RN': 1,
  'PDFFLAG': 0,
  'NSURL': 'https://www.bseindia.com/stock-share-price/gravity-(india)-ltd/gravity/532015/',
  'SLONGNAME': 'GRAVITY (INDIA) LTD.-$',
  'AGENDA_ID': 57,
  'TotalPageCnt': 4,
  'News_submission_dt': None,
  'DissemDT': '2024-12

In [65]:
def _store_announcement(stock_name: str, content: str, title:str) -> bool:
# async def _store_announcement(self, stock_name: str, content: str, summary: str, content_hash: str) -> bool:
    """Store announcement in recent_announcements_2 table"""
    try:
        data = {
            "stock_name": stock_name.upper(),
            "content": content,
            "title": title
            # "summary": summary,
            # "hash": content_hash
        }
        
        response = supabase_client.table('recent_announcements_2')\
            .upsert(data)\
            .execute()
            
        return len(response.data) > 0
    except Exception as e:
        print(f"Error storing announcement: {e}")
        return False

In [66]:
# Manual smoke test of _store_announcement.
# NOTE(review): this upserts a placeholder ('TCS', 'test', 'test') row into
# recent_announcements_2 — confirm it is cleaned up or pointed at a test table.
_store_announcement('TCS', 'test', 'test')

True

In [67]:
def process_announcement_content(announcement: Dict) -> tuple:
    """Extract the attachment name and a display title from an announcement.

    Args:
        announcement: One announcement dict; the keys ``ATTACHMENTNAME``,
            ``MORE`` and ``HEADLINE`` are read with ``.get``.

    Returns:
        ``(pdf_name, title)``. ``title`` is the ``MORE`` field, falling back to
        ``HEADLINE`` when ``MORE`` is missing, None, empty or whitespace-only.
        ``(None, None)`` is returned if the input cannot be read (for example
        when it is not a dict).
    """
    try:
        pdf_name = announcement.get("ATTACHMENTNAME")
        title = announcement.get("MORE")
        # Fall back to the headline whenever MORE carries no usable text,
        # not only when it is exactly the empty string.
        if title is None or (isinstance(title, str) and not title.strip()):
            title = announcement.get("HEADLINE")
        return pdf_name, title

    except Exception as e:
        print(f"Error processing announcement content: {e}")
        return None, None

In [68]:
# Module-level queue instance that queue_announcements() adds company names to.
queue = AnnouncementQueue()

In [71]:
def queue_announcements() -> None:
    """Fetch today's announcements, store the unseen ones and queue their companies.

    For every fetched announcement that has a SCRIP_CD and a resolvable company
    name: skip it if its attachment name is already stored, otherwise store it
    and, when storing succeeds, add the company to the module-level ``queue``
    with priority 1. An error on one announcement is printed and does not stop
    the remaining ones. Progress and a final summary are printed.
    """
    print("Starting announcement check")
    started_at = time.time()

    queued_count = 0
    for item in fetch_new_announcements():
        try:
            scrip_code = item.get("SCRIP_CD")
            # Only look up the company when a scrip code is present.
            name = get_company_name(str(scrip_code)) if scrip_code else None
            if not name:
                continue

            pdf_name, title = process_announcement_content(item)
            print(f"get_company_name: {name} - {pdf_name}")

            # The attachment name doubles as the de-duplication key.
            if _check_existing_announcement(pdf_name):
                print(f"Announcement for {name} already exists")
                continue

            # Persist first; only stored announcements are queued.
            if not _store_announcement(name, pdf_name, title):
                continue

            # 1 is the default priority for newly queued companies.
            if queue.add_item(name, 1):
                queued_count += 1
                print(f"Queued new announcement for {name}")

        except Exception as e:
            print(f"Error queuing announcement: {e}")

    execution_time = time.time() - started_at
    print(f"Announcement check completed. Queued {queued_count} new announcements in {execution_time:.2f} seconds")


In [72]:
# Run one full check: live API fetch plus reads/writes against recent_announcements_2.
queue_announcements()

Starting announcement check
get_company_name: BDL - f3c61c91-f90b-41c2-8263-7fc45e7d8cba.pdf
Announcement for BDL already exists
get_company_name: BSOFT - BF8C2D1B_8A40_4582_A909_FCDDB69CBA7A_144128.pdf
Announcement for BSOFT already exists
get_company_name: HAPPSTMNDS - de24c299-8d81-4012-82d1-4087fc6b5899.pdf
Announcement for HAPPSTMNDS already exists
get_company_name: THOMASCOOK - 3ca75413-d3ae-4ec8-86d8-e93fcf383c7f.pdf
Announcement for THOMASCOOK already exists
get_company_name: SHANTIGEAR - 240439b7-30e7-4096-bc12-595e8edaaa5b.pdf
Announcement for SHANTIGEAR already exists
get_company_name: PTCIL - 58687b81-4943-4cdf-b93d-52a646753327.pdf
Announcement for PTCIL already exists
get_company_name: SARDAEN - 3e5bb1c3-4b01-4815-88a0-4dc9cd0c5da9.pdf
Announcement for SARDAEN already exists
get_company_name: GUJALKALI - 3f118a02-a328-4999-8c41-4ed29625e8e8.pdf
Announcement for GUJALKALI already exists
get_company_name: VOITHPAPR - 2deb3ce8-0134-402b-b812-9fbbb02b8e04.pdf
Announcement for

In [74]:
# Spot check: is a row with this attachment name as its content already stored?
_check_existing_announcement("445ef9e0-1789-4370-bd90-3781a7f14be9.pdf")

True