# In [1]:
import re
import base64
import logging
import time
import os.path
import json
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.oauth2.credentials import Credentials
from google.auth.transport.requests import Request
from googleapiclient.errors import HttpError
from googleapiclient.http import BatchHttpRequest
from tabulate import tabulate
from datetime import datetime, timedelta
from typing import Dict, List, Tuple
import pytz
from dateutil.parser import parse as parse_date
import email
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from functools import wraps
import schedule
import threading
import http.server
import socketserver

# Configure logging: everything at DEBUG and above goes to 'date_parsing.log'
# with a timestamp and level prefix. The OAuth helper library's logger is set
# to DEBUG as well so authentication problems are visible in the same file.
logging.basicConfig(filename='date_parsing.log', level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
logging.getLogger('google_auth_oauthlib').setLevel(logging.DEBUG)

# OAuth scopes requested during authentication: read-only Gmail, read-only
# Calendar, and Gmail send (needed for the alert e-mails).
SCOPES = [
    'https://www.googleapis.com/auth/gmail.readonly',
    'https://www.googleapis.com/auth/calendar.readonly',
    'https://www.googleapis.com/auth/gmail.send'
]

# Manually maintained list of e-mail addresses that receive the daily alert.
ALERT_RECIPIENTS = [
    'andrew@iitlabs.com',
    # Add more email addresses as needed
]

# Time of day for the daily alert, as "HH:MM" in 24-hour format
# (e.g., "09:30" for 9:30 AM). The scheduler validates it with '%H:%M'.
ALERT_TIME = "18:40"  # Currently 6:40 PM

# Port for the HTTP server.
# NOTE(review): the server itself is not defined in this part of the file —
# presumably it serves HTML_CONTENT and rfp_due_today.json; confirm.
HTTP_PORT = 8000

# HTML content for the interactive UI.
# The page loads /rfp_due_today.json and renders it with DataTables. Every
# value is HTML-escaped in the browser before it is handed to DataTables,
# because the data is extracted from incoming e-mail and must never be
# interpreted as markup. Links are only rendered for http(s) URLs.
HTML_CONTENT = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>RFP Opportunities Dashboard</title>
    <script src="https://cdn.tailwindcss.com"></script>
    <link rel="stylesheet" href="https://cdn.datatables.net/1.13.6/css/jquery.dataTables.min.css">
    <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
    <script src="https://cdn.datatables.net/1.13.6/js/jquery.dataTables.min.js"></script>
    <style>
        body { font-family: Arial, sans-serif; }
        .dataTables_wrapper .dataTables_filter input {
            border: 1px solid #e5e7eb;
            padding: 0.5rem;
            border-radius: 0.375rem;
            margin-bottom: 1rem;
        }
        .dataTables_wrapper .dataTables_length select {
            border: 1px solid #e5e7eb;
            padding: 0.5rem;
            border-radius: 0.375rem;
        }
    </style>
</head>
<body class="bg-gray-100">
    <div class="max-w-7xl mx-auto p-6">
        <h1 class="text-3xl font-bold mb-6">RFP Opportunities Due Today</h1>
        <table id="rfpTable" class="display w-full bg-white shadow-md rounded-lg">
            <thead class="bg-gray-200">
                <tr>
                    <th class="p-3 text-left">Subject</th>
                    <th class="p-3 text-left">Online Link</th>
                    <th class="p-3 text-left">Due Date</th>
                    <th class="p-3 text-left">Agency</th>
                    <th class="p-3 text-left">Reference</th>
                    <th class="p-3 text-left">Contact</th>
                </tr>
            </thead>
            <tbody>
                <!-- Data will be populated dynamically -->
            </tbody>
        </table>
    </div>

    <script>
        // Escape a value so it is safe in HTML text and attribute contexts.
        function escapeHtml(value) {
            return String(value === null || value === undefined ? '' : value)
                .replace(/&/g, '&amp;')
                .replace(/</g, '&lt;')
                .replace(/>/g, '&gt;')
                .replace(/"/g, '&quot;')
                .replace(/'/g, '&#39;');
        }

        // Render an anchor only for http(s) URLs; anything else becomes escaped text.
        function renderLink(url) {
            const text = String(url === null || url === undefined ? '' : url).trim();
            const lower = text.toLowerCase();
            if (!lower.startsWith('http://') && !lower.startsWith('https://')) {
                return escapeHtml(text);
            }
            return `<a href="${escapeHtml(text)}" class="text-blue-600 hover:underline" target="_blank" rel="noopener noreferrer">View Opportunity</a>`;
        }

        $(document).ready(function() {
            // Initialize DataTable
            const table = $('#rfpTable').DataTable({
                paging: true,
                searching: true,
                ordering: true,
                info: true,
                lengthChange: true,
                pageLength: 10,
                columnDefs: [
                    { width: '20%', targets: 0 },
                    { width: '25%', targets: 1 },
                    { width: '15%', targets: 2 },
                    { width: '20%', targets: 3 },
                    { width: '15%', targets: 4 },
                    { width: '15%', targets: 5 }
                ]
            });

            // Fetch data from JSON file
            $.ajax({
                url: '/rfp_due_today.json',
                method: 'GET',
                dataType: 'json',
                success: function(data) {
                    if (data && data.length > 0) {
                        data.forEach(row => {
                            table.row.add([
                                escapeHtml(row.subject),
                                renderLink(row.online_link),
                                escapeHtml(row.formatted_date),
                                escapeHtml(row.agency),
                                escapeHtml(row.reference),
                                escapeHtml(row.contact)
                            ]);
                        });
                    } else {
                        table.row.add([
                            'No RFPs Due Today',
                            '',
                            '',
                            '',
                            '',
                            ''
                        ]);
                    }
                    // Redraw once after all rows are queued instead of once per row.
                    table.draw();
                },
                error: function(xhr, status, error) {
                    console.error('Error fetching RFP data:', error);
                    table.row.add([
                        'Error Loading Data',
                        '',
                        '',
                        '',
                        '',
                        ''
                    ]).draw();
                }
            });
        });
    </script>
</body>
</html>
"""

def retry_on_transient_error(max_attempts=3, backoff_factor=1):
    """Build a decorator that re-invokes a callable after transient HttpError failures.

    Only HTTP statuses 429, 500, 502, 503 and 504 are retried; any other
    HttpError is re-raised immediately. The pause before retry k is
    ``backoff_factor * 2 ** (k - 1)`` seconds. Once ``max_attempts`` calls
    have failed, the last HttpError propagates to the caller.
    """
    retryable_statuses = {429, 500, 502, 503, 504}

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, max_attempts + 1):
                try:
                    return func(*args, **kwargs)
                except HttpError as err:
                    status = err.resp.status
                    # Give up on permanent errors and on the final attempt.
                    if status not in retryable_statuses or attempt == max_attempts:
                        raise
                    delay = backoff_factor * (2 ** (attempt - 1))
                    logging.warning(f"Transient error {status} in {func.__name__}, retrying in {delay}s (attempt {attempt}/{max_attempts})")
                    time.sleep(delay)
        return wrapper
    return decorator

@retry_on_transient_error()
def authenticate_google() -> tuple[build, build]:
    """
    Authenticate against the Gmail and Calendar APIs using cached OAuth credentials.

    Credentials are cached in ``token.json`` together with a ``creation_time``
    stamp recording when the user last completed the interactive consent flow.
    The stamp is preserved across access-token refreshes, so a token older
    than roughly six months (183 days) is discarded and the consent flow is
    run again. The flow is also run when the cached credentials are missing,
    unreadable, cannot be refreshed, or are invalid without a refresh token.

    Returns:
        A ``(gmail_service, calendar_service)`` tuple of API clients.

    Raises:
        FileNotFoundError: ``client.json`` (the OAuth client secrets) is missing.
        ValueError: the OAuth flow finished without producing credentials.
        Exception: any other failure of the OAuth flow or of building the
            API clients is printed and re-raised unchanged.
    """
    token_path = 'token.json'
    six_months = timedelta(days=183)  # Approximate 6 months
    creds = None
    token_creation_time = None

    def discard_token() -> None:
        """Delete the cached token file if it is still on disk."""
        if os.path.exists(token_path):
            os.remove(token_path)

    # Load credentials from token.json if it exists
    if os.path.exists(token_path):
        try:
            with open(token_path, 'r') as token_file:
                token_data = json.load(token_file)
            creds = Credentials.from_authorized_user_info(token_data, SCOPES)
            token_creation_time = token_data.get('creation_time')
            logging.info(f"Loaded credentials from {token_path}. Refresh token present: {creds.refresh_token is not None}")
        except (ValueError, TypeError, AttributeError) as e:
            # ValueError covers malformed JSON and missing credential fields;
            # TypeError/AttributeError cover JSON that is not an object.
            logging.error(f"Failed to load credentials from {token_path}: {e}")
            print(f"Error: Invalid {token_path} file. Deleting and re-authenticating...")
            discard_token()
            creds = None
            token_creation_time = None

    # Check if token is older than 6 months
    if token_creation_time:
        try:
            creation_dt = datetime.fromisoformat(token_creation_time)
            if creation_dt.tzinfo is None:
                # A stamp without an offset is treated as UTC so the
                # subtraction below cannot mix naive and aware datetimes.
                creation_dt = pytz.UTC.localize(creation_dt)
            token_age = datetime.now(pytz.UTC) - creation_dt
        except (TypeError, ValueError) as e:
            logging.error(f"Invalid creation_time in {token_path}: {e}. Deleting and re-authenticating...")
            discard_token()
            creds = None
            token_creation_time = None
        else:
            if token_age > six_months:
                logging.info(f"Token is older than 6 months (created: {token_creation_time}). Forcing re-authentication.")
                print("Token expired (older than 6 months). Re-authenticating...")
                discard_token()
                creds = None
                token_creation_time = None

    # Check if credentials are valid or can be refreshed
    if not creds or not creds.valid:
        fresh_login = False

        if creds and creds.expired and creds.refresh_token:
            try:
                creds.refresh(Request())
                logging.info("Successfully refreshed access token.")
            except Exception as e:
                # Best effort: any refresh failure falls back to a new login.
                logging.error(f"Failed to refresh token: {e}")
                print(f"Error: Failed to refresh token ({e}). Re-authenticating...")
                discard_token()
                creds = None

        # Run OAuth flow if there are still no usable credentials. This also
        # covers cached credentials that are invalid but not refreshable.
        if not creds or not creds.valid:
            try:
                flow = InstalledAppFlow.from_client_secrets_file('client.json', SCOPES)
                creds = flow.run_local_server(
                    port=8080,
                    access_type='offline',  # Ensure refresh token is issued
                    prompt='consent'        # Force consent screen to get refresh token
                )
                fresh_login = True
                logging.info(f"OAuth flow completed. Refresh token obtained: {creds.refresh_token is not None}")
            except FileNotFoundError:
                print("Error: 'client.json' file not found. Please download OAuth 2.0 credentials from Google Cloud Console.")
                raise
            except Exception as e:
                print(f"Error: OAuth flow failed: {e}. Ensure your Google account is authorized and the browser flow completes successfully.")
                raise

        if not creds:
            print("Error: No valid credentials obtained from OAuth flow.")
            raise ValueError("Authentication failed: No valid credentials obtained.")

        # Save credentials. The creation stamp is renewed only after a real
        # login; a mere refresh keeps the original stamp so the six-month
        # limit above can actually be reached.
        if fresh_login or not token_creation_time:
            created_at = datetime.now(pytz.UTC).isoformat()
        else:
            created_at = token_creation_time
        try:
            token_data = json.loads(creds.to_json())
            token_data['creation_time'] = created_at
            with open(token_path, 'w') as token:
                json.dump(token_data, token, indent=2)
            print(f"Credentials saved to {token_path}")
            logging.info(f"Saved credentials to {token_path}. Refresh token: {creds.refresh_token is not None}, Creation time: {token_data['creation_time']}")
        except Exception as e:
            # Not fatal: the in-memory credentials are still usable this run.
            logging.error(f"Failed to save credentials to {token_path}: {e}")
            print(f"Warning: Failed to save credentials to {token_path}: {e}")

    # Build API services
    try:
        gmail_service = build('gmail', 'v1', credentials=creds)
        calendar_service = build('calendar', 'v3', credentials=creds)
        return gmail_service, calendar_service
    except Exception as e:
        print(f"Error: Failed to build API services: {e}")
        raise

def send_alert_email(gmail_service: build, recipient: str, events: List[Dict]) -> None:
    """Send one alert e-mail listing the given RFP opportunities.

    The message is multipart/alternative: a plain-text part plus an HTML table.
    Event values are HTML-escaped before being placed in the table because
    they are extracted from incoming e-mail. A link is rendered as an anchor
    only when it is an http(s) URL.

    Sending is best effort. Transient Gmail errors are retried through
    ``retry_on_transient_error``; a final ``HttpError`` is logged and printed,
    not raised.

    Args:
        gmail_service: Gmail API client used to send the message.
        recipient: Destination address; the call is a no-op (with an error
            log) if it does not look like an e-mail address.
        events: Event dicts with the keys ``subject``, ``online_link``,
            ``formatted_date``, ``agency``, ``reference`` and ``contact``.
            Nothing is sent when the list is empty.
    """
    # Function-scope import so this block does not depend on the file header.
    from html import escape

    if not events:
        logging.info(f"No events to send for recipient {recipient}")
        return

    # Validate email address format
    email_pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    if not re.match(email_pattern, recipient):
        logging.error(f"Invalid recipient email address: {recipient}")
        print(f"Error: Invalid recipient email address: {recipient}")
        return

    def cell(value) -> str:
        """Return *value* as escaped text for an HTML table cell."""
        return escape(str(value), quote=True)

    def link_cell(url) -> str:
        """Return an anchor for http(s) URLs, otherwise plain escaped text."""
        text = str(url)
        safe = escape(text, quote=True)
        if text.lower().startswith(('http://', 'https://')):
            return f'<a href="{safe}">{safe}</a>'
        return safe

    # Create HTML body with inline CSS
    html_head = """
    <html>
    <head>
        <style>
            body { font-family: Arial, sans-serif; margin: 0; padding: 20px; }
            h1 { font-size: 24px; margin-bottom: 20px; }
            table { width: 100%; border-collapse: collapse; margin-top: 10px; }
            th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
            th { background-color: #f2f2f2; font-weight: bold; }
            a { color: #1a73e8; text-decoration: none; }
            a:hover { text-decoration: underline; }
        </style>
    </head>
    <body>
        <h1>RFP Opportunities Due Today</h1>
        <table>
            <thead>
                <tr>
                    <th>Subject</th>
                    <th>Online Link</th>
                    <th>Due Date</th>
                    <th>Agency</th>
                    <th>Reference</th>
                    <th>Contact</th>
                </tr>
            </thead>
            <tbody>
    """
    row_template = """
                <tr>
                    <td>{subject}</td>
                    <td>{link}</td>
                    <td>{formatted_date}</td>
                    <td>{agency}</td>
                    <td>{reference}</td>
                    <td>{contact}</td>
                </tr>
        """
    html_tail = """
            </tbody>
        </table>
    </body>
    </html>
    """
    html_rows = [
        row_template.format(
            subject=cell(event['subject']),
            link=link_cell(event['online_link']),
            formatted_date=cell(event['formatted_date']),
            agency=cell(event['agency']),
            reference=cell(event['reference']),
            contact=cell(event['contact']),
        )
        for event in events
    ]
    html_body = html_head + ''.join(html_rows) + html_tail

    # Create plain text fallback
    plain_text_body = "RFP Opportunities Due Today\n\n" + ''.join(
        f"Subject: {event['subject']}\n"
        f"Online Link: {event['online_link']}\n"
        f"Due Date: {event['formatted_date']}\n"
        f"Agency: {event['agency']}\n"
        f"Reference: {event['reference']}\n"
        f"Contact: {event['contact']}\n\n"
        for event in events
    )

    # Create MIME message
    message = MIMEMultipart('alternative')
    message['to'] = recipient
    message['subject'] = 'RFP Opportunities Due Today'
    message['from'] = 'me'

    # Attach plain text first, HTML second (mail clients prefer the last part)
    message.attach(MIMEText(plain_text_body, 'plain'))
    message.attach(MIMEText(html_body, 'html'))

    # The Gmail API expects the RFC 2822 message as URL-safe base64
    raw_message = base64.urlsafe_b64encode(message.as_bytes()).decode('utf-8')

    # The retry wraps only the API call. HttpError is caught below, so a
    # decorator on the whole function would never see it.
    @retry_on_transient_error()
    def deliver():
        return gmail_service.users().messages().send(
            userId='me',
            body={'raw': raw_message}
        ).execute()

    try:
        deliver()
    except HttpError as e:
        logging.error(f"Failed to send alert email to {recipient}: {e}")
        print(f"Error: Failed to send alert email to {recipient}: {e}")
    else:
        logging.info(f"Sent alert email to {recipient} with {len(events)} events")
        print(f"Sent alert email to {recipient}")

def check_and_send_alerts(gmail_service: build, active_events: List[Dict], current_date: datetime) -> None:
    """Select the events due today, write them to JSON and e-mail the recipients.

    An event is due today when its ``formatted_date`` (``YYYY-MM-DD``) equals
    the date part of ``current_date``. Events whose date is missing or cannot
    be parsed are logged and skipped. The selection is always written to
    ``rfp_due_today.json`` (an empty list when nothing is due). Alert e-mails
    go to every address in ``ALERT_RECIPIENTS`` only when something is due.

    Args:
        gmail_service: Gmail API client passed through to ``send_alert_email``.
        active_events: Event dicts to inspect.
        current_date: Reference datetime; only its date is used.
    """
    today = current_date.date()

    # Find events due today
    due_today = []
    for event in active_events:
        raw_date = event.get('formatted_date')
        try:
            event_date = datetime.strptime(raw_date, '%Y-%m-%d').date()
        except (TypeError, ValueError) as e:
            # TypeError: date missing or not a string; ValueError: wrong format.
            logging.error(f"Failed to parse event date {raw_date}: {e}")
            continue
        if event_date == today:
            due_today.append(event)

    # Save due_today to JSON file
    with open('rfp_due_today.json', 'w', encoding='utf-8') as f:
        json.dump(due_today, f, indent=2)
    logging.info(f"Saved {len(due_today)} events due today to rfp_due_today.json")

    if not due_today:
        logging.info("No events due today")
        return

    logging.info(f"Found {len(due_today)} events due today")
    for recipient in ALERT_RECIPIENTS:
        send_alert_email(gmail_service, recipient, due_today)

def run_scheduler(gmail_service: build, calendar_service: build):
    """Start a daemon thread that runs the alert check daily at ``ALERT_TIME``.

    The time is interpreted in the America/New_York timezone. The scheduled
    job calls ``list_all_emails(..., run_alerts=True)``. A failure inside the
    job is logged and the job stays scheduled for the following day.

    Args:
        gmail_service: Gmail API client handed to the scheduled job.
        calendar_service: Calendar API client handed to the scheduled job.

    Raises:
        ValueError: ``ALERT_TIME`` is not in ``HH:MM`` format.
    """
    def scheduler_loop():
        while True:
            schedule.run_pending()
            time.sleep(60)  # Check every minute

    def run_alert_job():
        # Catch everything here. If the job raised, schedule would not
        # advance its next run and the job would fire again every minute.
        try:
            list_all_emails(gmail_service, calendar_service, run_alerts=True)
        except Exception:
            logging.exception("Scheduled alert run failed")

    # Validate ALERT_TIME format
    try:
        datetime.strptime(ALERT_TIME, '%H:%M')
    except ValueError as e:
        error_msg = f"Invalid ALERT_TIME format: {ALERT_TIME}. Please use HH:MM format (e.g., '09:30')."
        logging.error(error_msg)
        print(f"Error: {error_msg}")
        raise ValueError(error_msg) from e

    # Schedule the alert check at the specified time daily. The timezone must
    # be passed to at(); assigning an attribute on the job afterwards has no
    # effect on when it runs.
    ny_tz = pytz.timezone('America/New_York')
    job = schedule.every().day
    try:
        job = job.at(ALERT_TIME, ny_tz)
    except TypeError:
        # Older schedule releases do not accept a timezone argument.
        logging.warning("Installed 'schedule' does not support timezones; ALERT_TIME is interpreted in local time")
        job = job.at(ALERT_TIME)
    job.do(run_alert_job)

    # Start scheduler in a separate thread
    scheduler_thread = threading.Thread(target=scheduler_loop, daemon=True)
    scheduler_thread.start()
    logging.info(f"Started scheduler for daily alerts at {ALERT_TIME}")
    print(f"⏰ Scheduler started for daily alerts at {ALERT_TIME}")

def list_all_emails(gmail_service: build, calendar_service: build, run_alerts: bool = False) -> None:
    """Fetch the last year of mail, extract RFP opportunities and report them.

    Steps, in order:
      1. List message IDs matching ``after:<date 365 days ago>``, following
         ``nextPageToken`` until exhausted.
      2. First batch pass: fetch only the From/Subject headers and drop
         messages recognised by ``is_google_service_email`` or
         ``is_daily_bids_alert_email``.
      3. Second batch pass: fetch the remaining messages in full, decode the
         subject, extract the body and derive opportunity details.
      4. Deduplicate, then split the results with ``categorize_events``.
      5. If ``run_alerts`` is true, call ``check_and_send_alerts`` and return;
         otherwise print summary counters and three tables.

    Args:
        gmail_service: Gmail API client.
        calendar_service: Calendar API client, passed to
            ``get_current_date_from_calendar``.
        run_alerts: When true, send alerts instead of printing tables.

    An ``HttpError`` escaping any step is caught at the end and printed; other
    exception types propagate to the caller.
    """
    try:
        messages = []
        next_page_token = None
        # Counters reported in the summary at the end
        google_service_count = 0
        daily_bids_count = 0
        api_error_count = 0
        dedup_skip_count = 0
        data_skip_count = 0

        # Calculate date for one year ago
        one_year_ago = datetime.now(pytz.UTC) - timedelta(days=365)
        query = f"after:{one_year_ago.strftime('%Y/%m/%d')}"
        logging.info(f"Fetching emails with query: {query}")

        # Fetch all message IDs with retry
        @retry_on_transient_error()
        def fetch_messages(page_token):
            return gmail_service.users().messages().list(
                userId='me',
                maxResults=1000,
                pageToken=page_token,
                q=query
            ).execute()

        # Page through the listing until no nextPageToken is returned
        while True:
            try:
                result = fetch_messages(next_page_token)
                messages.extend(result.get('messages', []))
                next_page_token = result.get('nextPageToken')
                logging.info(f"Fetched {len(result.get('messages', []))} messages. Next page token: {next_page_token}")
                if not next_page_token:
                    break
            except HttpError as e:
                # 403 gets a dedicated message; every HttpError is re-raised
                if e.resp.status == 403:
                    logging.error(f"Quota exceeded: {e}")
                    print("Error: Gmail API quota exceeded. Try again later or increase quota in Google Cloud Console.")
                    raise
                raise

        print(f"📩 Total Emails Found: {len(messages)}")

        opportunities = []
        # Maps message ID -> API response, or {'error': ...} on failure
        message_data_dict = {}
        batch_requests = 0
        seen_link_ref_pairs = set()
        seen_opportunities = set()

        # Define batch callback function
        def batch_callback(request_id, response, exception):
            nonlocal api_error_count
            if exception is not None:
                api_error_count += 1
                error_msg = f"Batch request error for message {request_id}: {str(exception)}"
                logging.error(error_msg)
                print(f"❌ {error_msg}")
                message_data_dict[request_id] = {
                    'error': error_msg
                }
            else:
                message_data_dict[request_id] = response

        # Retry wrapper for batch execution.
        # NOTE(review): only HTTP 429 is retried here, whereas
        # retry_on_transient_error also retries 500/502/503/504 — confirm
        # whether the difference is intentional.
        def execute_batch_with_retry(batch, max_attempts=3, backoff_factor=1):
            attempts = 0
            while attempts < max_attempts:
                try:
                    batch.execute()
                    return
                except HttpError as e:
                    if e.resp.status != 429:
                        raise
                    attempts += 1
                    if attempts == max_attempts:
                        raise
                    sleep_time = backoff_factor * (2 ** (attempts - 1))
                    logging.warning(f"Rate limit error in batch execution, retrying in {sleep_time}s (attempt {attempts}/{max_attempts})")
                    time.sleep(sleep_time)

        # First pass: Fetch headers using metadata format
        batch = gmail_service.new_batch_http_request(callback=batch_callback)
        for msg in messages:
            batch.add(
                gmail_service.users().messages().get(
                    userId='me',
                    id=msg['id'],
                    format='metadata',
                    metadataHeaders=['From', 'Subject']
                ),
                request_id=msg['id']
            )
            batch_requests += 1
            if batch_requests >= 25:  # Further reduced batch size
                execute_batch_with_retry(batch)
                time.sleep(1.0)  # Increased delay
                batch = gmail_service.new_batch_http_request(callback=batch_callback)
                batch_requests = 0
        # Flush the final, partially filled batch
        if batch_requests > 0:
            execute_batch_with_retry(batch)

        # Process headers and identify relevant messages
        relevant_message_ids = []
        for msg in messages:
            msg_data = message_data_dict.get(msg['id'], {})
            if 'error' in msg_data:
                # Keep a placeholder row so the API failure stays visible
                opportunities.append({
                    'subject': "No Subject",
                    'online_link': "NOT AVAILABLE",
                    'event_date': "NOT AVAILABLE",
                    'agency': "NOT AVAILABLE",
                    'reference': "NOT AVAILABLE",
                    'contact': "NOT AVAILABLE",
                    'raw_subject': "No Subject",
                    'extraction_error': msg_data['error']
                })
                continue

            headers = msg_data.get('payload', {}).get('headers', [])
            if is_google_service_email(headers):
                google_service_count += 1
                continue
            if is_daily_bids_alert_email(headers):
                daily_bids_count += 1
                continue
            relevant_message_ids.append(msg['id'])

        # Second pass: Fetch full email data for relevant messages
        batch = gmail_service.new_batch_http_request(callback=batch_callback)
        batch_requests = 0
        # Drop the metadata responses; the dict is refilled with full messages
        message_data_dict.clear()

        for i, msg_id in enumerate(relevant_message_ids, 1):
            print(f"\r🔍 Processing email {i}/{len(relevant_message_ids)}...", end="", flush=True)
            batch.add(
                gmail_service.users().messages().get(
                    userId='me',
                    id=msg_id,
                    format='full'
                ),
                request_id=msg_id
            )
            batch_requests += 1
            if batch_requests >= 25:  # Further reduced batch size
                execute_batch_with_retry(batch)
                time.sleep(1.0)  # Increased delay
                batch = gmail_service.new_batch_http_request(callback=batch_callback)
                batch_requests = 0
        if batch_requests > 0:
            execute_batch_with_retry(batch)

        # Process fetched full emails
        for i, msg_id in enumerate(relevant_message_ids, 1):
            msg_data = message_data_dict.get(msg_id, {})
            if 'error' in msg_data:
                opportunities.append({
                    'subject': "No Subject",
                    'online_link': "NOT AVAILABLE",
                    'event_date': "NOT AVAILABLE",
                    'agency': "NOT AVAILABLE",
                    'reference': "NOT AVAILABLE",
                    'contact': "NOT AVAILABLE",
                    'raw_subject': "No Subject",
                    'extraction_error': msg_data['error']
                })
                continue

            headers = msg_data.get('payload', {}).get('headers', [])
            
            # Decode subject header with MIME encoding support
            raw_subject = "No Subject"
            for header in headers:
                if header['name'].lower() == 'subject':
                    decoded = email.header.decode_header(header['value'])
                    subject_parts = []
                    for part, encoding in decoded:
                        if isinstance(part, bytes):
                            encoding = encoding or 'utf-8'
                            try:
                                subject_parts.append(part.decode(encoding))
                            except (UnicodeDecodeError, LookupError):
                                # Unknown or wrong charset: fall back to lossy UTF-8
                                subject_parts.append(part.decode('utf-8', errors='replace'))
                        else:
                            subject_parts.append(part)
                    raw_subject = ''.join(subject_parts)
                    break

            body = get_email_body(msg_data.get('payload', {}))

            # NOTE(review): for "New RfP mail" the details are computed but
            # nothing below runs (it all sits in the else branch), so these
            # messages never reach `opportunities` — confirm this is intended.
            if raw_subject == "New RfP mail":
                details = process_new_rfp_mail(raw_subject, body)
            else:
                if raw_subject == "New RfP From Bid Mail" and body in ["No body content available.", ""]:
                    details = process_no_body_rfp_email(raw_subject)
                else:
                    details = extract_fields(body, raw_subject)
                
                # Apply new subject logic if title is present and original subject is "No Subject"
                # (this changes only the 'raw_subject' value stored below)
                if details['subject'] == "No Subject" and details.get('title'):
                    raw_subject = f"{details.get('title', 'Opportunity Details')}"

                # Fallback to preserve original subject if it becomes "No Subject" based on online link
                online_link = details['online_link']
                if details['subject'] == "No Subject" and online_link != "NOT AVAILABLE":
                    if any(link in online_link for link in [
                        "sam.gov",
                        "dibbs.bsm.dla.mil",
                        "passport.cityofnewyork.us"
                    ]):
                        details['subject'] = clean_subject(raw_subject, body)

                # Skip opportunities where both online_link and reference are "NOT AVAILABLE"
                # (the row is still recorded, tagged with an extraction error)
                if details['online_link'] == "NOT AVAILABLE" and details['reference'] == "NOT AVAILABLE":
                    data_skip_count += 1
                    error_msg = "Skipped: Missing both online link and reference"
                    print(f"⚠️ {error_msg} for message {msg_id}")
                    details['extraction_error'] = error_msg
                    opportunities.append({
                        'subject': details['subject'],
                        'online_link': details['online_link'],
                        'event_date': details['event_date'],
                        'agency': details['agency'],
                        'reference': details['reference'],
                        'contact': details['contact'],
                        'raw_subject': raw_subject,
                        'extraction_error': details['extraction_error']
                    })
                    continue

                # Modified deduplication logic: Check for duplicate link, reference, and due date
                # Key is (link, reference), or (link, due date) when there is no reference
                if online_link != "NOT AVAILABLE":
                    if details['reference'] != "NOT AVAILABLE":
                        link_ref_pair = (online_link, details['reference'])
                    else:
                        link_ref_pair = (online_link, details['event_date'])
                    
                    if link_ref_pair in seen_link_ref_pairs:
                        dedup_skip_count += 1
                        error_msg = "Duplicate Opportunity (based on link and reference or due date)"
                        # print(f"⚠️ {error_msg} for message {msg_id}")
                        details['extraction_error'] = error_msg
                        opportunities.append({
                            'subject': details['subject'],
                            'online_link': details['online_link'],
                            'event_date': details['event_date'],
                            'agency': details['agency'],
                            'reference': details['reference'],
                            'contact': details['contact'],
                            'raw_subject': raw_subject,
                            'extraction_error': details['extraction_error']
                        })
                        continue
                    seen_link_ref_pairs.add(link_ref_pair)

                if all(details[field] == "NOT AVAILABLE" for field in ['online_link', 'event_date', 'agency', 'reference', 'contact']):
                    data_skip_count += 1
                    error_msg = "No meaningful data extracted"
                    print(f"⚠️ {error_msg} for message {msg_id}")
                    details['extraction_error'] = error_msg
                    opportunities.append({
                        'subject': details['subject'],
                        'online_link': details['online_link'],
                        'event_date': details['event_date'],
                        'agency': details['agency'],
                        'reference': details['reference'],
                        'contact': details['contact'],
                        'raw_subject': raw_subject,
                        'extraction_error': details['extraction_error']
                    })
                    continue

                # Deduplication based on key fields
                opportunity_key = (
                    details['online_link'],
                    details['reference'],
                    details['event_date'],
                    details['agency']
                )
                if opportunity_key in seen_opportunities:
                    dedup_skip_count += 1
                    error_msg = "Duplicate opportunity"
                    print(f"⚠️ {error_msg} for message {msg_id}")
                    details['extraction_error'] = error_msg
                else:
                    seen_opportunities.add(opportunity_key)

                # NOTE(review): assumes the extraction helpers always set an
                # 'extraction_error' key on `details` — verify, otherwise the
                # lookup below raises KeyError for clean records.
                opportunities.append({
                    'subject': details['subject'],
                    'online_link': details['online_link'],
                    'event_date': details['event_date'],
                    'agency': details['agency'],
                    'reference': details['reference'],
                    'contact': details['contact'],
                    'raw_subject': raw_subject,
                    'extraction_error': details['extraction_error']
                })

        current_date = get_current_date_from_calendar(calendar_service)
        active_events, expired_events, unparsed_events = categorize_events(opportunities, current_date)
        
        # Check and send alerts if triggered by scheduler
        if run_alerts:
            check_and_send_alerts(gmail_service, active_events, current_date)
            return  # Exit early to avoid printing tables during scheduled runs

        print(f"\n🚫 Filtered {google_service_count} Google service emails")
        print(f"🚫 Filtered {daily_bids_count} Daily Bids Alert emails")
        print(f"⚠️ Skipped {api_error_count} emails due to API errors")
        print(f"⚠️ Skipped {dedup_skip_count} emails due to deduplication")
        print(f"⚠️ Skipped {data_skip_count} emails due to missing data")
        print(f"📋 Processed {len(opportunities)} opportunities")
        print("🕒 Timezone: America/New_York")
        
        display_events_table(active_events, "🟢 ACTIVE RFP OPPORTUNITIES ", True)
        display_events_table(expired_events, "🔴 RECENTLY EXPIRED RFPs (Last 90 days only)", False)
        display_events_table(unparsed_events, "🟡 RFPs WITH MISSING OR UNPARSED DATES", False)

    except HttpError as error:
        # Any Gmail API failure ends the run with a console message only
        print(f"❌ An error occurred: {error}")

def get_current_date_from_calendar(calendar_service: build) -> datetime:
    """Return today's date in America/New_York as a timezone-aware midnight.

    The value is derived from the system clock converted to New York time;
    ``calendar_service`` is accepted but not used by this function.
    """
    eastern = pytz.timezone('America/New_York')
    today = datetime.now(eastern).date()
    # Build a naive midnight first so pytz can attach the UTC offset that is
    # valid at that exact instant.
    naive_midnight = datetime.combine(today, datetime.min.time())
    return eastern.localize(naive_midnight)

def get_email_body(payload: dict) -> str:
    """Return the first decodable text body found in a Gmail message payload.

    Parts are scanned in order; a ``text/plain`` or ``text/html`` part that
    carries data wins, otherwise the part is searched recursively. If no part
    yields content, the payload's own ``body.data`` is used.

    Args:
        payload: A message payload (or nested part) dict with optional
            'parts', 'mimeType' and 'body' keys.

    Returns:
        The decoded text, or "No body content available." when nothing
        decodable is present.
    """
    no_body = "No body content available."

    def _decode(data: str) -> str:
        # Body data is URL-safe base64; restore any padding that was dropped
        # so the decoder does not reject it.
        padded = data + "=" * (-len(data) % 4)
        return base64.urlsafe_b64decode(padded).decode('utf-8', errors="ignore")

    for part in payload.get('parts') or []:
        if part.get('mimeType') in ('text/plain', 'text/html'):
            data = (part.get('body') or {}).get('data')
            if data:
                return _decode(data)
        nested = get_email_body(part)
        # The sentinel is a non-empty string, so it must be excluded
        # explicitly; otherwise the first content-less part (e.g. an
        # attachment) would end the scan before later text parts are seen.
        if nested and nested != no_body:
            return nested

    data = (payload.get('body') or {}).get('data')
    if data is not None:
        return _decode(data)
    return no_body

def clean_subject(subject: str, body: str) -> str:
    """Return a display title for an email, falling back to the body.

    Reply/forward markers, "due - <date>" tails and embedded dates are removed
    from the subject. When nothing usable remains (or the subject is the
    literal "No Subject"), the first suitable line among the first five body
    lines is used instead, truncated to 50 characters.

    Args:
        subject: Raw subject header value.
        body: Plain-text body, or "No body content available.".

    Returns:
        The cleaned title, or "Untitled Opportunity" when neither the subject
        nor the body provides one.
    """
    # Strip a whole chain of reply/forward markers in any order
    # ("RE: FW: Fwd: ..."), not just one occurrence of each.
    subject = re.sub(r'^(?:(?:Fwd|FW|RE):\s*)+', '', subject, flags=re.IGNORECASE)
    subject = re.sub(r'\bdue\b\s*[-:]\s*(\d{1,2}/\d{1,2}/\d{4}|[A-Za-z]{3}\s+\d{1,2},\s+\d{4}|.*\d{4}).*$',
                     '', subject, flags=re.IGNORECASE)
    date_patterns = [
        r'\b\d{1,2}/\d{1,2}/\d{4}\b',
        r'\b[A-Za-z]{3},\s+[A-Za-z]{3}\s+\d{1,2}(?:st|nd|rd|th)?\s+\d{4},\s+\d{02}:\d{02}\b',
        r'\b\d{4}-\d{2}-\d{2}\b'
    ]
    for pattern in date_patterns:
        subject = re.sub(pattern, '', subject, flags=re.IGNORECASE)
    subject = re.sub(r'\s+', ' ', subject).strip()
    # Trailing punctuation and the whitespace in front of it go together, so
    # "Title -" becomes "Title" rather than "Title ".
    subject = re.sub(r'[\s.,;:!?-]+$', '', subject)

    if subject and subject != "No Subject":
        return subject

    if body and body != "No body content available.":
        skipped_starts = ('http', 'Agency', 'Reference', 'Contact', 'Due')
        for line in body.split('\n')[:5]:
            line = line.strip()
            if line and len(line) > 5 and not line.startswith(skipped_starts):
                return re.sub(r'\s+', ' ', line)[:50]
    return "Untitled Opportunity"

def is_google_service_email(headers: List[Dict[str, str]]) -> bool:
    """Report whether the headers look like mail sent by a Google service.

    A message qualifies when its From header contains one of the Google
    domains or its Subject contains one of the Google product keywords.
    Header names and the sender value are compared case-insensitively.

    Args:
        headers: Header dicts with 'name' and 'value' keys.

    Returns:
        True when either check matches, otherwise False.
    """
    google_domains = (
        'google.com',
        'googleapis.com',
        'gmail.com',
        'accounts.google.com',
        'mail.google.com',
    )
    google_subject_keywords = (
        'Google Account',
        'Gmail',
        'Google Security',
        'Google Workspace',
        'Google Cloud',
    )

    def _header(name: str) -> str:
        # Header field names are case-insensitive, so "from"/"FROM" must be
        # found as well as "From".
        wanted = name.lower()
        return next(
            (h.get('value', '') for h in headers if h.get('name', '').lower() == wanted),
            '',
        )

    # Domain names are case-insensitive too ("Accounts.Google.com").
    sender = _header('From').lower()
    if any(domain in sender for domain in google_domains):
        return True
    subject = _header('Subject').lower()
    return any(keyword.lower() in subject for keyword in google_subject_keywords)

def is_daily_bids_alert_email(headers: List[Dict[str, str]]) -> bool:
    """Report whether the Subject marks a bid alert, digest or summary email.

    Args:
        headers: Header dicts with 'name' and 'value' keys.

    Returns:
        True when the subject matches any of the bid-alert patterns.
    """
    # Header field names are case-insensitive, so accept "subject"/"SUBJECT"
    # as well as "Subject".
    subject = next(
        (h.get('value', '') for h in headers if h.get('name', '').lower() == 'subject'),
        '',
    ).lower()
    bids_patterns = [
        r'\bdaily\s*bids?\s*alert\b',
        r'\bweekly\s*bids?\s*alert\b',
        r'\bbids?\s*alert\b',
        r'\bbid\s*digest\b',
        r'\bbid\s*summary\b',
        r'\bbid\s*notification\b'
    ]
    return any(re.search(pattern, subject, re.IGNORECASE) for pattern in bids_patterns)

def extract_fields(body: str, subject: str) -> Dict[str, str]:
    """Build the opportunity record for one email from its body and subject.

    The due date comes from the subject for forwarded mail ("Fwd:"/"FW:") and
    for mail without a body; otherwise the HTML-stripped body is searched with
    a list of date patterns. Link, reference, contact and agency are filled
    by the dedicated extraction helpers; missing values stay "NOT AVAILABLE".
    """
    fields = {
        'online_link': "NOT AVAILABLE",
        'event_date': "NOT AVAILABLE",
        'agency': "NOT AVAILABLE",
        'reference': "NOT AVAILABLE",
        'contact': "NOT AVAILABLE",
        'extraction_error': None,
        'subject': clean_subject(subject, body)
    }
    body_date_patterns = (
        r'(\w{3}, \w{3} \d{1,2}(?:st|nd|rd|th)? \d{4}, \d{02}:\d{02})',
        r'(\w{3} \d{1,2}, \d{4} at \d{1,2}:\d{02} [AP]M)',
        r'(\d{1,2}/\d{1,2}/\d{4} \d{1,2}:\d{02})',
        r'DUE\s*-\s*(\w{3}, \w{3} \d{1,2}, \d{4})',
        r'due\s*-\s*(\w{3}, \w{3} \d{1,2}, \d{4})',
        r'(\w{3}\s+\d{1,2}, \d{4}\s+at\s+\d{1,2}:\d{02}\s*[AP]M)',
        r'(\d{1,2}/\d{1,2}/\d{4}\s+\d{1,2}:\d{02}(?::\d{02})?\s*[AP]M)'
    )

    # Forwarded mail: the subject is consulted before the body.
    if subject.lower().startswith(('fwd:', 'fw:')):
        fields['event_date'] = extract_due_date_from_subject(subject)

    if body in ("No body content available.", ""):
        # Nothing to scan, so the subject is the only date source.
        fields['event_date'] = extract_due_date_from_subject(subject)
    else:
        # Drop markup and collapse whitespace before pattern matching.
        body = re.sub(r'<[^>]+>', '', body)
        body = re.sub(r'\s+', ' ', body).strip()
        fields['online_link'] = extract_url_from_body(body)
        if fields['event_date'] == "NOT AVAILABLE":
            searches = (re.search(p, body, re.IGNORECASE) for p in body_date_patterns)
            first_hit = next((m for m in searches if m), None)
            if first_hit is not None:
                fields['event_date'] = format_extracted_date(first_hit.group(1))

    details = extract_reference_and_contact(body, subject)
    fields['reference'] = details['reference']
    fields['contact'] = details['contact']
    if details['agency'] != "NOT AVAILABLE":
        fields['agency'] = details['agency']
    else:
        fields['agency'] = clean_agency_name("", subject)
    return fields

def extract_due_date_from_subject(subject: str) -> str:
    """Find a due date in a subject line and return it in standard format.

    Patterns are tried from most specific ("due date: ...") to least specific
    (any bare date). The first candidate that format_extracted_date can parse
    is returned; "NOT AVAILABLE" is returned when none parses.
    """
    subject = subject.strip()
    patterns = [
        r'(?:DUE|due|Due)[\s:-]*(?:date\s+is|date:?)[\s-]*([A-Za-z]{3},\s*[A-Za-z]{3}\s+\d{1,2}(?:st|nd|rd|th)?\s+\d{4},\s*\d{02}:\d{02})',
        r'(?:DUE|due|Due)[\s:-]*(?:date|date:)[\s-]*(\d{1,2}/\d{1,2}/\d{4}\s+\d{1,2}:\d{02}(?::\d{02})?\s*[AP]M)',
        r'(?:DUE|due|Due)[\s:-]*(?:date|date:)?[\s-]*([A-Za-z]{3}\s+\d{1,2}(?:st|nd|rd|th)?\s+\d{4})',
        r'(?:DUE|due|Due)[\s:-]*(?:date|date:)?[\s-]*(\d{1,2}\s+[A-Za-z]{3}\s+\d{4})',
        r'(?:DUE|due|Due)[\s:-]*(?:date|date:)?[\s-]*([A-Za-z]{3},\s*[A-Za-z]{3}\s+\d{1,2}(?:st|nd|rd|th)?\s+\d{4})',
        r'(?:DUE|due|Due)\s*[-:]\s*([A-Za-z]{3}\s+\d{1,2},\s+\d{4})',
        r'(?:DUE|due|Due)\s*[-:]\s*(\d{1,2}\s+[A-Za-z]{3}\s+\d{4})',
        r'(?:DUE|due|Due)\s*[-:]\s*(\d{1,2}/\d{1,2}/\d{4})',
        r'\b(?:closing|submit|deadline|due)\s*(?:date|by)?\s*[-:]\s*([A-Za-z]{3}\s+\d{1,2},\s+\d{4})',
        r'\b(?:closing|submit|deadline|due)\s*(?:date|by)?\s*[-:]\s*(\d{1,2}\s+[A-Za-z]{3}\s+\d{4})',
        r'\b(?:closing|submit|deadline|due)\s*(?:date|by)?\s*[-:]\s*(\d{1,2}/\d{1,2}/\d{4})',
        r'\b([A-Za-z]{3},\s+[A-Za-z]{3}\s+\d{1,2}\s+\d{4},\s+\d{02}:\d{02})\b',
        r'\b([A-Za-z]{3}\s+\d{1,2},\s+\d{4}\s+at\s+\d{1,2}:\d{02}\s*[AP]M)\b',
        r'\b([A-Za-z]{3}\s+\d{1,2},\s+\d{4})\b',
        r'\b(\d{1,2}\s+[A-Za-z]{3}\s+\d{4})\b',
        r'\b(\d{1,2}/\d{1,2}/\d{4})\b',
    ]
    for regex in patterns:
        found = re.search(regex, subject, re.IGNORECASE)
        if found is None:
            continue
        # Remove ordinal suffixes, then the "u," that is left over when that
        # removal eats the "th" of a lowercase "thu,".
        candidate = re.sub(r'(st|nd|rd|th)', '', found.group(1))
        candidate = re.sub(r'^u\s*,', '', candidate, flags=re.IGNORECASE)
        candidate = re.sub(r'\s+', ' ', candidate).strip()
        normalized = format_extracted_date(candidate)
        if normalized == "NOT AVAILABLE":
            # This candidate did not parse; let a later pattern try.
            continue
        return normalized
    return "NOT AVAILABLE"

def format_extracted_date(date_str: str) -> str:
    """Parse a free-form date string and return it as '%a, %b %d %Y, %H:%M'.

    Args:
        date_str: Date text as captured from an email subject or body.

    Returns:
        The formatted date, or "NOT AVAILABLE" when the text cannot be parsed.
        Parsing is best-effort: every failure is logged and mapped to the
        sentinel instead of being raised.
    """
    logging.debug(f"Attempting to parse date: {date_str}")
    try:
        date_str = re.sub(r'[^\w\s/:,-]', '', date_str)
        date_str = re.sub(r'\s+', ' ', date_str).strip()
        # Drop ordinal suffixes only when they directly follow a day number
        # ("1st", "22nd"). Removing "st/nd/rd/th" anywhere would also mangle
        # names such as "Thu", "August" or "Monday".
        date_str = re.sub(r'(?<=\d)(?:st|nd|rd|th)\b', '', date_str, flags=re.IGNORECASE)
        # extract_due_date_from_subject can still hand over a string whose
        # leading "thu," was reduced to "u,"; discard that remnant.
        date_str = re.sub(r'^u\s*,', '', date_str, flags=re.IGNORECASE)
        dt = parse_date(date_str, fuzzy=True)
        formatted = dt.strftime('%a, %b %d %Y, %H:%M')
        logging.debug(f"Formatted date: {formatted}")
        return formatted
    except Exception as e:
        # Deliberately broad: callers rely on the sentinel, never on an
        # exception, to detect unparseable input.
        logging.error(f"Failed to parse date: {date_str}, error: {str(e)}")
        return "NOT AVAILABLE"

def extract_url_from_body(body: str) -> str:
    """Return the first usable http(s) URL in the body, skipping govdirections.

    Each candidate loses trailing punctuation and a trailing "Event" before it
    is validated. "NOT AVAILABLE" is returned when no candidate qualifies or
    the body is the no-content sentinel.
    """
    if body == "No body content available.":
        return "NOT AVAILABLE"

    for found in re.finditer(r'(https?://[^\s\'">]+)', body, re.IGNORECASE):
        candidate = found.group(1).strip()
        candidate = re.sub(r'[.,;:!?)\]\s]+$', '', candidate)
        # Strip a trailing "Event" that ends up glued to the link text.
        candidate = re.sub(r'Event$', '', candidate, flags=re.IGNORECASE)
        if 'govdirections' in candidate.lower():
            continue
        if re.match(r'^https?://[^\s/$.?#].[^\s]*$', candidate):
            return candidate
    return "NOT AVAILABLE"

def extract_reference_and_contact(body: str, subject: str) -> Dict[str, str]:
    """Collect reference, contact phone and agency for one email.

    The reference defaults to whatever the subject yields. When the body
    contains the "The agency sponsor is: ... Agency Contact Information: ..."
    block, its agency, reference and phone number take precedence. A phone
    number directly after "Agency Contact Information:" is used as a last
    resort. Values that cannot be found stay "NOT AVAILABLE".
    """
    info = {
        'reference': extract_reference_from_subject(subject),
        'contact': "NOT AVAILABLE",
        'agency': "NOT AVAILABLE"
    }
    if body in ("No body content available.", ""):
        return info

    def _dashed_phone(text, pattern, flags=0):
        # Return the first captured phone number as ddd-ddd-dddd, or None.
        hit = re.search(pattern, text, flags)
        if hit is None:
            return None
        digits = re.sub(r'\D', '', hit.group(1))
        if len(digits) != 10:
            return None
        return f"{digits[:3]}-{digits[3:6]}-{digits[6:]}"

    block = re.search(
        r'The agency sponsor is:\s*(.*?)?\s*The reference for this notice \(if available\):\s*(.*?)\s*Agency Contact Information:\s*(.*?)(?:\n|$)',
        body,
        re.IGNORECASE | re.DOTALL
    )
    if block:
        sponsor = block.group(1).strip()
        if sponsor:
            info['agency'] = clean_agency_name(sponsor, subject)
        notice_ref = block.group(2).strip()
        if notice_ref and is_valid_reference(notice_ref):
            info['reference'] = notice_ref
        phone = _dashed_phone(block.group(3).strip(), r'(\d{3}-\d{3}-\d{4})')
        if phone is not None:
            info['contact'] = phone

    if info['contact'] == "NOT AVAILABLE":
        phone = _dashed_phone(
            body, r'Agency Contact Information:\s*(\d{3}-\d{3}-\d{4})', re.IGNORECASE
        )
        if phone is not None:
            info['contact'] = phone
    return info

def clean_agency_name(agency_text: str, subject: str) -> str:
    """Return a short agency name, derived from the subject when necessary.

    Args:
        agency_text: Agency text captured from the email body; may be empty.
        subject: Email subject used when ``agency_text`` yields nothing.

    Returns:
        The name limited to 40 characters (with "..." appended when it was
        cut), or "NOT AVAILABLE" when neither input provides one.
    """
    if agency_text:
        # Keep only the text before the first boilerplate keyword.
        agency_text = re.split(r'(?:\bthe\b|\breference\b|\bnotice\b|\bcontact\b|\brfp\b|\bbid\b|\bsolicitation\b)',
                              agency_text, flags=re.IGNORECASE)[0]
        agency_text = re.sub(r'[^a-zA-Z0-9\s,&-]+$', '', agency_text).strip()
        agency_text = re.sub(r'\d{3}-\d{3}-\d{4}', '', agency_text).strip()
        if agency_text:
            return agency_text[:40] + ("..." if len(agency_text) > 40 else "")

    # Subject fallback: remove either a whole chain of mail markers
    # ("RE: FW: Fwd:") or a single leading document-type word. "FW:" is
    # included so forwarded mail is handled the same way as in clean_subject.
    agency = re.sub(r'^(?:(?:(?:Fwd|FW|RE):\s*)+|RFP\s*|Bid\s*|Solicitation\s*)',
                    '', subject, flags=re.IGNORECASE)
    agency = re.sub(r'-\s*due\s*.*$', '', agency, flags=re.IGNORECASE).strip()
    if not agency:
        return "NOT AVAILABLE"
    return agency[:40] + ("..." if len(agency) > 40 else "")

def extract_reference_from_subject(subject: str) -> str:
    """Return the first valid reference number in the subject, upper-cased.

    Patterns are tried in order (RFP/Bid/Solicitation/Ref prefixes, a bare
    "#" prefix, then a letters-plus-digits code). "NOT AVAILABLE" is returned
    when no match passes is_valid_reference.
    """
    ref_patterns = [
        r'RFP\s*#?([A-Z0-9-]{3,50})\b',
        r'Bid\s*#?([A-Z0-9-]{3,50})\b',
        r'Solicitation\s*#?([A-Z0-9-]{3,50})\b',
        r'Ref\s*#?([A-Z0-9-]{3,50})\b',
        r'#([A-Z0-9-]{3,50})\b',
        r'\b([A-Z]{2,5}\d{3,8}-?\d{0,5})\b'
    ]
    for regex in ref_patterns:
        found = re.search(regex, subject, re.IGNORECASE)
        if found is None:
            continue
        candidate = found.group(1)
        if is_valid_reference(candidate):
            return candidate.upper()
    return "NOT AVAILABLE"

def is_valid_reference(ref: str) -> bool:
    """Decide whether an extracted string is plausible as a reference number.

    Args:
        ref: Candidate reference text.

    Returns:
        False for non-strings, empty values, the "NOT AVAILABLE" sentinel,
        known boilerplate, phone numbers, URL-like text, short all-letter
        words, and anything outside 3-50 characters or without an
        alphanumeric character; True otherwise.
    """
    if not ref or not isinstance(ref, str) or ref.upper() == "NOT AVAILABLE":
        return False
    invalid_patterns = [
        r'Agency-Contact-Information',
        r'Learn-to-Do-Business',
        r'Summary-Information',
        r'Competitive-Intelligence',
        r'Regards----Rashi',
        r'\d{3}-\d{3}-\d{4}',
        r'^https?://',
        r'^www\.',
        r'^event',
        r'^view',
        r'^s/'
    ]
    if any(re.search(pattern, ref, re.IGNORECASE) for pattern in invalid_patterns):
        return False
    # Short all-letter tokens are ordinary words or acronyms, not references.
    # The check ignores case because subject patterns match case-insensitively
    # and the result is upper-cased afterwards: "for" in "RFP for ..." must be
    # rejected just like "FOR".
    if len(ref) <= 5 and ref.isalpha():
        return False
    return bool(
        3 <= len(ref) <= 50 and
        not ref.startswith(('http', 'www', 'event', 'view', 's/')) and
        any(char.isalnum() for char in ref)
    )

def categorize_events(opportunities: List[Dict], current_date: datetime) -> Tuple[List, List, List]:
    """Split opportunities into active, recently expired and unparsed lists.

    Args:
        opportunities: Opportunity dicts. 'event_date' and 'extraction_error'
            are read with defaults; 'subject' and 'agency' are required for
            events whose date is today or later.
        current_date: Reference date the event dates are compared with. It is
            subtracted from a New York-localized datetime, so it is expected
            to be timezone-aware.

    Returns:
        A tuple ``(active_events, expired_events, unparsed_events)``:
        * active: due today or later and without a non-IT keyword in subject
          or agency; sorted by 'days_to_expire'.
        * expired: due within the last 90 days; sorted by 'days_expired'.
        * unparsed: entries with an extraction error, no date, an unparseable
          date, or a date more than 90 days in the past; each carries a
          'reason'.
        Upcoming events that match a non-IT keyword are left out of all three
        lists.
    """
    active_events = []
    expired_events = []
    unparsed_events = []
    ny_tz = pytz.timezone('America/New_York')

    # Non-IT keywords to ignore for active events. They are matched against
    # lowercased text, so every entry must itself be lowercase.
    non_it_keywords = [
        'emergency',
        'licenses/support',
        'vehicle',
        'service emergency',
        'survey',
        'services',
        'data center',
        'earthquake monitoring',
        'support services',
        'library director',
        'legal document',
        'fire extinguisher',
        'maintenance',
        'sole sources',
        'epic trained revenue cycle resources',
    ]
    date_formats = (
        '%a, %b %d %Y, %H:%M',
        '%b %d %Y, %I:%M %p',
        '%a, %b %d %Y',
        '%b %d, %Y',
        '%m/%d/%Y',
        '%d %b %Y',
        '%Y-%m-%d',
    )

    for event in opportunities:
        event_date_str = event.get('event_date', "NOT AVAILABLE")
        error = event.get('extraction_error')
        if error or event_date_str == "NOT AVAILABLE":
            unparsed_events.append({
                **event,
                'reason': error or "No valid date extracted"
            })
            continue

        try:
            event_date = None
            for fmt in date_formats:
                try:
                    event_date = datetime.strptime(event_date_str, fmt)
                    logging.debug(f"Parsed date {event_date_str} with format {fmt}")
                    break
                except ValueError:
                    continue
            if event_date is None:
                event_date = parse_date(event_date_str, fuzzy=True)
                logging.debug(f"Parsed date {event_date_str} with dateutil.parser")
            event_date = ny_tz.localize(event_date)
        except (ValueError, OverflowError) as e:
            # dateutil signals out-of-range values with OverflowError; treat
            # it like any other unparseable date instead of aborting the run.
            unparsed_events.append({
                **event,
                'reason': f"Date parsing failed: {str(e)} (input: {event_date_str})"
            })
            logging.error(f"Failed to parse date in categorize_events: {event_date_str}, error: {str(e)}")
            continue

        days_difference = (event_date - current_date).days

        if days_difference >= 0:
            subject_lower = event['subject'].lower()
            agency_lower = event['agency'].lower()
            matched_keyword = next(
                (kw for kw in non_it_keywords if kw in subject_lower or kw in agency_lower),
                None,
            )
            if matched_keyword is not None:
                # Filtered events are dropped here; they must not fall through
                # to the unparsed list with a misleading "too far in the
                # past" reason.
                logging.debug(f"Skipping active event due to non-IT keyword '{matched_keyword}' in subject or agency: {event['subject']}")
                continue
            active_events.append({
                **event,
                'days_to_expire': days_difference,
                'formatted_date': event_date.strftime('%Y-%m-%d')
            })
        elif abs(days_difference) <= 90:
            expired_events.append({
                **event,
                'days_expired': abs(days_difference),
                'formatted_date': event_date.strftime('%Y-%m-%d')
            })
        else:
            unparsed_events.append({
                **event,
                'reason': "Date too far in the past"
            })

    active_events.sort(key=lambda x: x['days_to_expire'])
    expired_events.sort(key=lambda x: x['days_expired'])
    return active_events, expired_events, unparsed_events

def display_events_table(events: List[Dict], title: str, is_active: bool = True) -> None:
    """Print the events as a grid table under the given title.

    Subjects longer than 50 characters are cut and suffixed with "...". The
    due-date column shows 'formatted_date' when present, else 'event_date'.
    With no events only a "(No entries found)" line is printed.
    """
    if not events:
        print(f"\n{title} (No entries found)")
        return

    def _shorten(text):
        return text[:50] + "..." if len(text) > 50 else text

    rows = [
        [
            _shorten(event['subject']),
            event['online_link'],
            event.get('formatted_date', event['event_date']),
            event['agency'],
            event['reference'],
            event['contact'],
        ]
        for event in events
    ]
    column_names = [
        "Subject (50 chars max)",
        "Online Link",
        "Due Date",
        "Agency",
        "Reference",
        "Contact"
    ]
    rendered = tabulate(
        rows,
        headers=column_names,
        tablefmt="grid",
        maxcolwidths=[50, None, 25, 40, 20, 15],
        stralign="left"
    )
    print(f"\n{title} ({len(events)} found):")
    print(rendered)

def process_no_body_rfp_email(subject: str) -> Dict[str, str]:
    """Build an opportunity record from the subject of an email with no body.

    Args:
        subject: Raw subject header value.

    Returns:
        A record with 'subject', 'online_link', 'event_date', 'agency',
        'reference', 'contact', 'raw_subject' and 'extraction_error'. Fields
        that cannot be found are "NOT AVAILABLE"; 'extraction_error' is set
        when date, agency and reference are all missing.
    """
    cleaned_subject = clean_subject(subject, "No body content available.")
    if len(cleaned_subject) > 50:
        display_subject = cleaned_subject[:50] + "..."
    else:
        display_subject = cleaned_subject

    result = {
        'subject': display_subject,
        'online_link': "NOT AVAILABLE",
        'event_date': "NOT AVAILABLE",
        'agency': "NOT AVAILABLE",
        'reference': "NOT AVAILABLE",
        'contact': "NOT AVAILABLE",
        'raw_subject': subject,
        'extraction_error': None
    }
    # The due date must come from the raw subject: clean_subject removes
    # "due - <date>" tails and embedded dates, so the cleaned text no longer
    # contains the very information being looked for.
    result['event_date'] = extract_due_date_from_subject(subject)
    result['reference'] = extract_reference_from_subject(cleaned_subject)
    result['agency'] = clean_agency_name("", cleaned_subject)

    phone_match = re.search(r'(\d{3}-\d{3}-\d{4})', cleaned_subject)
    if phone_match:
        digits = re.sub(r'\D', '', phone_match.group(1))
        if len(digits) == 10:
            result['contact'] = f"{digits[:3]}-{digits[3:6]}-{digits[6:]}"

    url_match = re.search(r'(https?://[^\s\'">]+)', cleaned_subject)
    if url_match:
        url = url_match.group(0).strip()
        url = re.sub(r'[.,;:!?)\]\s]+$', '', url)
        if 'govdirections' not in url.lower() and re.match(r'^https?://[^\s/$.?#].[^\s]*$', url):
            result['online_link'] = url

    if all(value == "NOT AVAILABLE" for value in [result['event_date'], result['agency'], result['reference']]):
        result['extraction_error'] = "No meaningful data extracted from subject"
    return result

def process_new_rfp_mail(subject: str, body: str) -> Dict[str, str]:
    """Build an opportunity record from a 'New RfP mail' email.

    The body is expected to contain a pipe-delimited table (Subject, Online
    Link, Event Date, Agency, Reference, Contact). When the table is absent,
    the generic body/subject extraction is used and 'extraction_error'
    records that the fallback ran.

    Args:
        subject: Raw subject header value.
        body: Email body text (HTML allowed), or "No body content available.".

    Returns:
        A record with 'subject', 'online_link', 'event_date', 'agency',
        'reference', 'contact', 'raw_subject' and 'extraction_error'. Missing
        or invalid values are "NOT AVAILABLE".
    """
    result = {
        'subject': clean_subject(subject, body),
        'online_link': "NOT AVAILABLE",
        'event_date': "NOT AVAILABLE",
        'agency': "NOT AVAILABLE",
        'reference': "NOT AVAILABLE",
        'contact': "NOT AVAILABLE",
        'raw_subject': subject,
        'extraction_error': None
    }
    if body in ["No body content available.", ""]:
        result['extraction_error'] = "No body content available"
        return result
    body = re.sub(r'<[^>]+>', '', body)
    body = re.sub(r'\s+', ' ', body).strip()
    table_pattern = r'Subject \(50 chars max\)\s*\|([^\|]*)\|\s*Online Link\s*\|([^\|]*)\|\s*Event Date\s*\|([^\|]*)\|\s*Agency\s*\|([^\|]*)\|\s*Reference\s*\|([^\|]*)\|\s*Contact\s*\|([^\|]*)\|'
    table_match = re.search(table_pattern, body, re.IGNORECASE)
    if table_match:
        cells = [cell.strip() for cell in table_match.groups()]
        table_subject, link, raw_date, agency, reference, contact = cells
        # An empty cell counts as missing: it maps to the "NOT AVAILABLE"
        # sentinel (or the existing fallback) rather than an empty string, so
        # the "no meaningful data" check below sees it.
        if table_subject:
            result['subject'] = table_subject[:50]
        if re.match(r'^https?://[^\s/$.?#].[^\s]*$', link):
            result['online_link'] = link
        result['event_date'] = format_extracted_date(raw_date)
        result['agency'] = agency[:40] if agency else clean_agency_name("", subject)
        if is_valid_reference(reference):
            result['reference'] = reference
        if re.match(r'\d{3}-\d{3}-\d{4}', contact):
            result['contact'] = contact
    else:
        result['online_link'] = extract_url_from_body(body)
        date_patterns = [
            r'(\w{3}, \w{3} \d{1,2}(?:st|nd|rd|th)? \d{4}, \d{02}:\d{02})',
            r'(\w{3} \d{1,2}, \d{4} at \d{1,2}:\d{02} [AP]M)',
            r'(\d{1,2}/\d{1,2}/\d{4} \d{1,2}:\d{02})',
            r'DUE\s*-\s*(\w{3}, \w{3} \d{1,2}, \d{4})',
            r'due\s*-\s*(\w{3}, \w{3} \d{1,2}, \d{4})',
            r'(\w{3}\s+\d{1,2}, \d{4}\s+at\s+\d{1,2}:\d{02}\s*[AP]M)',
            r'(\d{1,2}/\d{1,2}/\d{4}\s+\d{1,2}:\d{02}(?::\d{02})?\s*[AP]M)'
        ]
        for pattern in date_patterns:
            match = re.search(pattern, body, re.IGNORECASE)
            if match:
                result['event_date'] = format_extracted_date(match.group(1))
                break
        ref_contact = extract_reference_and_contact(body, subject)
        result['reference'] = ref_contact['reference']
        result['contact'] = ref_contact['contact']
        result['agency'] = ref_contact['agency'] if ref_contact['agency'] != "NOT AVAILABLE" else clean_agency_name("", subject)
        # NOTE(review): categorize_events files every record that has an
        # extraction_error under "unparsed", so fallback results never show
        # up as active or expired - confirm this is intended.
        result['extraction_error'] = "No table found, used fallback extraction"
    if all(value == "NOT AVAILABLE" for value in [result['online_link'], result['event_date'], result['agency'], result['reference'], result['contact']]):
        result['extraction_error'] = "No meaningful data extracted"
    return result

def start_http_server():
    """Start the dashboard HTTP server in a background daemon thread.

    Routes:
        ``/``                    the HTML dashboard (HTML_CONTENT)
        ``/rfp_due_today.json``  the JSON file of that name in the working
                                 directory, or 404 when it does not exist
    Every other path is answered with 404.

    Returns:
        The running server object, so the caller can invoke ``shutdown()``
        and ``server_close()`` on it.
    """
    from urllib.parse import urlsplit

    class CustomHandler(http.server.BaseHTTPRequestHandler):
        def _reply(self, status, content_type, payload):
            self.send_response(status)
            self.send_header('Content-type', content_type)
            self.send_header('Content-Length', str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)

        def do_GET(self):
            # Match on the path component only so cache-busting query strings
            # ("/rfp_due_today.json?_=123") still reach the right route.
            route = urlsplit(self.path).path
            if route == '/':
                self._reply(200, 'text/html', HTML_CONTENT.encode('utf-8'))
            elif route == '/rfp_due_today.json':
                try:
                    with open('rfp_due_today.json', 'rb') as f:
                        payload = f.read()
                except FileNotFoundError:
                    self._reply(404, 'text/plain', b"File not found")
                else:
                    self._reply(200, 'application/json', payload)
            else:
                # Deliberately no generic file serving: the working directory
                # also holds token.json (OAuth credentials), and the server
                # listens on all interfaces.
                self._reply(404, 'text/plain', b"File not found")

    class ReusableTCPServer(socketserver.TCPServer):
        # Lets the port be bound again right after a restart (for example
        # when the notebook cell is re-run).
        allow_reuse_address = True

    # Start the server in a separate thread
    server = ReusableTCPServer(("", HTTP_PORT), CustomHandler)
    server_thread = threading.Thread(target=server.serve_forever, daemon=True)
    server_thread.start()
    print(f"🌐 HTTP server started at http://localhost:{HTTP_PORT}")
    return server

if __name__ == '__main__':
    # Entry point: authenticate, run one full pass over the mailbox, then
    # keep the alert scheduler and the dashboard server running.
    print("🔐 Authenticating with Google APIs...")
    gmail_service, calendar_service = authenticate_google()
    print("📩 Fetching emails from the past year...")
    list_all_emails(gmail_service, calendar_service)
    print("⏰ Starting alert scheduler...")
    run_scheduler(gmail_service, calendar_service)
    print("🌐 Starting HTTP server for UI...")
    # Keep whatever handle start_http_server provides; None means there is
    # nothing to stop explicitly.
    http_server = start_http_server()
    # Keep the main thread alive to allow scheduler and server to run
    try:
        while True:
            time.sleep(60)  # Keep main thread alive
    except KeyboardInterrupt:
        print("🛑 Stopping scheduler and server...")
        if http_server is not None:
            # Stop serving and release the port; without this the server keeps
            # running for as long as the interpreter (e.g. a notebook kernel)
            # stays alive.
            http_server.shutdown()
            http_server.server_close()
        

🔐 Authenticating with Google APIs...
Credentials saved to token.json
📩 Fetching emails from the past year...
📩 Total Emails Found: 2675
❌ Batch request error for message 197f3c026885d70e: <HttpError 429 when requesting https://gmail.googleapis.com/gmail/v1/users/me/messages/197f3c026885d70e?format=metadata&metadataHeaders=From&metadataHeaders=Subject&alt=json returned "Too many concurrent requests for user.". Details: "[{'message': 'Too many concurrent requests for user.', 'domain': 'global', 'reason': 'rateLimitExceeded'}]">
❌ Batch request error for message 197f3bfb5a9cbe1d: <HttpError 429 when requesting https://gmail.googleapis.com/gmail/v1/users/me/messages/197f3bfb5a9cbe1d?format=metadata&metadataHeaders=From&metadataHeaders=Subject&alt=json returned "Too many concurrent requests for user.". Details: "[{'message': 'Too many concurrent requests for user.', 'domain': 'global', 'reason': 'rateLimitExceeded'}]">
❌ Batch request error for message 197f37f499a3053b: <HttpError 429 when 

KeyboardInterrupt: 