# In [None]:
import re
import base64
import os
import pickle
import time
import logging
import html
from urllib.parse import urlparse
from typing import List, Dict
from dotenv import load_dotenv
from datetime import datetime, timedelta
import pytz
import json
from functools import wraps

from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.oauth2.credentials import Credentials
from google.auth.transport.requests import Request
from googleapiclient.errors import HttpError
from googleapiclient.http import BatchHttpRequest

from tabulate import tabulate
import textwrap
import tenacity
import schedule  # Added for scheduling

from email.mime.text import MIMEText

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup

# --- Logging Setup ---
# Configure the root logger once at import time: INFO and above, timestamped.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# --- Environment Variables ---
# Values from this file are read later via os.environ (e.g. GOV_USERNAME / GOV_PASSWORD).
load_dotenv('credentials.env')  # Load credentials from .env file

# --- Constants ---
# OAuth scopes: read-only access for fetching alert emails, send access for outgoing mail.
SCOPES_GMAIL_READ = ['https://www.googleapis.com/auth/gmail.readonly']
SCOPES_GMAIL_SEND = ['https://www.googleapis.com/auth/gmail.send']
# State names (plus District of Columbia) recognised as section headers in an email body;
# is_state_header() compares against them case-insensitively.
US_STATES = [
    'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
    'Connecticut', 'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho',
    'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana',
    'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota',
    'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada',
    'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
    'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon',
    'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota',
    'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
    'West Virginia', 'Wisconsin', 'Wyoming', 'District of Columbia'
]
# Lower-case substrings, grouped by category, that mark a line as footer/boilerplate.
# is_special_line() skips any line containing one of them, so every entry is matched
# as a plain substring of the lower-cased line.
FOOTER_KEYWORDS = {
    'copyright': ['copyright', 'all rights reserved'],
    'company': ['govdirections', 'llc', 'atlanta', 'georgia'],
    'membership': ['member home page', 'premium membership'],
    'contact': ['customercare', 'email', 'call', 'phone'],
    'navigation': ['unsubscribe', 'preferences', 'account settings']
}
# ANSI escape sequences: bright-blue foreground and attribute reset.
BLUE = '\033[94m'
RESET = '\033[0m'

# --- Retry Decorator for Transient Errors ---
def retry_on_transient_error(max_attempts=3, backoff_factor=1):
    """Decorator factory that retries a call on transient ``HttpError`` responses.

    Only ``HttpError`` whose response status is 429, 500, 502, 503 or 504 is
    retried; any other exception propagates immediately. Between attempts the
    wrapper sleeps ``backoff_factor * 2 ** (attempt - 1)`` seconds.

    Args:
        max_attempts: Total number of calls allowed before the last transient
            error is re-raised. Values below 1 still result in one call, so
            the wrapped function always runs and never silently yields ``None``.
        backoff_factor: Multiplier (in seconds) for the exponential backoff.

    Returns:
        A decorator that wraps the target function with the retry loop.
    """
    transient_codes = frozenset({429, 500, 502, 503, 504})

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            attempts = 0
            while True:
                try:
                    return func(*args, **kwargs)
                except HttpError as e:
                    if e.resp.status not in transient_codes:
                        raise
                    attempts += 1
                    # ">=" (rather than "==") guarantees the error is re-raised
                    # even for max_attempts <= 0 or a non-integer limit.
                    if attempts >= max_attempts:
                        raise
                    sleep_time = backoff_factor * (2 ** (attempts - 1))
                    logger.warning(f"Transient error {e.resp.status} in {func.__name__}, retrying in {sleep_time}s (attempt {attempts}/{max_attempts})")
                    time.sleep(sleep_time)
        return wrapper
    return decorator

# --- Gmail Authentication and Email Parsing Helpers ---
@retry_on_transient_error()
def authenticate_gmail(scopes: list, token_path: str = 'token_read.json') -> build:
    """
    Authenticate against the Gmail API using credentials cached in a token file.

    The cached token is reused when possible and refreshed when its access
    token has expired. It is replaced through the interactive OAuth flow
    (client secrets read from ``client.json``) when it is missing, unreadable,
    lacks a required scope, cannot be refreshed, or was issued more than
    roughly six months ago.

    The ``creation_time`` stored in the token file records when the OAuth
    grant was obtained. It is carried over unchanged when the access token is
    merely refreshed, so the six-month limit is measured from the original
    grant rather than from the most recent refresh.

    Args:
        scopes: OAuth scopes the returned service must be authorized for.
        token_path: JSON file used to cache the credentials.

    Returns:
        The Gmail ``v1`` service object created by ``build``.

    Raises:
        FileNotFoundError: ``client.json`` is missing when the OAuth flow is needed.
        ValueError: No credentials could be obtained.
        Exception: Any other OAuth-flow or service-construction failure is
            reported and re-raised.
    """
    creds = None
    token_creation_time = None
    six_months = timedelta(days=183)  # Approximate 6 months

    def discard_token():
        """Delete the cached token file, tolerating one that is already gone."""
        try:
            os.remove(token_path)
        except FileNotFoundError:
            pass

    # Load credentials from token file if it exists
    if os.path.exists(token_path):
        try:
            with open(token_path, 'r') as token_file:
                token_data = json.load(token_file)
            creds = Credentials.from_authorized_user_info(token_data, scopes)
            token_creation_time = token_data.get('creation_time')
            logger.info(f"Loaded credentials from {token_path}. Refresh token present: {creds.refresh_token is not None}")
            # Verify scopes against what the file says was granted. creds.scopes
            # cannot be used for this: when scopes are passed to
            # from_authorized_user_info they simply become creds.scopes.
            granted_scopes = token_data.get('scopes')
            if isinstance(granted_scopes, str):
                granted_scopes = granted_scopes.split()
            if granted_scopes is not None and not all(scope in granted_scopes for scope in scopes):
                logger.warning(f"Credentials in {token_path} lack required scopes: {scopes}. Forcing re-authentication.")
                discard_token()
                creds = None
                token_creation_time = None
        except (ValueError, TypeError, AttributeError) as e:
            # ValueError covers malformed JSON and missing credential fields;
            # TypeError/AttributeError cover JSON that is not the expected object.
            logger.error(f"Failed to load credentials from {token_path}: {e}")
            print(f"Error: Invalid {token_path} file. Deleting and re-authenticating...")
            discard_token()
            creds = None
            token_creation_time = None

    # Check if token is older than 6 months
    if token_creation_time:
        try:
            creation_dt = datetime.fromisoformat(token_creation_time)
            current_dt = datetime.now(pytz.UTC)
            if current_dt - creation_dt > six_months:
                logger.info(f"Token is older than 6 months (created: {token_creation_time}). Forcing re-authentication.")
                print("Token expired (older than 6 months). Re-authenticating...")
                discard_token()
                creds = None
                token_creation_time = None
        except (ValueError, TypeError) as e:
            # TypeError: non-string value, or a naive timestamp that cannot be
            # subtracted from the timezone-aware current time.
            logger.error(f"Invalid creation_time in {token_path}: {e}. Deleting and re-authenticating...")
            discard_token()
            creds = None
            token_creation_time = None

    # Check if credentials are valid or can be refreshed
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            try:
                creds.refresh(Request())
                logger.info("Successfully refreshed access token.")
            except Exception as e:
                logger.error(f"Failed to refresh token: {e}")
                print(f"Error: Failed to refresh token ({e}). Re-authenticating...")
                discard_token()
                creds = None
        elif creds:
            # Invalid but not refreshable (e.g. no refresh token): using these
            # credentials would only fail later, so start over instead.
            logger.warning("Cached credentials are invalid and cannot be refreshed. Re-authenticating...")
            discard_token()
            creds = None

        # Run OAuth flow if no valid credentials
        if not creds:
            token_creation_time = None  # a new grant gets a new creation time
            try:
                flow = InstalledAppFlow.from_client_secrets_file('client.json', scopes)
                creds = flow.run_local_server(
                    port=8080,
                    access_type='offline',  # Ensure refresh token is issued
                    prompt='consent'        # Force consent screen to get refresh token
                )
                logger.info(f"OAuth flow completed. Refresh token obtained: {creds.refresh_token is not None}")
            except FileNotFoundError:
                print("Error: 'client.json' file not found. Please download OAuth 2.0 credentials from Google Cloud Console.")
                raise
            except Exception as e:
                print(f"Error: OAuth flow failed: {e}. Ensure your Google account is authorized and the browser flow completes successfully.")
                raise

        # Save credentials with creation time
        if creds:
            try:
                token_data = json.loads(creds.to_json())
                # Keep the original grant time across refreshes; stamp "now"
                # only for a new grant or a file that never recorded one.
                token_data['creation_time'] = token_creation_time or datetime.now(pytz.UTC).isoformat()
                with open(token_path, 'w') as token:
                    json.dump(token_data, token, indent=2)
                print(f"Credentials saved to {token_path}")
                logger.info(f"Saved credentials to {token_path}. Refresh token: {creds.refresh_token is not None}, Creation time: {token_data['creation_time']}")
            except Exception as e:
                # Best effort: the in-memory credentials are still usable.
                logger.error(f"Failed to save credentials to {token_path}: {e}")
                print(f"Warning: Failed to save credentials to {token_path}: {e}")
        else:
            print("Error: No valid credentials obtained from OAuth flow.")
            raise ValueError("Authentication failed: No valid credentials obtained.")

    # Build Gmail service
    try:
        return build('gmail', 'v1', credentials=creds)
    except Exception as e:
        print(f"Error: Failed to build Gmail API service: {e}")
        raise

def get_email_body(payload: dict) -> str:
    """Extract the first decodable text body from a Gmail message payload.

    Direct ``text/plain`` / ``text/html`` parts are tried first, in order.
    If none of them yields text, nested multipart containers are searched the
    same way. As a last resort the payload's own ``body.data`` is decoded.

    Args:
        payload: The ``payload`` dict of a Gmail message; body data is
            expected to be base64url-encoded text.

    Returns:
        The decoded body, or ``"No body content available."`` when nothing
        could be decoded.
    """
    no_body = "No body content available."
    text_types = ('text/plain', 'text/html')

    def decode_body(data):
        """Decode base64url ``data`` to text; return None if it is not decodable."""
        try:
            if isinstance(data, str):
                data = data.encode('ascii')
            # Restore any stripped '=' padding so unpadded base64url still decodes.
            padded = data + b'=' * (-len(data) % 4)
            return base64.urlsafe_b64decode(padded).decode('utf-8', errors="ignore")
        except (ValueError, TypeError):
            # binascii.Error and UnicodeEncodeError are both ValueError subclasses.
            return None

    def find_text(node):
        """Return the first non-empty text body under ``node``, or None."""
        parts = node.get('parts') or []
        for part in parts:
            if part.get('mimeType') in text_types:
                data = (part.get('body') or {}).get('data')
                if data:
                    text = decode_body(data)
                    if text:
                        return text
        # Only after the direct parts fail, descend into nested containers
        # (e.g. multipart/alternative inside multipart/mixed).
        for part in parts:
            if part.get('parts'):
                text = find_text(part)
                if text:
                    return text
        return None

    body = find_text(payload)
    if body:
        return body
    data = (payload.get('body') or {}).get('data')
    if data:
        return decode_body(data) or no_body
    return no_body

def is_state_header(line: str) -> bool:
    """Report whether ``line`` is exactly one of the US_STATES names, ignoring case."""
    candidate = line.upper()
    return any(candidate == name.upper() for name in US_STATES)

def is_special_line(line: str) -> bool:
    """Report whether ``line`` is noise: blank, a separator, or footer boilerplate."""
    # Blank or whitespace-only lines carry no content.
    if not line.strip():
        return True
    # Separator rows made up solely of '<', '>' and '*' characters.
    if re.match(r'^[<>*]+$', line):
        return True
    # Any footer keyword occurring as a substring marks the line as boilerplate.
    lowered = line.lower()
    for keywords in FOOTER_KEYWORDS.values():
        for keyword in keywords:
            if keyword in lowered:
                return True
    return False

def is_new_opportunity(line: str) -> bool:
    """Report whether ``line`` starts a new opportunity, i.e. begins with a bullet.

    Recognised bullets are ``*``, ``-`` and ``•``. The hyphen is escaped inside
    the character class so it is a literal bullet rather than a range operator
    (an unescaped ``*-•`` spans U+002A..U+2022 and matches letters and digits).
    """
    return re.match(r'^[*\-•]\s*', line) is not None

def extract_opportunities(body: str) -> List[Dict[str, str]]:
    """Parse an alert email body into opportunity records.

    Each record is a dict with the keys ``state``, ``opportunity_title`` and
    ``link``. An opportunity is kept only when it appears under a state
    header, its cleaned title is longer than 10 characters, and its ``[n]``
    reference number maps to a URL found elsewhere in the body.
    """
    stripped = (raw.strip() for raw in body.split('\n'))
    lines = [text for text in stripped if text]
    total = len(lines)

    # Map every "[n] http(s)://..." reference in the body to its URL;
    # later occurrences of the same number overwrite earlier ones.
    link_re = re.compile(r'\[(\d+)\]\s*(https?://\S+)')
    ref_to_url = {}
    for text in lines:
        ref_to_url.update(link_re.findall(text))

    found = []
    state = None
    pos = 0
    while pos < total:
        text = lines[pos]

        # A state header switches the section for everything that follows.
        if is_state_header(text):
            state = text.upper()
            pos += 1
            continue

        # Ignore anything before the first state, noise lines, and
        # continuation lines that do not open an opportunity.
        if not state or is_special_line(text) or not is_new_opportunity(text):
            pos += 1
            continue

        # Gather the opening line plus its continuation lines, stopping at the
        # next opportunity or state header; noise lines in between are dropped.
        chunk = [text]
        end = pos + 1
        while end < total:
            follower = lines[end]
            if is_new_opportunity(follower) or is_state_header(follower):
                break
            if follower.strip() and not is_special_line(follower):
                chunk.append(follower)
            end += 1

        joined = " ".join(chunk)
        title = clean_title(joined)

        ref_match = re.search(r'\[(\d+)\]', joined)
        ref = ref_match.group(1) if ref_match else None

        if len(title) > 10 and ref in ref_to_url:
            found.append({
                "state": state,
                "opportunity_title": title,
                "link": ref_to_url[ref]
            })

        pos = end  # resume at the line that ended this opportunity

    return found

def clean_title(title: str) -> str:
    """Clean an opportunity line while preserving its full text (including due dates).

    Removes one leading bullet (``*``, ``-`` or ``•``) with its trailing
    whitespace, drops every ``[n]`` reference marker, and collapses runs of
    whitespace into single spaces.
    """
    # The hyphen is escaped: an unescaped "*-•" is a range that would also
    # strip the first letter or digit of a title that has no bullet.
    title = re.sub(r'^[*\-•]\s*', '', title)  # Remove bullet point
    title = re.sub(r'\[\d+\]', '', title)  # Remove reference numbers
    title = re.sub(r'\s+', ' ', title).strip()  # Normalize spaces
    return title

@retry_on_transient_error()
def list_daily_bids_emails(service: build) -> List[str]:
    """Collect the unique opportunity URLs from all Daily Bids Alert emails.

    Lists every matching message id (following page tokens), downloads the
    full messages through batch requests, extracts opportunities from each
    body, and de-duplicates the links case-insensitively while keeping the
    first-seen spelling and order. Progress and a summary are printed.

    Args:
        service: Gmail API service object used for the list and get calls.

    Returns:
        Unique opportunity URLs in first-seen order. Any error is logged and
        printed, and an empty list is returned instead of raising.
    """
    try:
        messages = []
        next_page_token = None
        skipped_count = 0
        all_opportunities = []

        # Fetch all message IDs with retry
        @retry_on_transient_error()
        def fetch_messages(page_token):
            # NOTE(review): the phrase is wrapped in single quotes; confirm
            # Gmail search treats that as intended (it documents double
            # quotes for exact phrases).
            return service.users().messages().list(
                userId='me',
                q="subject:'Daily Bids Alert'",
                maxResults=500,  # documented maximum page size for messages.list
                pageToken=page_token
            ).execute()

        while True:
            try:
                result = fetch_messages(next_page_token)
            except HttpError as e:
                if e.resp.status == 403:
                    logger.error(f"Quota exceeded: {e}")
                    print("Error: Gmail API quota exceeded. Try again later or increase quota in Google Cloud Console.")
                raise
            page = result.get('messages', [])
            messages.extend(page)
            next_page_token = result.get('nextPageToken')
            logger.info(f"Fetched {len(page)} messages. Next page token: {next_page_token}")
            if not next_page_token:
                break

        print(f"📩 Total Emails Found: {len(messages)}")

        # Responses (or error markers) keyed by message id, filled by the callback.
        message_data_dict = {}

        def batch_callback(request_id, response, exception):
            # Only record the outcome here; failures are counted once, when
            # the message is processed below.
            if exception is not None:
                message_data_dict[request_id] = {
                    'error': f"Batch request error: {str(exception)}"
                }
            else:
                message_data_dict[request_id] = response

        # Fetch full email data in batches of at most 100 requests
        batch_size = 100
        for start in range(0, len(messages), batch_size):
            batch = service.new_batch_http_request(callback=batch_callback)
            for msg in messages[start:start + batch_size]:
                batch.add(
                    service.users().messages().get(
                        userId='me',
                        id=msg['id'],
                        format='full'
                    ),
                    request_id=msg['id']
                )
            batch.execute()

        # Process fetched emails
        for i, msg in enumerate(messages, 1):
            print(f"\r🔍 Processing email {i}/{len(messages)}...", end="", flush=True)
            msg_data = message_data_dict.get(msg['id'], {})
            if 'error' in msg_data:
                skipped_count += 1
                logger.error(f"Error processing email {msg['id']}: {msg_data['error']}")
                continue

            body = get_email_body(msg_data.get('payload', {}))
            if body == "No body content available.":
                continue

            all_opportunities.extend(extract_opportunities(body))

        # Final deduplication and URL extraction
        seen = set()
        unique_urls = []
        for opp in all_opportunities:
            key = opp['link'].lower()
            if key not in seen:
                seen.add(key)
                unique_urls.append(opp['link'])

        # Print results
        print("\n📊 EMAIL PROCESSING RESULTS:")
        print(f"Total emails processed: {len(messages)}")
        print(f"Emails skipped due to errors: {skipped_count}")
        print(f"Total opportunities found: {len(all_opportunities)}")
        print(f"Unique URLs found: {len(unique_urls)}")

        return unique_urls

    except HttpError as error:
        logger.error(f"Gmail API Error: {error}")
        print(f"❌ Gmail API Error: {error}")
        return []
    except Exception as error:
        logger.error(f"Unexpected error: {error}")
        print(f"❌ Unexpected error: {error}")
        return []

# --- Selenium Functions for Web Scraping ---
def setup_driver():
    """Create a headless Chrome WebDriver, logging and re-raising any startup failure."""
    chrome_options = webdriver.ChromeOptions()
    for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
        chrome_options.add_argument(flag)

    try:
        # ChromeDriverManager().install() supplies the driver path for the Service.
        chrome_service = Service(ChromeDriverManager().install())
        browser = webdriver.Chrome(service=chrome_service, options=chrome_options)
        logger.info("WebDriver initialized successfully")
        return browser
    except Exception as e:
        logger.error(f"Failed to initialize WebDriver: {e}")
        raise

@tenacity.retry(
    stop=tenacity.stop_after_attempt(3),
    wait=tenacity.wait_fixed(2),
    retry=tenacity.retry_if_exception_type(Exception),
    before_sleep=lambda retry_state: logger.warning(f"Retrying login attempt {retry_state.attempt_number}...")
)
def login_to_govdirections(driver, url):
    """Open ``url`` and log in to govdirections.com if a login form appears.

    Credentials come from the ``GOV_USERNAME`` and ``GOV_PASSWORD``
    environment variables. Failures while filling in the form are handled
    inside the function, so only exceptions raised before that point (missing
    credentials, navigation errors) reach the retry decorator.

    Args:
        driver: Selenium WebDriver used for navigation and form filling.
        url: Page to open; expected to show either the login form or bid content.

    Returns:
        True when bid content ("Event Date" / "Agency Sponsor") is present
        after the attempt, False when the browser is still on a login page or
        no bid content can be found.

    Raises:
        ValueError: Either credential variable is missing or empty.
    """
    username = os.environ.get('GOV_USERNAME')
    password = os.environ.get('GOV_PASSWORD')

    if not username or not password:
        logger.error("GOV_USERNAME or GOV_PASSWORD not found in credentials.env")
        raise ValueError("Missing credentials in environment")

    # Text that only appears on a bid detail page; used to confirm success.
    bid_content_locator = (By.XPATH, "//*[contains(text(), 'Event Date') or contains(text(), 'Agency Sponsor')]")

    driver.get(url)
    current_url = driver.current_url
    logger.info(f"Navigated to URL: {current_url}")

    try:
        email_field = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.NAME, "data[User][email]"))
        )
        logger.info("Login form detected")

        email_field.send_keys(username)
        password_field = driver.find_element(By.NAME, "data[User][passwd]")
        password_field.send_keys(password)

        # Case-insensitive match on the submit button's value ("log in").
        login_button = driver.find_element(By.XPATH, "//input[contains(translate(@value, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'log in')]")
        login_button.click()

        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(bid_content_locator)
        )
        logger.info("Login successful and bid content page loaded")
        return True
    except Exception as e:
        logger.error(f"Login failed: {e}")
        current_url = driver.current_url
        if 'login' in current_url.lower():
            logger.error("Redirected to login page after login attempt")
            return False
        # The form may be absent simply because the page already shows bid
        # content; check for that before giving up.
        try:
            WebDriverWait(driver, 5).until(
                EC.presence_of_element_located(bid_content_locator)
            )
            logger.info("Bid content found after login attempt, proceeding")
            return True
        except Exception:
            # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
            logger.error("No bid content found after login attempt")
            return False

def extract_opportunity_details(driver, url):
    """Scrape bid details from the page currently loaded in ``driver``.

    The page source is parsed with BeautifulSoup and also written to
    ``page_source.html`` for debugging. Each field is looked up first through
    its ``<dt>``/``<dd>`` label and, failing that, through a regex over the
    page text. Fields that cannot be found are set to ``"Not Available"``.

    Args:
        driver: Selenium WebDriver already positioned on the bid page.
        url: Not used by this function; the driver's current page is read.

    Returns:
        A dict with the keys ``title``, ``view_link``, ``document_link``,
        ``event_date``, ``agency_sponsor``, ``reference``, ``contact_phone``,
        ``contact_email``, ``contact_dept``, ``contact_name``, ``summary`` and
        ``competitive_intel``. Returns None when the driver is still on a
        login page, when nothing except possibly the document link was found,
        or when any error occurs (the error is logged).
    """
    not_available = "Not Available"
    phone_re = re.compile(r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}')
    email_re = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')

    def clean_text(text):
        """Strip and HTML-unescape ``text``; empty/None becomes the placeholder."""
        return html.unescape(text.strip()) if text else not_available

    def text_of_next_dd(label_tag):
        """Return the cleaned text of the first <dd> following ``label_tag``."""
        next_dd = label_tag.find_next('dd')
        return clean_text(next_dd.get_text()) if next_dd else not_available

    def matched(match, group=0):
        """Return the requested group of ``match``, or the placeholder if None."""
        return match.group(group) if match else not_available

    try:
        current_url = driver.current_url
        logger.info(f"Extracting details from URL: {current_url}")
        if 'login' in current_url.lower():
            logger.error("Extraction aborted: Still on login page")
            return None

        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "body"))
        )
        # Read the source once so the parsed tree and the debug dump agree.
        page_source = driver.page_source
        soup = BeautifulSoup(page_source, 'html.parser')
        page_text = soup.get_text()

        # Save page source for debugging
        with open('page_source.html', 'w', encoding='utf-8') as f:
            f.write(page_source)
        logger.info("Page source saved to page_source.html")

        details = {}

        # Extract title
        title_element = soup.select_one('h1') or soup.select_one('h2')
        details['title'] = clean_text(title_element.get_text()) if title_element else not_available

        # Extract view link
        view_link = soup.find('a', string=re.compile(r'view all.*opportunities', re.I))
        details['view_link'] = view_link['href'] if view_link and 'href' in view_link.attrs else not_available

        # Document link: prefer the URL in the text node right after "If online ..."
        doc_link = None
        if_online_text = soup.find(string=re.compile(r'If online, then documents are here:', re.I))
        if if_online_text:
            next_sibling = if_online_text.find_next(string=True)
            if next_sibling and 'http' in next_sibling:
                # "http" may appear without forming a full URL, so the search
                # can miss; fall through to the page-wide scan in that case.
                link_match = re.search(r'https?://[^\s]+', next_sibling)
                if link_match:
                    doc_link = link_match.group(0)

        # If not found, look for any URL in the text
        if not doc_link:
            # NOTE(review): the trailing character class contains a space, so a
            # match can run on into the words after the URL — confirm intended.
            url_pattern = re.compile(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+[/\w .-]*/?')
            url_match = url_pattern.search(page_text)
            if url_match:
                doc_link = url_match.group(0)

        details['document_link'] = doc_link if doc_link else not_available

        # Extract event date
        event_date_label = soup.find('dt', string=re.compile(r'event\s*date', re.I))
        if event_date_label:
            details['event_date'] = text_of_next_dd(event_date_label)
        else:
            # Alternative extraction for event date, e.g. "Mon, January 5th 2025"
            date_match = re.search(r'(Mon|Tue|Wed|Thu|Fri|Sat|Sun),?\s+\w+\s+\d{1,2}(?:st|nd|rd|th)?\s+\d{4}', page_text)
            details['event_date'] = matched(date_match)

        # Extract agency sponsor
        agency_label = soup.find('dt', string=re.compile(r'agency\s*sponsor', re.I))
        if agency_label:
            details['agency_sponsor'] = text_of_next_dd(agency_label)
        else:
            # Alternative extraction for agency sponsor
            agency_match = re.search(r'The agency sponsor is:\s*(.*?)(?=\n|$)', page_text)
            details['agency_sponsor'] = agency_match.group(1).strip() if agency_match else not_available

        # Extract reference
        reference_label = soup.find('dt', string=re.compile(r'reference|id\s*number|solicitation\s*number', re.I))
        if reference_label:
            details['reference'] = text_of_next_dd(reference_label)
        else:
            # Alternative extraction for reference
            ref_match = re.search(r'The reference for this notice \(if available\):\s*(.*?)(?=\n|$)', page_text)
            details['reference'] = ref_match.group(1).strip() if ref_match else not_available

        # Extract contact information (defaults first, overridden where found)
        details['contact_phone'] = not_available
        details['contact_email'] = not_available
        details['contact_dept'] = not_available
        details['contact_name'] = not_available

        contact_label = soup.find('dt', string=re.compile(r'contact\s*(information|info)', re.I))
        if contact_label:
            contact_dd = contact_label.find_next('dd')
            if contact_dd:
                contact_text = clean_text(contact_dd.get_text(separator=' '))
                details['contact_phone'] = matched(phone_re.search(contact_text))
                details['contact_email'] = matched(email_re.search(contact_text))

                # The following <dd> counts as the department only when it
                # names Purchasing or Procurement.
                dept_dd = contact_dd.find_next('dd')
                dept_text = dept_dd.get_text() if dept_dd else ""
                if 'Purchasing' in dept_text or 'Procurement' in dept_text:
                    details['contact_dept'] = clean_text(dept_text)
        else:
            # Alternative extraction for contact info from the whole page text
            details['contact_phone'] = matched(phone_re.search(page_text))
            details['contact_email'] = matched(email_re.search(page_text))

            dept_match = re.search(r'Agency Contact Information:\s*(.*?)(?=\n|$)', page_text)
            details['contact_dept'] = dept_match.group(1).strip() if dept_match else not_available

        # Extract summary
        summary_section = soup.find('div', class_='well well-sm text-left', string=re.compile(r'summary', re.I))
        if summary_section:
            summary_paragraph = summary_section.find_next('p')
            summary_text = summary_paragraph.get_text(separator='\n').strip() if summary_paragraph else ""
            details['summary'] = clean_text(summary_text) if summary_text else not_available
        else:
            details['summary'] = not_available

        # Extract competitive intelligence
        comp_intel_section = soup.find('div', class_='well well-sm text-left', string=re.compile(r'competitive\s*intelligence', re.I))
        if comp_intel_section:
            comp_text = comp_intel_section.get_text(separator='\n').strip()
            details['competitive_intel'] = clean_text(comp_text) if comp_text else not_available
        else:
            details['competitive_intel'] = not_available

        # The document link falls back to any URL on the page, so it alone
        # does not prove this is a bid page.
        if all(value == not_available for key, value in details.items() if key != 'document_link'):
            logger.warning("No meaningful data extracted, page may not contain bid details")
            return None

        return details
    except Exception as e:
        logger.error(f"Error extracting details: {e}")
        return None

# --- Gmail API Functions for Sending Emails ---
def authenticate_gmail_api(token_path: str = 'token_send.json') -> build:
    """
    Authenticate the Gmail API for sending email.

    Delegates to ``authenticate_gmail`` with the ``SCOPES_GMAIL_SEND`` scopes,
    so token loading, the six-month age check, refresh, the OAuth flow and
    token saving follow a single code path instead of a duplicated copy.
    ``authenticate_gmail`` already carries the transient-error retry
    decorator, so it is not applied a second time here.

    Args:
        token_path: JSON file used to cache the send-scope credentials.

    Returns:
        The Gmail ``v1`` service object returned by ``authenticate_gmail``.

    Raises:
        Whatever ``authenticate_gmail`` raises (missing ``client.json``,
        failed OAuth flow, failed service construction).
    """
    return authenticate_gmail(SCOPES_GMAIL_SEND, token_path)

@retry_on_transient_error()
@tenacity.retry(
    stop=tenacity.stop_after_attempt(5),  # Retry up to 5 times
    wait=tenacity.wait_fixed(12 * 60 * 60),  # Wait 12 hours between retries (in seconds)
    retry=tenacity.retry_if_exception_type(HttpError),  # Retry on HttpErrors
    before_sleep=lambda retry_state: logger.warning(
        f"Email sending failed. Retrying in 12 hours (attempt {retry_state.attempt_number})..."
    ),
)
def send_email(details):
    """Send one opportunity as an HTML email through the Gmail API.

    Args:
        details: Mapping of opportunity fields. The keys read are ``title``,
            ``view_link``, ``document_link``, ``event_date``,
            ``agency_sponsor``, ``reference``, ``contact_phone``,
            ``contact_dept`` and ``contact_email``. A missing key is rendered
            as ``Not Available`` (the subject falls back to
            ``Opportunity Details``).

    Raises:
        HttpError: Logged and re-raised when the Gmail API call fails; the
            tenacity decorator above retries on this exception type.
        Exception: Any other failure is logged and re-raised. Tenacity is
            configured to retry only ``HttpError``, so these are not retried
            by it.

    NOTE(review): each tenacity retry sleeps for 12 hours inside the calling
    thread, so a failing send blocks the caller for that long - confirm this
    is intended for a job scheduled every few minutes.
    """
    sender_email = "rfp@iitlabs.com"  # Replace with your sender email
    to_email = "rfp@iitlabs.com"  # Replace with your recipient email

    def field(key):
        # The values are scraped from third-party pages, so they must be
        # escaped before being placed in HTML text or inside href="...".
        return html.escape(str(details.get(key, 'Not Available')))

    # Collapse runs of whitespace so a scraped title containing line breaks
    # cannot spill into other message headers.
    subject = " ".join(str(details.get('title', 'Opportunity Details')).split())

    document_link = field('document_link')

    # Format details in HTML
    email_body = f"""
<html>
  <body style="font-family: Arial, sans-serif;">
    <div style="background-color: #E6F3FA; padding: 10px; margin-bottom: 10px;">
      <p>The RFP: Learn to Read and Respond to a Request for Proposal - Available at Amazon</p>
    </div>
    <div style="background-color: #F5F5F5; padding: 10px; margin-bottom: 10px;">
      <h2>{field('title')}</h2>
      <p><strong>★ Capture this Bid</strong></p>
      <p><a href="{field('view_link')}">View all of your Captured Opportunities</a></p>
      <p>If online, then documents are here: <a href="{document_link}">{document_link}</a></p>
      <p><strong>Event Date:</strong> {field('event_date')}</p>
      <p><strong>The agency sponsor is:</strong> {field('agency_sponsor')}</p>
      <p><strong>The reference for this notice (if available):</strong> {field('reference')}</p>
      <p><strong>Agency Contact Information:</strong></p>
      <p>☎ {field('contact_phone')}</p>
      <p>{field('contact_dept')}</p>
      <p>{field('contact_email')}</p>
      <p><a href="#">Learn to Do Business with this Agency</a></p>
    </div>
  </body>
</html>
"""

    # Create MIME message
    mime_message = MIMEText(email_body, 'html')
    mime_message['to'] = to_email
    mime_message['from'] = sender_email
    mime_message['subject'] = subject

    # The Gmail API expects the full RFC 2822 message, base64url-encoded,
    # under the 'raw' key.
    payload = {'raw': base64.urlsafe_b64encode(mime_message.as_bytes()).decode()}

    try:
        service = authenticate_gmail_api(token_path='token_send.json')
        sent_message = service.users().messages().send(userId='me', body=payload).execute()
        logger.info(f"Email sent successfully to {to_email}, Message ID: {sent_message['id']}")
    except HttpError as error:
        logger.error(f"Failed to send email: {error}")
        raise  # Re-raise to trigger retry
    except Exception as e:
        logger.error(f"Unexpected error sending email: {e}")
        raise  # Propagate to the caller; tenacity only retries HttpError

# --- Main Execution ---
def _load_processed_urls(path='processed_urls.pickle'):
    """Return the set of already-emailed URLs stored at *path* (empty set if the file is absent)."""
    if not os.path.exists(path):
        return set()
    # NOTE: pickle must only ever be loaded from a file this script wrote
    # itself; never point this at data from an untrusted source.
    with open(path, 'rb') as f:
        processed = pickle.load(f)
    logger.info(f"Loaded {len(processed)} processed URLs from pickle file.")
    return processed


def _save_processed_urls(processed, path='processed_urls.pickle'):
    """Persist *processed* to *path* by writing a temp file and renaming it over the target."""
    tmp_path = f"{path}.tmp"
    with open(tmp_path, 'wb') as f:
        pickle.dump(processed, f)
    # os.replace swaps the file in one step, so an interrupt during the dump
    # cannot leave a truncated pickle behind for the next run to choke on.
    os.replace(tmp_path, path)


def main():
    """Run one pass: collect bid URLs from Gmail, scrape each new one, and email its details.

    URLs that were emailed in an earlier pass are remembered in
    ``processed_urls.pickle`` and skipped. A URL is recorded as processed only
    after its email has been sent, so failures are retried on the next pass.
    Any exception is logged rather than propagated, and the WebDriver (if one
    was started) is always closed.
    """
    driver = None
    try:
        # Step 1: Extract URLs from Gmail
        gmail_service = authenticate_gmail(SCOPES_GMAIL_READ, token_path='token_read.json')
        unique_urls = list_daily_bids_emails(gmail_service)
        if not unique_urls:
            logger.info("No new bid opportunity URLs found in Gmail.")
            print("No new bid opportunity URLs found in Gmail.")
            return  # Exit if no URLs found

        # Step 2: Drop URLs handled by a previous run *before* starting the
        # browser, so a pass with nothing new to do never launches Chrome.
        processed_urls = _load_processed_urls()
        candidates = list(dict.fromkeys(unique_urls))  # de-duplicate, keep order
        pending_urls = [url for url in candidates if url not in processed_urls]
        skipped = len(candidates) - len(pending_urls)
        if skipped:
            logger.info(f"Skipping {skipped} already processed URLs.")
        if not pending_urls:
            logger.info("All bid opportunity URLs have already been processed.")
            return

        # Step 3: Process each remaining URL
        driver = setup_driver()
        for url in pending_urls:
            logger.info(f"Processing URL: {url}")

            if not login_to_govdirections(driver, url):
                logger.error(f"Failed to access bid content page for URL: {url}")
                continue

            details = extract_opportunity_details(driver, url)
            if not details:
                logger.error(f"Failed to extract opportunity details for URL: {url}")
                continue

            logger.info("\nExtracted Opportunity Details:")
            logger.info("=" * 50)
            for key, value in details.items():
                logger.info(f"{key.replace('_', ' ').title()}: {value}")
            logger.info("=" * 50)

            try:
                send_email(details)
            except Exception as e:
                logger.error(f"Failed to send email for URL: {url}. Error: {e}")
                continue

            # Record the URL only after a successful send, and persist right
            # away so a crash later in the loop does not cause a re-send.
            processed_urls.add(url)
            _save_processed_urls(processed_urls)
            logger.info(f"URL: {url} processed and email sent.")
            time.sleep(5)  # Sleep for 5 seconds to avoid overloading the server

    except Exception as e:
        logger.error(f"Main execution failed: {e}")
    finally:
        if driver:
            driver.quit()
            logger.info("WebDriver closed")

def run_scheduler():
    """Register main() as a job repeating every 5 minutes and poll the scheduler until interrupted.

    The loop wakes once a minute to run whatever jobs are due. Ctrl+C
    (KeyboardInterrupt) ends the loop and is reported instead of propagating.
    """
    interval_minutes = 5
    poll_seconds = 60  # Check every minute to run pending tasks

    def poll_forever():
        # Never returns normally; it only exits via an exception.
        while True:
            schedule.run_pending()
            time.sleep(poll_seconds)

    schedule.every(interval_minutes).minutes.do(main)
    logger.info("Scheduler started. Running main() every 5 minutes. Press Ctrl+C to stop.")

    try:
        poll_forever()
    except KeyboardInterrupt:
        logger.info("Scheduler stopped by user.")
        print("Scheduler stopped.")

# Entry point: start the scheduler loop when this module is executed directly
# rather than imported.
if __name__ == "__main__":
    run_scheduler()

2025-07-16 11:06:09,916 - INFO - Scheduler started. Running main() every 5 minutes. Press Ctrl+C to stop.
2025-07-16 11:11:09,967 - INFO - Loaded credentials from token_read.json. Refresh token present: True
2025-07-16 11:11:10,251 - INFO - Successfully refreshed access token.
2025-07-16 11:11:10,251 - INFO - Saved credentials to token_read.json. Refresh token: True, Creation time: 2025-07-16T05:41:10.251529+00:00
2025-07-16 11:11:10,267 - INFO - file_cache is only supported with oauth2client<4.0.0


Credentials saved to token_read.json


2025-07-16 11:11:10,943 - INFO - Fetched 61 messages. Next page token: None


📩 Total Emails Found: 61
🔍 Processing email 19/61...

2025-07-16 11:11:12,311 - ERROR - Error processing email 197abef24791d501: Batch request error: <HttpError 429 when requesting https://gmail.googleapis.com/gmail/v1/users/me/messages/197abef24791d501?format=full&alt=json returned "Too many concurrent requests for user.". Details: "[{'message': 'Too many concurrent requests for user.', 'domain': 'global', 'reason': 'rateLimitExceeded'}]">


🔍 Processing email 22/61...

2025-07-16 11:11:12,319 - ERROR - Error processing email 197a64f1cdda53a5: Batch request error: <HttpError 429 when requesting https://gmail.googleapis.com/gmail/v1/users/me/messages/197a64f1cdda53a5?format=full&alt=json returned "Too many concurrent requests for user.". Details: "[{'message': 'Too many concurrent requests for user.', 'domain': 'global', 'reason': 'rateLimitExceeded'}]">


🔍 Processing email 26/61...

2025-07-16 11:11:12,336 - ERROR - Error processing email 1977dcd743854f79: Batch request error: <HttpError 429 when requesting https://gmail.googleapis.com/gmail/v1/users/me/messages/1977dcd743854f79?format=full&alt=json returned "Too many concurrent requests for user.". Details: "[{'message': 'Too many concurrent requests for user.', 'domain': 'global', 'reason': 'rateLimitExceeded'}]">


🔍 Processing email 36/61...

2025-07-16 11:11:12,374 - ERROR - Error processing email 19743ab0e3e318d7: Batch request error: <HttpError 429 when requesting https://gmail.googleapis.com/gmail/v1/users/me/messages/19743ab0e3e318d7?format=full&alt=json returned "Too many concurrent requests for user.". Details: "[{'message': 'Too many concurrent requests for user.', 'domain': 'global', 'reason': 'rateLimitExceeded'}]">


🔍 Processing email 61/61...




📊 EMAIL PROCESSING RESULTS:
Total emails processed: 61
Emails skipped due to errors: 8
Total opportunities found: 1252
Unique URLs found: 1151


2025-07-16 11:11:13,899 - INFO - Get LATEST chromedriver version for google-chrome
2025-07-16 11:11:13,939 - INFO - Get LATEST chromedriver version for google-chrome
2025-07-16 11:11:13,970 - INFO - Driver [C:\Users\saiku\.wdm\drivers\chromedriver\win64\138.0.7204.157\chromedriver-win32/chromedriver.exe] found in cache
2025-07-16 11:11:15,735 - INFO - WebDriver initialized successfully
2025-07-16 11:11:15,778 - INFO - Loaded 551 processed URLs from pickle file.
2025-07-16 11:11:15,781 - INFO - Processing URL: https://govdirections.com/bids/view/1065922113
2025-07-16 11:11:18,533 - INFO - Navigated to URL: https://govdirections.com/users/login
2025-07-16 11:11:18,554 - INFO - Login form detected
2025-07-16 11:11:22,750 - INFO - Login successful and bid content page loaded
2025-07-16 11:11:22,757 - INFO - Extracting details from URL: https://govdirections.com/bids/view/1065922113/Website_Design_and_Layout
2025-07-16 11:11:22,784 - INFO - Page source saved to page_source.html
2025-07-16 1

Credentials saved to token_send.json


2025-07-16 11:11:23,846 - INFO - Email sent successfully to rfp@iitlabs.com, Message ID: 19811c0804327a6a
2025-07-16 11:11:23,862 - INFO - URL: https://govdirections.com/bids/view/1065922113 processed and email sent.
2025-07-16 11:11:28,879 - INFO - Processing URL: https://govdirections.com/bids/view/1065923260
2025-07-16 11:11:31,117 - INFO - Navigated to URL: https://govdirections.com/bids/view/1065923260/NASA_Ames_Partnership_Days
2025-07-16 11:11:41,524 - ERROR - Login failed: Message: 
Stacktrace:
	GetHandleVerifier [0x0xad1a33+62339]
	GetHandleVerifier [0x0xad1a74+62404]
	(No symbol) [0x0x912123]
	(No symbol) [0x0x95a86e]
	(No symbol) [0x0x95ac0b]
	(No symbol) [0x0x9a2f72]
	(No symbol) [0x0x97f404]
	(No symbol) [0x0x9a07a3]
	(No symbol) [0x0x97f1b6]
	(No symbol) [0x0x94e7a2]
	(No symbol) [0x0x94f644]
	GetHandleVerifier [0x0xd465c3+2637587]
	GetHandleVerifier [0x0xd419ca+2618138]
	GetHandleVerifier [0x0xaf84aa+220666]
	GetHandleVerifier [0x0xae88d8+156200]
	GetHandleVerifier [0x0x

Scheduler stopped.
