# Testing SBI Tender Scraping

This notebook breaks down the SBI tender scraping process into steps for testing and development. We'll test:
1. Playwright setup and browser initialization
2. Page navigation and table extraction
3. Data parsing and transformation

In [None]:
import asyncio
import sys
from contextlib import AsyncExitStack
from pathlib import Path

# Make project-local packages importable: the project root is taken to be the
# parent of the current working directory, and it is appended to sys.path only
# when it is not already listed.
project_root = str(Path.cwd().parent)
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

from app.scrapers.table_scraper import TableScraper
from app.scrapers.playwright_manager import PlaywrightManager

## Source Configuration

Define the configuration used to scrape the SBI tender page:

In [None]:
# Source definition for the SBI page; passed to TableScraper and read for
# 'base_url' during navigation.
source_config = dict(
    id=1,
    psu_name='SBI',
    base_url='https://sbi.co.in/web/sbi-in-the-news/empanelment-of-vendors',
    page_type='table',
    scrape_frequency_hours=24,
    active=True,
    # CSS selector: a <table> that is a direct child of the element with id "procurement"
    table_selector='#procurement > table',
    # Values are the notification field names printed later in this notebook.
    # Keys are presumably the table's column headers -- confirm against TableScraper.
    header_mapping={
        'tender description': 'title',
        'location': 'location',
        'start date': 'start_date',
        'end date': 'end_date',
    },
)

## Browser Initialization

Test initializing the Playwright browser:

In [None]:
# Test browser initialization
async def test_browser_init():
    browser_manager = PlaywrightManager()
    await browser_manager.initialize()
    return browser_manager

# Run initialization
browser_manager = await test_browser_init()
print('Browser initialized successfully')

## Page Navigation

Test navigating to the SBI tenders page:

In [None]:
# Test page navigation
async def test_navigation(browser_manager):
    async with browser_manager.get_context() as context:
        # Create new page
        page = await context.new_page()
        
        # Navigate to URL
        success = await PlaywrightManager.navigate_with_retry(
            page,
            source_config['base_url'],
            max_retries=3
        )
        return page if success else None

# Navigate to page
page = await test_navigation(browser_manager)
if page:
    print('Navigation successful')
else:
    print('Navigation failed')

## Data Extraction

Test extracting tender data from the table:

In [None]:
# Test table extraction
async def test_extraction(page):
    scraper = TableScraper(source_config)
    result = await scraper.extract_notifications(page)
    return result

# Extract data
if page:
    result = await test_extraction(page)
    if result.success:
        print(f'Found {len(result.notifications)} notifications:')
        for notif in result.notifications[:3]:  # Show first 3 notifications
            print(f"\n- Title: {notif['title']}")
            print(f"  Location: {notif.get('location')}")
            print(f"  Start Date: {notif.get('start_date')}")
            print(f"  End Date: {notif.get('end_date')}")
    else:
        print(f'Error: {result.error_message}')

## Cleanup

Clean up browser resources:

In [None]:
# Cleanup
await browser_manager.cleanup()
print('Browser cleaned up')