In [10]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import time
import json
import logging

# Set up logging: INFO and above, each record prefixed with a timestamp and
# its level name.
# NOTE(review): logging.basicConfig() does nothing when the root logger
# already has handlers (common in notebooks, or when another library has
# configured logging first). If messages appear in a different format than
# the one requested here, that is the likely cause -- consider force=True
# (Python 3.8+) after confirming it is acceptable to drop existing handlers.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
# Module-level logger shared by every function in this file.
logger = logging.getLogger(__name__)

def setup_driver():
    """Create a Chrome WebDriver instance ready for scraping.

    The matching chromedriver binary is resolved through
    ``ChromeDriverManager().install()`` and the page-load timeout is set to
    30 seconds.

    Returns:
        The configured ``webdriver.Chrome`` instance.

    Raises:
        Exception: any error raised while building the driver is logged and
            then re-raised unchanged.
    """
    stability_flags = (
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--window-size=1920,1080',
    )
    try:
        options = Options()
        for flag in stability_flags:
            options.add_argument(flag)
        # Headless mode is intentionally left off so the browser window can be
        # watched while debugging; add '--headless=new' above to enable it.

        driver_path = ChromeDriverManager().install()
        browser = webdriver.Chrome(service=Service(driver_path), options=options)
        browser.set_page_load_timeout(30)
    except Exception as err:
        logger.error(f"Failed to setup WebDriver: {str(err)}")
        raise
    return browser

def wait_for_element(driver, xpath, timeout=10):
    """Wait until an element matching ``xpath`` is present in the DOM.

    Only *presence* is checked (``EC.presence_of_element_located``); the
    element is not guaranteed to be visible or interactable.

    Args:
        driver: The WebDriver to poll.
        xpath: XPath expression locating the element.
        timeout: Maximum number of seconds to wait (default 10).

    Returns:
        The located element, or ``None`` if it did not appear within
        ``timeout`` seconds or the wait failed for another reason. This
        function never raises; failures are logged instead.
    """
    # Imported locally so this function does not depend on edits to the
    # file-level import block.
    from selenium.common.exceptions import TimeoutException

    try:
        return WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.XPATH, xpath))
        )
    except TimeoutException:
        logger.error(f"Element not found: {xpath}")
        return None
    except Exception as e:
        # Anything else (invalid XPath, dead browser session, ...) is not a
        # "not found" condition, so report what actually went wrong.
        logger.error(f"Error while waiting for element {xpath}: {str(e)}")
        return None

def _extract_cve_entry(row):
    """Turn one result-table row into a CVE entry dict, or None if unusable."""
    cells = row.find_elements(By.TAG_NAME, "td")
    if len(cells) < 2:
        logger.warning(f"Row has incorrect number of cells: {len(cells)}")
        return None

    cve_id = cells[0].text.strip()
    description = cells[1].text.strip()
    if not (cve_id and description):
        # Rows with an empty ID or description are skipped silently.
        return None

    return {
        "CVE_ID": cve_id,
        "Description": description
    }


def scrape_cve_data(keyword="medical"):
    """Scrape CVE IDs and descriptions from the cve.mitre.org keyword search.

    Args:
        keyword: Search term passed to the ``cvekey.cgi`` endpoint. Defaults
            to ``"medical"``, which reproduces the original hard-coded query.

    Returns:
        A list of ``{"CVE_ID": ..., "Description": ...}`` dicts. The list is
        empty when the results container cannot be found, and may be partial
        when an error interrupts the scrape: errors are logged, not raised.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote_plus

    driver = None
    cve_data = []

    try:
        driver = setup_driver()
        url = f"https://cve.mitre.org/cgi-bin/cvekey.cgi?keyword={quote_plus(keyword)}"
        logger.info(f"Navigating to {url}")

        driver.get(url)
        logger.info("Page loaded, waiting for content...")

        # Wait for main content to load.
        # NOTE(review): this absolute XPath depends on the exact page layout
        # and will break if the site's markup changes.
        table_container = wait_for_element(driver, "/html/body/div[2]/div[3]/div[2]")

        if not table_container:
            logger.error("Table container not found")
            return []

        logger.info("Table container found, looking for table rows...")

        # Wait a moment for any dynamic content
        time.sleep(2)

        # Find all table rows
        rows = table_container.find_elements(By.TAG_NAME, "tr")
        logger.info(f"Found {len(rows)} rows in table")

        if len(rows) <= 1:
            # Try alternative method to find rows
            rows = driver.find_elements(By.XPATH, "//div[2]/div[3]/div[2]//tr")
            logger.info(f"Alternative method found {len(rows)} rows")

        # Skip header row if it exists
        start_index = 1 if rows and rows[0].find_elements(By.TAG_NAME, "th") else 0

        for row in rows[start_index:]:
            try:
                entry = _extract_cve_entry(row)
            except Exception as e:
                # One bad row must not abort the whole scrape.
                logger.error(f"Error processing row: {str(e)}")
                continue

            if entry is not None:
                cve_data.append(entry)
                logger.info(f"Processed: {entry['CVE_ID']}")

    except Exception as e:
        # Best effort: keep whatever was collected before the failure.
        logger.error(f"Fatal error: {str(e)}")
    finally:
        if driver:
            try:
                driver.quit()
            except Exception as e:
                # Cleanup failure must not mask the result, but a bare
                # ``except`` would also swallow KeyboardInterrupt/SystemExit.
                logger.warning(f"Failed to close WebDriver cleanly: {str(e)}")

    return cve_data

def save_to_json(data, filename='cve_medical_data.json'):
    """Write ``data`` to ``filename`` as indented UTF-8 JSON.

    Args:
        data: JSON-serialisable collection of entries to write.
        filename: Destination path (default ``'cve_medical_data.json'``).

    Returns:
        ``True`` when the file was written and the success message logged,
        ``False`` if any exception occurred (the error is logged, not raised).
    """
    succeeded = False
    try:
        with open(filename, mode='w', encoding='utf-8') as out:
            json.dump(data, out, indent=4, ensure_ascii=False)
        logger.info(f"Successfully saved {len(data)} entries to {filename}")
        succeeded = True
    except Exception as err:
        logger.error(f"Error saving to JSON: {str(err)}")
    return succeeded

def main():
    """Run the scrape and persist the results.

    Calls ``scrape_cve_data()``; when it yields entries they are written with
    ``save_to_json()``, otherwise a warning is logged. Any exception is
    logged rather than propagated.
    """
    try:
        results = scrape_cve_data()
        if not results:
            logger.warning("No CVE data was scraped")
            return
        logger.info(f"Successfully scraped {len(results)} CVE entries")
        save_to_json(results)
    except Exception as err:
        logger.error(f"Main process error: {str(err)}")

# Run the scraper only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Driver [C:\Users\acer\.wdm\drivers\chromedriver\win64\132.0.6834.159\chromedriver-win32/chromedriver.exe] found in cache
INFO:__main__:Navigating to https://cve.mitre.org/cgi-bin/cvekey.cgi?keyword=medical
INFO:__main__:Page loaded, waiting for content...
INFO:__main__:Table container found, looking for table rows...
INFO:__main__:Found 110 rows in table
INFO:__main__:Processed: CVE-2024-53522
INFO:__main__:Processed: CVE-2024-50593
INFO:__main__:Processed: CVE-2024-50592
INFO:__main__:Processed: CVE-2024-50591
INFO:__main__:Processed: CVE-2024-50590
INFO:__main__:Processed: CVE-2024-50589
INFO:__main__:Processed: CVE-2024-50588
INFO:__main__:Processed: CVE-2024-48703
INFO:__main__:Processed: CVE-2024-44024
INFO:__main__:Processed: CVE-2024-40392
INFO:__main__:Processed: CVE-2024-36673
INFO:__main__:Processed: CVE-2024-33606
INFO:__main__:Processed: CVE-2024-23