In [None]:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException, NoSuchElementException, ElementClickInterceptedException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import pandas as pd

def setup_driver(chrome_driver_path):
    """
    Build a Chrome WebDriver backed by the given chromedriver executable.

    Args:
    chrome_driver_path (str): Path handed to the Chrome Service object.

    Returns:
    webdriver.Chrome: A driver created with the "--start-maximized" option.
    """
    chrome_options = Options()
    chrome_options.add_argument("--start-maximized")
    # To run without a visible window, also add the "--headless" argument here.

    # The Service wraps the chromedriver path; Chrome starts when the driver
    # object is constructed.
    return webdriver.Chrome(
        service=Service(chrome_driver_path),
        options=chrome_options,
    )

def extract_route_links(driver, path, max_pages=9):
    """
    Retrieve bus links and routes from the webpage, following its pagination.

    Scrapes the elements matched by ``path`` on the page currently loaded in
    ``driver``, then clicks the numbered pagination tab for the next page and
    repeats. Scraping stops when ``max_pages`` pages have been read, when the
    next tab cannot be found, when a wait times out, or when the click fails.

    Args:
    driver (webdriver): The WebDriver instance.
    path (str): The XPath to find the route elements.
    max_pages (int): Maximum number of pages to scrape. Defaults to 9, the
        limit that used to be hard-coded in the loop range.

    Returns:
    tuple: Lists of links and routes (same length, index-aligned).
    """
    # Function-scope import: the module-level import list does not include
    # this exception, and a stale tab should end pagination, not the run.
    from selenium.common.exceptions import StaleElementReferenceException

    links = []
    routes = []
    wait = WebDriverWait(driver, 20)

    for page in range(1, max_pages + 1):
        anchors = driver.find_elements(By.XPATH, path)

        # Debugging: Print the number of elements found
        print(f"Found {len(anchors)} elements with the path: {path}")

        for anchor in anchors:
            href = anchor.get_attribute("href")
            label = anchor.text
            # Skip anchors that are missing either the URL or the visible text
            # so both lists stay index-aligned.
            if href and label:
                links.append(href)
                routes.append(label)

        if page == max_pages:
            # Page budget used up: do not click through to a page that would
            # never be scraped.
            break

        try:
            # Wait for the pagination element to be present
            pagination = wait.until(
                EC.presence_of_element_located((By.CLASS_NAME, 'DC_117_paginationTable'))
            )
            # The leading "." keeps the search inside the pagination
            # container; a bare "//" would search the whole document.
            next_button = pagination.find_element(
                By.XPATH, f'.//div[@class="DC_117_pageTabs " and text()={page + 1}]'
            )

            # Scroll the next button into view
            driver.execute_script("arguments[0].scrollIntoView();", next_button)
            time.sleep(3)

            # Wait on, and click, the very element that was located above so
            # the clickability check and the click target cannot diverge.
            wait.until(EC.element_to_be_clickable(next_button)).click()
        except (
            NoSuchElementException,
            TimeoutException,
            ElementClickInterceptedException,
            StaleElementReferenceException,
        ) as e:
            # Report only the short message; str(e) also carries the full
            # chromedriver stack trace.
            reason = getattr(e, "msg", None) or type(e).__name__
            print(f"No more pages to paginate at step {page} or encountered an error: {reason}")
            break

    return links, routes

def main():
    """
    Scrape route links from a fixed list of redbus.in booking pages into a CSV.

    Every URL is opened in one shared Chrome session, its route anchors are
    collected with extract_route_links, and each row is tagged with a label
    derived from the URL. The combined table is written to 'all_routes.csv'
    (relative path). The browser is quit even if scraping raises.
    """
    booking_pages = [
        "https://www.redbus.in/online-booking/ksrtc-kerala/?utm_source=rtchometile",
        "https://www.redbus.in/online-booking/apsrtc/?utm_source=rtchometile",
        "https://www.redbus.in/online-booking/tsrtc/?utm_source=rtchometile",
        "https://www.redbus.in/online-booking/ktcl/?utm_source=rtchometile",
        "https://www.redbus.in/online-booking/rsrtc/?utm_source=rtchometile",
        "https://www.redbus.in/online-booking/south-bengal-state-transport-corporation-sbstc/?utm_source=rtchometile",
        "https://www.redbus.in/online-booking/hrtc/?utm_source=rtchometile",
        "https://www.redbus.in/online-booking/astc/?utm_source=rtchometile",
        "https://www.redbus.in/online-booking/uttar-pradesh-state-road-transport-corporation-upsrtc/?utm_source=rtchometile",
        "https://www.redbus.in/online-booking/wbtc-ctc/?utm_source=rtchometile"
    ]

    # Location of the ChromeDriver executable on this machine
    chrome_driver_path = r'C:\Users\Prasanna SK\Downloads\chromedriver-win64\chromedriver-win64\chromedriver.exe'

    route_xpath = "//div[@class='route_details']/a"

    # One list per output column; the three lists grow in lockstep.
    columns = {
        'Route Links': [],
        'Routes': [],
        'State': [],
    }

    driver = setup_driver(chrome_driver_path)
    try:
        for page_url in booking_pages:
            driver.get(page_url)
            time.sleep(3)  # Allow the page to load

            # Label = first hyphen-separated word of the fifth "/"-separated
            # URL piece, title-cased (e.g. ".../ksrtc-kerala/..." -> "Ksrtc").
            slug = page_url.split('/')[4]
            label = slug.partition('-')[0].title()

            page_links, page_routes = extract_route_links(driver, route_xpath)

            columns['Route Links'] += page_links
            columns['Routes'] += page_routes
            columns['State'] += [label] * len(page_links)
    finally:
        # Always release the browser, even when a page fails.
        driver.quit()

    # Persist everything that was collected.
    pd.DataFrame(columns).to_csv('all_routes.csv', index=False)
    print("Data saved to all_routes.csv")

# Run the scraper only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

Found 10 elements with the path: //div[@class='route_details']/a
Found 7 elements with the path: //div[@class='route_details']/a
No more pages to paginate at step 2 or encountered an error: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//div[@class="DC_117_pageTabs " and text()=3]"}
  (Session info: chrome=131.0.6778.86); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
	GetHandleVerifier [0x00007FF706F86CB5+28821]
	(No symbol) [0x00007FF706EF3840]
	(No symbol) [0x00007FF706D9578A]
	(No symbol) [0x00007FF706DE91BE]
	(No symbol) [0x00007FF706DE94AC]
	(No symbol) [0x00007FF706DDC52C]
	(No symbol) [0x00007FF706E0F33F]
	(No symbol) [0x00007FF706DDC3F6]
	(No symbol) [0x00007FF706E0F510]
	(No symbol) [0x00007FF706E2F412]
	(No symbol) [0x00007FF706E0F0A3]
	(No symbol) [0x00007FF706DDA778]
	(No symbol) [0x00007FF706DDB8E1]
	GetHandleVerifier [0x00007FF