In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

# Listing page that the scraper opens first; its 'route' elements supply the
# per-route links and names that are then visited one by one.
URL = "https://www.redbus.in/online-booking/ksrtc-kerala/?utm_source=rtchometile"

def initialize_driver():
    """Start a Chrome WebDriver session, maximize its window, and return it."""
    chrome = webdriver.Chrome()
    chrome.maximize_window()
    return chrome

def load_page(driver, url):
    """Open ``url`` and block until a 'route' element is present in the DOM.

    The explicit wait uses a 10 second timeout; a fixed 2 second pause follows
    it to give the page a moment to settle.
    """
    driver.get(url)

    route_locator = (By.CLASS_NAME, 'route')
    route_is_present = EC.presence_of_element_located(route_locator)
    WebDriverWait(driver, 10).until(route_is_present)

    # Short fixed pause after the explicit wait has succeeded.
    time.sleep(2)

def scrape_bus_routes(driver):
    """Collect the link and display name of every 'route' element on the page.

    Returns a ``(links, names)`` pair of equally long lists: ``links`` holds
    each element's ``href`` attribute and ``names`` its whitespace-stripped
    text, in document order.
    """
    links = []
    names = []
    for anchor in driver.find_elements(By.CLASS_NAME, 'route'):
        links.append(anchor.get_attribute('href'))
        names.append(anchor.text.strip())
    return links, names

def _text_or_default(elements, index, default=''):
    """Return ``elements[index].text``, or ``default`` when the index is out of range."""
    return elements[index].text if index < len(elements) else default


def _scroll_to_bottom_until_stable(driver, pause=3, max_rounds=10):
    """Scroll to the page bottom repeatedly until the document height stops growing.

    Always scrolls at least once; ``max_rounds`` bounds the loop so a page that
    keeps growing cannot stall the scraper.
    """
    previous_height = driver.execute_script("return document.body.scrollHeight")
    for _ in range(max_rounds):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)
        current_height = driver.execute_script("return document.body.scrollHeight")
        if current_height == previous_height:
            break
        previous_height = current_height


def scrape_bus_details(driver, url, route_name):
    """Scrape the bus listings shown on a single route page.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Address of the route page to open.
        route_name: Display name of the route, copied into every row.

    Returns:
        A list with one dict per bus-name element found, keyed by
        ``Route_Name``, ``Route_Link``, ``Bus_Name``, ``Bus_Type``,
        ``Departing_Time``, ``Duration``, ``Reaching_Time``, ``Star_Rating``,
        ``Price`` and ``Seat_Availability``. Returns ``[]`` if anything fails
        (the error is printed, not raised).
    """
    bus_name_locator = (By.CLASS_NAME, 'travels.lh-24.f-bold.d-color')
    try:
        driver.get(url)
        # Wait for at least one bus name before interacting with the page.
        WebDriverWait(driver, 10).until(EC.presence_of_element_located(bus_name_locator))

        try:
            # Click the "View Buses" button if it exists
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            # JavaScript click, as in the original flow.
            driver.execute_script("arguments[0].click();", view_buses_button)
            WebDriverWait(driver, 10).until(EC.presence_of_element_located(bus_name_locator))
        except Exception as e:
            # Best effort: a missing button is not fatal, keep scraping what is visible.
            print(f"No 'View Buses' button found for route {route_name}: {e}")

        # Keep scrolling while the page grows, in case more results are
        # appended on scroll; a single scroll would only reveal one extra batch.
        _scroll_to_bottom_until_stable(driver)

        # Each field is collected as its own page-wide list and matched by position.
        bus_name_elements = driver.find_elements(*bus_name_locator)
        bus_type_elements = driver.find_elements(By.CLASS_NAME, "bus-type.f-12.m-top-16.l-color.evBus")
        departing_time_elements = driver.find_elements(By.CLASS_NAME, "dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CLASS_NAME, "dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CLASS_NAME, "bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CLASS_NAME, "fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left m-top-16')]")

        # Every lookup is bounds-checked: previously one short list raised
        # IndexError and the outer handler discarded the whole route.
        # NOTE(review): positional matching assumes each list is in the same
        # bus order; a bus lacking a field shifts later values — confirm
        # against the page markup.
        return [
            {
                "Route_Name": route_name,
                "Route_Link": url,
                "Bus_Name": bus_name_element.text,
                "Bus_Type": _text_or_default(bus_type_elements, i),
                "Departing_Time": _text_or_default(departing_time_elements, i),
                "Duration": _text_or_default(duration_elements, i),
                "Reaching_Time": _text_or_default(reaching_time_elements, i),
                "Star_Rating": _text_or_default(star_rating_elements, i, '0'),
                "Price": _text_or_default(price_elements, i),
                "Seat_Availability": _text_or_default(seat_availability_elements, i, '0'),
            }
            for i, bus_name_element in enumerate(bus_name_elements)
        ]

    except Exception as e:
        print(f"Error occurred while scraping bus details for {url}: {str(e)}")
        return []

def scrape_all_pages(num_pages=2):
    """Scrape bus details for every route listed on the first ``num_pages`` listing pages.

    Args:
        num_pages: Number of pagination tabs of the listing to process
            (defaults to 2, the previously hard-coded value).

    Returns:
        A flat list of the bus-detail dicts returned by ``scrape_bus_details``
        for all routes. A failure on one page is printed and the remaining
        pages are still attempted.
    """
    all_bus_details = []
    driver = initialize_driver()  # Initialize driver once
    try:
        load_page(driver, URL)

        for page in range(1, num_pages + 1):
            try:
                if page > 1:
                    # Scraping the previous page's routes navigated this same
                    # driver to route detail pages, so return to the listing
                    # before looking for the pagination tab.
                    load_page(driver, URL)
                    pagination_tab = WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                    )
                    driver.execute_script("arguments[0].click();", pagination_tab)
                    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'route')))  # Wait for routes to load
                    time.sleep(3)  # Brief wait for smooth transition

                # Capture links and names as plain strings before navigating
                # away, so nothing depends on elements of the listing page.
                route_links, route_names = scrape_bus_routes(driver)
                for link, name in zip(route_links, route_names):
                    if not link:
                        # A route element without an href cannot be visited.
                        continue
                    all_bus_details.extend(scrape_bus_details(driver, link, name))

            except Exception as e:
                print(f"Error occurred while accessing page {page}: {str(e)}")

    finally:
        driver.quit()  # Ensure the driver is closed even if an error occurs
    return all_bus_details

if __name__ == "__main__":
    # Run the full scrape; the result is a list of per-bus dicts.
    bus_data = scrape_all_pages()

    if not bus_data:
        print("No data scraped.")
    else:
        # Tabulate the rows and write them to CSV without the index column.
        df = pd.DataFrame(bus_data)
        df.to_csv('kerala_bus_details.csv', index=False)
        print("Data successfully saved to 'kerala_bus_details.csv'")


Error occurred while scraping bus details for https://www.redbus.in/bus-tickets/bangalore-to-kozhikode: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF60335EEB2+31554]
	(No symbol) [0x00007FF6032D7EE9]
	(No symbol) [0x00007FF60319872A]
	(No symbol) [0x00007FF6031E8434]
	(No symbol) [0x00007FF6031E853C]
	(No symbol) [0x00007FF60322F6A7]
	(No symbol) [0x00007FF60320D06F]
	(No symbol) [0x00007FF60322C977]
	(No symbol) [0x00007FF60320CDD3]
	(No symbol) [0x00007FF6031DA33B]
	(No symbol) [0x00007FF6031DAED1]
	GetHandleVerifier [0x00007FF603668B2D+3217341]
	GetHandleVerifier [0x00007FF6036B5AF3+3532675]
	GetHandleVerifier [0x00007FF6036AB0F0+3489152]
	GetHandleVerifier [0x00007FF60340E786+750614]
	(No symbol) [0x00007FF6032E376F]
	(No symbol) [0x00007FF6032DEB24]
	(No symbol) [0x00007FF6032DECB2]
	(No symbol) [0x00007FF6032CE17F]
	BaseThreadInitThunk [0x00007FFE5633257D+29]
	RtlUserThreadStart [0x00007FFE572CAF28+40]

Error occurred while scraping bus details for https://www.redbus.in/bus

NameError: name 'driver' is not defined