In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

# redBus listing page for ASTC routes; scrape_all_bus_data (below) opens this URL
# before processing each results page.
BASE_URL = "https://www.redbus.in/online-booking/astc/?utm_source=rtchometile"

# Initialize the WebDriver
def initialize_browser():
    """Start a Chrome WebDriver session, maximize its window and return it."""
    driver = webdriver.Chrome()
    # Maximize before handing the driver back so every caller gets the same viewport.
    driver.maximize_window()
    return driver

# Load the webpage
def open_page(browser, url, wait_seconds=4):
    """Navigate ``browser`` to ``url`` and pause so the page can finish rendering.

    Args:
        browser: Object exposing ``get(url)`` (a Selenium WebDriver in this notebook).
        url: Address to load.
        wait_seconds: Fixed pause after navigation, in seconds. Defaults to 4,
            the delay that was previously hard-coded here.

    Returns:
        None.
    """
    browser.get(url)
    # Fixed pause rather than an explicit wait: gives client-side scripts time to
    # populate the page after navigation returns.
    time.sleep(wait_seconds)

# Extract the routes from the page
def extract_routes(browser):
    """Return the links and display names of the route elements on the current page.

    Looks up every element with class ``route`` and returns two parallel lists:
    the ``href`` attribute of each element, and each element's stripped text.

    Args:
        browser: Selenium WebDriver positioned on the route listing page.

    Returns:
        tuple[list, list]: ``(route_links, route_names)`` in page order.
    """
    anchors = browser.find_elements(By.CLASS_NAME, 'route')

    # First pass: collect every href.
    hrefs = []
    for anchor in anchors:
        hrefs.append(anchor.get_attribute('href'))

    # Second pass: collect the visible label of each element.
    labels = []
    for anchor in anchors:
        labels.append(anchor.text.strip())

    return hrefs, labels

# Extract details for a specific bus route
def fetch_bus_information(browser, route_url, route_name):
    """Collect one record per bus listed on a route page.

    Navigates ``browser`` to ``route_url``, clicks the first clickable element
    with class ``button`` if one appears within 10 seconds (treated here as the
    "View Buses" toggle), scrolls until the page stops growing so that rows
    appended on scroll are present, and then reads the bus fields by position.

    Args:
        browser: Selenium WebDriver used for navigation and element lookups.
        route_url: URL of the route page to open.
        route_name: Route label copied into every returned record.

    Returns:
        list[dict]: One dict per bus-name element found, with keys ``Route``,
        ``Route_URL``, ``Bus_Name``, ``Bus_Type``, ``Departure_Time``,
        ``Duration``, ``Arrival_Time``, ``Rating``, ``Fare`` and
        ``Seats_Available``. A field is ``"N/A"`` when its element list is
        shorter than the list of bus names. Returns ``[]`` if any step raises;
        the error is printed rather than propagated.
    """
    try:
        browser.get(route_url)
        time.sleep(4)  # Allow the route page to load

        try:
            # Check if the "View Buses" button exists and click it
            view_buses_btn = WebDriverWait(browser, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            # Click through JavaScript so an overlapping element cannot intercept it.
            browser.execute_script("arguments[0].click();", view_buses_btn)
            time.sleep(4)  # Wait for bus details to load
        except Exception:
            # Deliberately best-effort: a missing button is reported and the
            # scrape continues with whatever is already on the page.
            print(f"No 'View Buses' button found for route: {route_name}")

        # A single jump to the bottom only triggers one round of loading, so
        # keep scrolling until the document height stops changing.
        _scroll_until_page_stops_growing(browser)

        # Locate bus details on the page. Each field is looked up independently
        # and matched to a bus by list position.
        # NOTE(review): if a bus lacks one of these elements (e.g. no rating),
        # the later values in that list shift onto the wrong bus - confirm
        # against the live page markup.
        names = browser.find_elements(By.CLASS_NAME, "travels.lh-24.f-bold.d-color")
        field_elements = {
            "Bus_Type": browser.find_elements(By.CLASS_NAME, "bus-type.f-12.m-top-16.l-color.evBus"),
            "Departure_Time": browser.find_elements(By.CLASS_NAME, "dp-time.f-19.d-color.f-bold"),
            "Duration": browser.find_elements(By.CLASS_NAME, "dur.l-color.lh-24"),
            "Arrival_Time": browser.find_elements(By.CLASS_NAME, "bp-time.f-19.d-color.disp-Inline"),
            "Rating": browser.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']"),
            "Fare": browser.find_elements(By.CLASS_NAME, "fare.d-block"),
            "Seats_Available": browser.find_elements(
                By.XPATH,
                "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left m-top-16')]",
            ),
        }

        # Store data in a list of dictionaries, one per bus name found.
        bus_data = []
        for index, name_element in enumerate(names):
            record = {
                "Route": route_name,
                "Route_URL": route_url,
                "Bus_Name": name_element.text,
            }
            for key, elements in field_elements.items():
                record[key] = _text_or_na(elements, index)
            bus_data.append(record)
        return bus_data

    except Exception as error:
        print(f"Error fetching bus information for route {route_name}: {error}")
        return []


def _scroll_until_page_stops_growing(browser, pause_seconds=3, max_rounds=30):
    """Scroll to the bottom repeatedly until the document height is unchanged."""
    previous_height = browser.execute_script("return document.body.scrollHeight")
    # max_rounds bounds the loop in case the page keeps growing indefinitely.
    for _ in range(max_rounds):
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause_seconds)
        current_height = browser.execute_script("return document.body.scrollHeight")
        if current_height == previous_height:
            break
        previous_height = current_height


def _text_or_na(elements, index):
    """Return ``elements[index].text``, or ``"N/A"`` when the list is too short."""
    return elements[index].text if index < len(elements) else "N/A"

# Scrape bus data across all pages
def scrape_all_bus_data(base_url=None, num_pages=5):
    """Scrape bus records for every route on the paginated route listing.

    For each listing page, reloads the listing, clicks the matching pagination
    tab (pages after the first), collects the route links on that page and
    fetches the bus details for each route.

    Args:
        base_url: Listing page to scrape. Defaults to the module-level
            ``BASE_URL`` when omitted.
        num_pages: Number of listing pages to walk through, starting at page 1.
            Defaults to 5, the value that was previously hard-coded.

    Returns:
        list[dict]: The bus records returned by ``fetch_bus_information`` for
        every route visited, in visiting order. Pages whose pagination tab
        cannot be clicked are reported and skipped.
    """
    if base_url is None:
        base_url = BASE_URL

    collected_data = []
    browser = initialize_browser()

    try:
        for page in range(1, num_pages + 1):
            # Visiting route pages navigates away from the listing, so it is
            # reloaded at the start of every iteration.
            open_page(browser, base_url)

            # Handle pagination for subsequent pages
            if page > 1:
                try:
                    pagination_button = WebDriverWait(browser, 10).until(
                        EC.element_to_be_clickable((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                    )
                    browser.execute_script("arguments[0].scrollIntoView();", pagination_button)
                    browser.execute_script("arguments[0].click();", pagination_button)
                    time.sleep(5)  # Wait for the new page to load
                except Exception as e:
                    print(f"Error accessing page {page}: {e}")
                    continue

            # Extract routes on the current page. Links are read up front, before
            # any navigation, so they stay usable once the listing is left.
            route_links, route_names = extract_routes(browser)
            for route_link, route_name in zip(route_links, route_names):
                if not route_link:
                    # No href to open; skip instead of sending an empty URL to the browser.
                    print(f"Skipping route without a link: {route_name}")
                    continue
                # fetch_bus_information returns [] on failure, so extend is always safe.
                collected_data.extend(fetch_bus_information(browser, route_link, route_name))

    finally:
        # Always release the browser, even if scraping stops with an exception.
        browser.quit()
    return collected_data

# Main entry point
if __name__ == "__main__":
    # Run the full scrape; the result is a list of per-bus dictionaries.
    bus_details = scrape_all_bus_data()

    if not bus_details:
        print("No bus data collected.")
    else:
        # Build the table once from all records, then persist it without the index column.
        df = pd.DataFrame(bus_details)
        df.to_csv('assam_bus_information.csv', index=False)
        print("Bus data successfully saved to 'assam_bus_information.csv'")


Error occurred while scraping bus details for https://www.redbus.in/bus-tickets/north-lakhimpur-to-sibsagar: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF75ECBEEA2+31554]
	(No symbol) [0x00007FF75EC37ED9]
	(No symbol) [0x00007FF75EAF872A]
	(No symbol) [0x00007FF75EB48434]
	(No symbol) [0x00007FF75EB4853C]
	(No symbol) [0x00007FF75EB8F6A7]
	(No symbol) [0x00007FF75EB6D06F]
	(No symbol) [0x00007FF75EB8C977]
	(No symbol) [0x00007FF75EB6CDD3]
	(No symbol) [0x00007FF75EB3A33B]
	(No symbol) [0x00007FF75EB3AED1]
	GetHandleVerifier [0x00007FF75EFC8B1D+3217341]
	GetHandleVerifier [0x00007FF75F015AE3+3532675]
	GetHandleVerifier [0x00007FF75F00B0E0+3489152]
	GetHandleVerifier [0x00007FF75ED6E776+750614]
	(No symbol) [0x00007FF75EC4375F]
	(No symbol) [0x00007FF75EC3EB14]
	(No symbol) [0x00007FF75EC3ECA2]
	(No symbol) [0x00007FF75EC2E16F]
	BaseThreadInitThunk [0x00007FFE5633257D+29]
	RtlUserThreadStart [0x00007FFE572CAF28+40]

Error occurred while scraping bus details for https://www.redbus.i

NameError: name 'driver' is not defined

In [2]:
# Show the DataFrame built by the scraping cell; `df` is only assigned there when rows were collected.
# NOTE(review): the saved output below has column names (e.g. Route_Name, Star_Rating, "Unnamed: 0")
# that differ from the keys produced by fetch_bus_information - it looks like output from a
# different run; re-run the notebook to refresh it.
df

Unnamed: 0,Route_Name,Route_Link,Bus_Name,Bus_Type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability
0,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...,Assam State Transport Corporation (ASTC) - 147480,Bharat Benz A/C Seater (2+2),05:00,04h 00m,09:00,3.4,INR 270,3 Seats available
1,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...,Assam State Transport Corporation (ASTC) - 135927,Volvo AC Seater 2+2,07:10,04h 30m,11:40,4.3,INR 298,12 Seats available
2,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...,Assam State Transport Corporation (ASTC) - 173420,Bharat Benz A/C Seater (2+2),10:35,04h 40m,15:15,4.1,INR 298,7 Seats available
3,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...,Assam State Transport Corporation (ASTC) - 157959,Bharat Benz A/C Seater (2+2),14:00,04h 30m,18:30,4.4,INR 298,26 Seats available
4,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...,Assam State Transport Corporation (ASTC) - 168821,Bharat Benz A/C Seater (2+2),15:45,04h 45m,20:30,2.5,INR 298,32 Seats available
...,...,...,...,...,...,...,...,...,...,...
401,Dibrugarh to Bokakhat,https://www.redbus.in/bus-tickets/dibrugarh-to...,Aniruddha Travels,AC Seater (2+2),07:30,06h 30m,14:00,3.6,450,2 Seats available
402,Dibrugarh to Bokakhat,https://www.redbus.in/bus-tickets/dibrugarh-to...,Shree Ram Travels,A/C Seater (2+1),20:00,05h 00m,01:00,4.8,475,13 Seats available
403,Dibrugarh to Bokakhat,https://www.redbus.in/bus-tickets/dibrugarh-to...,Shree Ram Travels,Bharat Benz A/C Seater /Sleeper (2+1),20:45,04h 55m,01:40,4.8,505,7 Seats available
404,Dibrugarh to Bokakhat,https://www.redbus.in/bus-tickets/dibrugarh-to...,Christina Travels,NON A/C Seater (2+2),08:20,03h 55m,12:15,2.8,650,15 Seats available
