In [64]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Step 1: Initialize the WebDriver
def initialize_driver():
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

# Step 2: Load the webpage
def load_page(driver, url):
    driver.get(url)
    time.sleep(2)

# Step 3: Extract bus route links and names
def get_routes(driver):
    routes = driver.find_elements(By.CLASS_NAME, "route")
    links = [route.get_attribute('href') for route in routes]
    names = [route.text.strip() for route in routes]
    return list(zip(links, names))

# Step 4: Scrape bus details for a single route
def scrape_bus_details(driver, url, route_name):
    try:
        driver.get(url)
        time.sleep(2)

        # Click "View Buses" button
        click_view_buses_button(driver)

        # Scroll to load all bus details
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

        # Extract bus details
        return extract_bus_details(driver, url, route_name)
    except Exception as e:
        print(f"Error scraping details for {route_name} ({url}): {e}")
        return []

# Helper: Click the "View Buses" button
def click_view_buses_button(driver):
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CLASS_NAME, "button"))
    ).click()
    time.sleep(2)

# Helper: Extract bus details from the current page
def extract_bus_details(driver, url, route_name):
    buses = driver.find_elements(By.CLASS_NAME, "travels.lh-24.f-bold.d-color")
    types = driver.find_elements(By.CLASS_NAME, "bus-type.f-12.m-top-16.l-color.evBus")
    departures = driver.find_elements(By.CLASS_NAME, "dp-time.f-19.d-color.f-bold")
    durations = driver.find_elements(By.CLASS_NAME, "dur.l-color.lh-24")
    arrivals = driver.find_elements(By.CLASS_NAME, "bp-time.f-19.d-color.disp-Inline")
    ratings = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
    prices = driver.find_elements(By.CLASS_NAME, "fare.d-block")
    seats = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

    details = []
    for i in range(len(buses)):
        details.append({
            "Route_Name": route_name,
            "Route_Link": url,
            "Bus_Name": buses[i].text,
            "Bus_Type": types[i].text,
            "Departing_Time": departures[i].text,
            "Duration": durations[i].text,
            "Reaching_Time": arrivals[i].text,
            "Star_Rating": ratings[i].text if i < len(ratings) else "N/A",
            "Price": prices[i].text,
            "Seat_Availability": seats[i].text if i < len(seats) else "N/A"
        })
    return details

# Step 5: Save collected data to a CSV file
def save_to_csv(data, filename):
    df = pd.DataFrame(data)
    df.to_csv(filename, index=False)

# Main function to execute the scraping process
def scrape_bus_details_main(url, output_file):
    driver = initialize_driver()
    try:
        # Step 1: Load the webpage
        load_page(driver, url)

        # Step 2: Get all bus routes
        routes = get_routes(driver)

        # Step 3: Collect all bus details
        all_bus_data = []
        for link, name in routes:
            route_data = scrape_bus_details(driver, link, name)
            all_bus_data.extend(route_data)

        # Step 4: Save data to CSV
        save_to_csv(all_bus_data, output_file)
        print(f"Data successfully saved to '{output_file}'")
    finally:
        driver.quit()


In [65]:
# Modify these variables for different URLs and output file names

# 1. Telangana state road transport corporation
TARGET_URL = "https://www.redbus.in/online-booking/tsrtc/?utm_source=rtchometile"
OUTPUT_FILENAME = "TSRTC.csv"

scrape_bus_details_main(TARGET_URL, OUTPUT_FILENAME)

Data successfully saved to 'TSRTC.csv'


In [66]:
# 2. Kerala state road transport corporation

TARGET_URL = "https://www.redbus.in/online-booking/ksrtc-kerala/?utm_source=rtchometile"
OUTPUT_FILENAME = "KSRTC.csv"

scrape_bus_details_main(TARGET_URL, OUTPUT_FILENAME)

Error scraping details for Mysore to Kozhikode (https://www.redbus.in/bus-tickets/mysore-to-kozhikode): Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF7B2FA6CF5+28821]
	(No symbol) [0x00007FF7B2F13880]
	(No symbol) [0x00007FF7B2DB578A]
	(No symbol) [0x00007FF7B2E091BE]
	(No symbol) [0x00007FF7B2E094AC]
	(No symbol) [0x00007FF7B2E52647]
	(No symbol) [0x00007FF7B2E2F33F]
	(No symbol) [0x00007FF7B2E4F412]
	(No symbol) [0x00007FF7B2E2F0A3]
	(No symbol) [0x00007FF7B2DFA778]
	(No symbol) [0x00007FF7B2DFB8E1]
	GetHandleVerifier [0x00007FF7B32DFCED+3408013]
	GetHandleVerifier [0x00007FF7B32F745F+3504127]
	GetHandleVerifier [0x00007FF7B32EB63D+3455453]
	GetHandleVerifier [0x00007FF7B306BDFB+835995]
	(No symbol) [0x00007FF7B2F1EB9F]
	(No symbol) [0x00007FF7B2F1A854]
	(No symbol) [0x00007FF7B2F1A9ED]
	(No symbol) [0x00007FF7B2F0A1D9]
	BaseThreadInitThunk [0x00007FFECDEBE8D7+23]
	RtlUserThreadStart [0x00007FFECE95FBCC+44]

Data successfully saved to 'KSRTC.csv'


In [67]:
# 3. Uttar pradesh state road transport corporation

TARGET_URL = "https://www.redbus.in/online-booking/uttar-pradesh-state-road-transport-corporation-upsrtc/?utm_source=rtchometile"
OUTPUT_FILENAME = "UPSRTC.csv"

scrape_bus_details_main(TARGET_URL, OUTPUT_FILENAME)

Data successfully saved to 'UPSRTC.csv'


In [68]:
# 4. Jammu kashmri state road transport corporation

TARGET_URL = "https://www.redbus.in/online-booking/jksrtc"
OUTPUT_FILENAME = "JKSRTC.csv"

scrape_bus_details_main(TARGET_URL, OUTPUT_FILENAME)

Data successfully saved to 'JKSRTC.csv'


In [69]:
# 5. Kadamba state road transport corporation

TARGET_URL = "https://www.redbus.in/online-booking/ktcl/?utm_source=rtchometile"
OUTPUT_FILENAME = "KTCL.csv"

scrape_bus_details_main(TARGET_URL, OUTPUT_FILENAME)

Data successfully saved to 'KTCL.csv'


In [70]:
# 6. South Bengal state road transport corporation

TARGET_URL = "https://www.redbus.in/online-booking/south-bengal-state-transport-corporation-sbstc/?utm_source=rtchometilee"
OUTPUT_FILENAME = "SBSTC.csv"

scrape_bus_details_main(TARGET_URL, OUTPUT_FILENAME)

Data successfully saved to 'SBSTC.csv'


In [71]:
# 7. Himachal road transport corporation

TARGET_URL = "https://www.redbus.in/online-booking/hrtc/?utm_source=rtchometile"
OUTPUT_FILENAME = "HRTC.csv"

scrape_bus_details_main(TARGET_URL, OUTPUT_FILENAME)

Data successfully saved to 'HRTC.csv'


In [72]:
# 8. Assam state road transport corporation

TARGET_URL = "https://www.redbus.in/online-booking/astc/?utm_source=rtchometile"
OUTPUT_FILENAME = "ASTC.csv"

scrape_bus_details_main(TARGET_URL, OUTPUT_FILENAME)

Error scraping details for Goalpara to Guwahati (https://www.redbus.in/bus-tickets/goalpara-to-guwahati): Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF7B2FA6CF5+28821]
	(No symbol) [0x00007FF7B2F13880]
	(No symbol) [0x00007FF7B2DB578A]
	(No symbol) [0x00007FF7B2E091BE]
	(No symbol) [0x00007FF7B2E094AC]
	(No symbol) [0x00007FF7B2E52647]
	(No symbol) [0x00007FF7B2E2F33F]
	(No symbol) [0x00007FF7B2E4F412]
	(No symbol) [0x00007FF7B2E2F0A3]
	(No symbol) [0x00007FF7B2DFA778]
	(No symbol) [0x00007FF7B2DFB8E1]
	GetHandleVerifier [0x00007FF7B32DFCED+3408013]
	GetHandleVerifier [0x00007FF7B32F745F+3504127]
	GetHandleVerifier [0x00007FF7B32EB63D+3455453]
	GetHandleVerifier [0x00007FF7B306BDFB+835995]
	(No symbol) [0x00007FF7B2F1EB9F]
	(No symbol) [0x00007FF7B2F1A854]
	(No symbol) [0x00007FF7B2F1A9ED]
	(No symbol) [0x00007FF7B2F0A1D9]
	BaseThreadInitThunk [0x00007FFECDEBE8D7+23]
	RtlUserThreadStart [0x00007FFECE95FBCC+44]

Error scraping details for Jorhat to North Lakhimpur (https://www.red

In [73]:
# 9. North Bengal state road transport corporation

TARGET_URL = "https://www.redbus.in/travels/nbstc"
OUTPUT_FILENAME = "NBSTC.csv"

scrape_bus_details_main(TARGET_URL, OUTPUT_FILENAME)

Error scraping details for Siliguri to Darjeeling (https://www.redbus.in/bus-tickets/siliguri-to-darjeeling): Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF7B2FA6CF5+28821]
	(No symbol) [0x00007FF7B2F13880]
	(No symbol) [0x00007FF7B2DB578A]
	(No symbol) [0x00007FF7B2E091BE]
	(No symbol) [0x00007FF7B2E094AC]
	(No symbol) [0x00007FF7B2E52647]
	(No symbol) [0x00007FF7B2E2F33F]
	(No symbol) [0x00007FF7B2E4F412]
	(No symbol) [0x00007FF7B2E2F0A3]
	(No symbol) [0x00007FF7B2DFA778]
	(No symbol) [0x00007FF7B2DFB8E1]
	GetHandleVerifier [0x00007FF7B32DFCED+3408013]
	GetHandleVerifier [0x00007FF7B32F745F+3504127]
	GetHandleVerifier [0x00007FF7B32EB63D+3455453]
	GetHandleVerifier [0x00007FF7B306BDFB+835995]
	(No symbol) [0x00007FF7B2F1EB9F]
	(No symbol) [0x00007FF7B2F1A854]
	(No symbol) [0x00007FF7B2F1A9ED]
	(No symbol) [0x00007FF7B2F0A1D9]
	BaseThreadInitThunk [0x00007FFECDEBE8D7+23]
	RtlUserThreadStart [0x00007FFECE95FBCC+44]

Error scraping details for Kolkata to Cooch Behar (West Bengal) (

In [74]:
# 10. PEPSU(Punjab) state road transport corporation

TARGET_URL = "https://www.redbus.in/online-booking/pepsu/?utm_source=rtchometile"
OUTPUT_FILENAME = "PEPSU.csv"

scrape_bus_details_main(TARGET_URL, OUTPUT_FILENAME)

Data successfully saved to 'PEPSU.csv'
