In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

# redBus page for the Chandigarh Transport Undertaking (CTU) operator.
# scrape_data() opens this URL first and reads the route links from it.
TARGET_URL = "https://www.redbus.in/online-booking/chandigarh-transport-undertaking-ctu"

def setup_browser():
    """Start a Chrome WebDriver session with a maximized window and return it."""
    driver = webdriver.Chrome()
    # Maximize before handing the driver back to the caller.
    driver.maximize_window()
    return driver

def open_url(browser, url, wait_seconds=5):
    """Navigate to ``url`` and pause so the page has time to render.

    Args:
        browser: Object exposing a ``get(url)`` method (a Selenium WebDriver).
        url: Address to load.
        wait_seconds: Fixed pause after ``get`` returns, in seconds. Defaults
            to 5, the delay this function has always used. A value of zero or
            less skips the pause entirely.
    """
    browser.get(url)
    # time.sleep() rejects negative values, so only sleep for positive delays.
    if wait_seconds > 0:
        time.sleep(wait_seconds)

def get_routes(browser):
    """Collect the link and display name of every route element on the current page.

    Returns a ``(route_links, route_names)`` pair of parallel lists: the
    ``href`` attribute and the whitespace-stripped text of each element whose
    class name is ``route``.
    """
    route_elements = browser.find_elements(By.CLASS_NAME, 'route')

    # Read all hrefs first, then all texts, keeping the two lists index-aligned.
    hrefs = []
    for element in route_elements:
        hrefs.append(element.get_attribute('href'))

    labels = []
    for element in route_elements:
        labels.append(element.text.strip())

    return hrefs, labels

def _text_or_na(elements, index):
    """Return the text of ``elements[index]``, or "N/A" if the list is too short."""
    return elements[index].text if index < len(elements) else "N/A"


def _scroll_until_stable(browser, pause=3, max_scrolls=10):
    """Scroll to the bottom repeatedly until the page height stops growing.

    A single scroll only triggers one round of lazy loading; repeating until
    ``document.body.scrollHeight`` is unchanged (or ``max_scrolls`` is reached)
    gives later batches a chance to load as well.
    """
    last_height = browser.execute_script("return document.body.scrollHeight")
    for _ in range(max_scrolls):
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)
        new_height = browser.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height


def _click_view_buses(browser, route_name):
    """Click the "View Buses" button if one becomes clickable within 10 seconds."""
    try:
        view_buses = WebDriverWait(browser, 10).until(
            EC.element_to_be_clickable((By.CLASS_NAME, "button"))
        )
        # Click through JavaScript rather than the element's own click().
        browser.execute_script("arguments[0].click();", view_buses)
        time.sleep(4)
    except Exception:
        # Best effort: a missing button is reported and scraping continues.
        print(f"No 'View Buses' button found for route: {route_name}")


def get_bus_info(browser, route_url, route_name):
    """Scrape the bus listings shown for a single route.

    Args:
        browser: Selenium WebDriver used to load and query the page.
        route_url: URL of the route's listing page.
        route_name: Display name of the route, copied into every record.

    Returns:
        A list with one dict per bus name found on the page. Each dict has the
        keys ``Route``, ``Route_URL``, ``Bus_Name``, ``Bus_Type``,
        ``Departure_Time``, ``Duration``, ``Arrival_Time``, ``Rating``,
        ``Fare`` and ``Seats_Available``. A field whose element list is shorter
        than the list of bus names is filled with ``"N/A"``. Any exception is
        printed and an empty list is returned instead.
    """
    try:
        browser.get(route_url)
        time.sleep(4)

        _click_view_buses(browser, route_name)

        # Scroll until no more content is appended so all buses are loaded.
        _scroll_until_stable(browser)

        # One element list per output column, in output order.
        # NOTE(review): fields are matched to buses purely by position. If a
        # bus card lacks one of these elements (e.g. no rating), the values
        # for later buses may shift up by one -- confirm against the page
        # markup and consider querying within each bus card instead.
        columns = {
            "Bus_Name": browser.find_elements(By.CLASS_NAME, "travels.lh-24.f-bold.d-color"),
            "Bus_Type": browser.find_elements(By.CLASS_NAME, "bus-type.f-12.m-top-16.l-color.evBus"),
            "Departure_Time": browser.find_elements(By.CLASS_NAME, "dp-time.f-19.d-color.f-bold"),
            "Duration": browser.find_elements(By.CLASS_NAME, "dur.l-color.lh-24"),
            "Arrival_Time": browser.find_elements(By.CLASS_NAME, "bp-time.f-19.d-color.disp-Inline"),
            "Rating": browser.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']"),
            "Fare": browser.find_elements(By.CLASS_NAME, "fare.d-block"),
            "Seats_Available": browser.find_elements(
                By.XPATH,
                "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left m-top-16')]",
            ),
        }

        # The number of bus names decides how many records are produced.
        bus_count = len(columns["Bus_Name"])
        return [
            {
                "Route": route_name,
                "Route_URL": route_url,
                **{
                    column: _text_or_na(elements, index)
                    for column, elements in columns.items()
                },
            }
            for index in range(bus_count)
        ]

    except Exception as err:
        print(f"Error fetching bus details for {route_name}: {err}")
        return []

def scrape_data(max_pages=5):
    """Scrape every route on the listing pages and the buses on each route.

    Route links are collected from all listing pages *before* any route page
    is visited. Visiting a route navigates the browser away from the listing,
    so the pagination tabs can only be clicked while the listing is still
    loaded.

    Args:
        max_pages: Number of listing pages to walk through. Defaults to 5,
            the page count the scraper has always assumed.

    Returns:
        A list of per-bus dicts as produced by ``get_bus_info``; empty if
        nothing could be scraped. The browser is always closed on exit.
    """
    browser = setup_browser()
    all_data = []

    try:
        open_url(browser, TARGET_URL)

        # Phase 1: stay on the listing and gather (link, name) for every page.
        routes = []
        for page in range(1, max_pages + 1):
            if page > 1:
                try:
                    pagination = WebDriverWait(browser, 10).until(
                        EC.element_to_be_clickable((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                    )
                    browser.execute_script("arguments[0].click();", pagination)
                    time.sleep(5)  # Wait for the page to load
                except Exception as e:
                    print(f"Error navigating to page {page}: {e}")
                    # Skip this page so the previous page is not scraped twice.
                    continue

            route_links, route_names = get_routes(browser)
            routes.extend(zip(route_links, route_names))

        # Phase 2: now it is safe to leave the listing and visit each route.
        for route_link, route_name in routes:
            all_data.extend(get_bus_info(browser, route_link, route_name))

    finally:
        browser.quit()

    return all_data

if __name__ == "__main__":
    # Run the full scrape; the result is a list of per-bus dicts.
    bus_data = scrape_data()

    if not bus_data:
        print("No data collected.")
    else:
        # Build the table and write it to CSV without the index column.
        df = pd.DataFrame(bus_data)
        df.to_csv('chandigarh_transport_data.csv', index=False)
        print("Data saved to 'chandigarh_transport_data.csv'")


NameError: name 'driver' is not defined

In [2]:
df

Unnamed: 0,Route_Name,Route_Link,Bus_Name,Bus_Type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability
0,Chandigarh to Delhi,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh Transport Undertaking (CTU) - 165681,HVAC Seater (2+3),07:00,05h 35m,12:35,4.0,INR 404.76,47 Seats available
1,Chandigarh to Delhi,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh Transport Undertaking (CTU) - 165684,HVAC Seater (2+3),07:55,05h 35m,13:30,4.0,INR 404.76,47 Seats available
2,Chandigarh to Delhi,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh Transport Undertaking (CTU) - 165687,HVAC Seater (2+3),08:25,05h 35m,14:00,3.8,INR 404.76,45 Seats available
3,Chandigarh to Delhi,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh Transport Undertaking (CTU) - 165676,HVAC Seater (2+3),09:25,05h 35m,15:00,4.0,INR 404.76,47 Seats available
4,Chandigarh to Delhi,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh Transport Undertaking (CTU) - 165689,HVAC Seater (2+3),10:30,05h 35m,16:05,3.6,INR 404.76,47 Seats available
...,...,...,...,...,...,...,...,...,...,...
1251,Chandigarh to Jawala Ji,https://www.redbus.in/bus-tickets/chandigarh-t...,New Himalaya Travels,Volvo A/C Semi Sleeper (2+2),01:50,02h 45m,04:35,4.4,INR 450,14 Seats available
1252,Chandigarh to Jawala Ji,https://www.redbus.in/bus-tickets/chandigarh-t...,Laxmi holidays,Volvo 9600 A/C Semi Sleeper (2+2),23:55,04h 50m,04:45,4.7,664,45 Seats available
1253,Chandigarh to Jawala Ji,https://www.redbus.in/bus-tickets/chandigarh-t...,Asian's Shina Tour & Travels (Swoyambhu Manjus...,A/C Semi Sleeper (2+2),00:29,04h 31m,05:00,2.5,INR 999,35 Seats available
1254,Chandigarh to Jawala Ji,https://www.redbus.in/bus-tickets/chandigarh-t...,Northern Travels,Volvo Multi-Axle B9R A/c Semi Sleeper (2+2),01:20,04h 55m,06:15,2.6,INR 529,17 Seats available
