In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

# redBus online-booking landing page for JKSRTC. The scraper opens this page
# first and collects the route links listed on it.
URL = "https://www.redbus.in/online-booking/jksrtc"

def initialize_driver():
    """Start a Chrome WebDriver session, maximize its window, and return it."""
    browser = webdriver.Chrome()
    # Maximize before any page is loaded so every page is rendered full-size.
    browser.maximize_window()
    return browser

def load_page(driver, url, wait_seconds=5):
    """Load ``url`` in the browser and pause so the page can finish rendering.

    Args:
        driver: WebDriver-like object exposing ``get(url)``.
        url: Address to open.
        wait_seconds: Fixed pause, in seconds, after navigation. Defaults to
            5, the delay this function has always used; pass a smaller value
            for fast pages or in tests.

    Returns:
        None.
    """
    driver.get(url)
    # Fixed pause rather than an explicit wait: no specific element is known
    # here whose presence would signal that the page is ready.
    time.sleep(wait_seconds)

def scrape_bus_routes(driver):
    """Collect the link and display name of every route on the current page.

    Elements with class ``route`` are read in a single pass so that each link
    stays paired with its own name. Elements without an ``href`` are skipped:
    they cannot be navigated to and would otherwise put ``None`` into the
    link list.

    Args:
        driver: WebDriver-like object positioned on a route listing page.

    Returns:
        tuple[list, list]: ``(route_links, route_names)``, two lists of equal
        length. Both are empty when no routes are found or when any error
        occurs while reading the page (the error is printed, not raised).
    """
    route_links = []
    route_names = []
    try:
        for route in driver.find_elements(By.CLASS_NAME, 'route'):
            href = route.get_attribute('href')
            if not href:
                # Not a navigable route entry; skip it.
                continue
            route_links.append(href)
            route_names.append(route.text.strip())
    except Exception as e:
        print(f"Error scraping routes: {e}")
        # Discard partial results, as callers expect all-or-nothing here.
        return [], []
    return route_links, route_names

def scrape_bus_details(driver, url, route_name, wait_seconds=5):
    """Scrape the bus listing for one route and return one record per bus.

    Steps: open ``url``, click the first clickable element with class
    ``button`` if one appears within 10 seconds (best effort), scroll to the
    bottom of the page once, then read each field for all buses.

    Limitation: every field is collected as a page-wide list and the lists
    are paired by position. When a bus lacks a field (for example, no
    rating), later values for that field shift up by one; positions past the
    end of a shorter list are reported as "N/A".

    Args:
        driver: WebDriver-like object.
        url: Route page to open; also stored in each record as ``Route_Link``.
        route_name: Label stored in each record as ``Route_Name`` and used in
            log messages.
        wait_seconds: Fixed pause, in seconds, after navigation, after the
            button click and after scrolling. Defaults to 5.

    Returns:
        list[dict]: One dict per bus name found, with the keys Route_Name,
        Route_Link, Bus_Name, Bus_Type, Departure_Time, Duration,
        Arrival_Time, Rating, Price and Seats_Available. Empty when the page
        cannot be scraped (the error is printed, not raised).
    """
    try:
        driver.get(url)
        time.sleep(wait_seconds)

        # Click 'View Buses' button if it exists
        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            # Click through JavaScript so an overlapping element cannot
            # intercept the click.
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(wait_seconds)  # Wait for buses to load
        except Exception as e:
            print(f"'View Buses' button not found for {route_name}: {e}")

        # Scroll to the bottom once to trigger loading of further bus items
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(wait_seconds)

        # Output column -> locator. Elements that must carry several classes
        # are matched with explicit CSS selectors; By.CLASS_NAME is meant for
        # a single class name, not a dotted list of them.
        field_locators = {
            "Bus_Name": (By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color"),
            "Bus_Type": (By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color.evBus"),
            "Departure_Time": (By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold"),
            "Duration": (By.CSS_SELECTOR, ".dur.l-color.lh-24"),
            "Arrival_Time": (By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline"),
            "Rating": (By.XPATH, "//div[@class='rating-sec lh-24']"),
            "Price": (By.CSS_SELECTOR, ".fare.d-block"),
            "Seats_Available": (By.XPATH, "//div[contains(@class, 'seat-left')]"),
        }
        found = {
            field: driver.find_elements(by, selector)
            for field, (by, selector) in field_locators.items()
        }

        bus_details = []
        # The bus-name list decides how many records are produced.
        for i in range(len(found["Bus_Name"])):
            record = {"Route_Name": route_name, "Route_Link": url}
            for field, elements in found.items():
                record[field] = _text_or_na(elements, i)
            bus_details.append(record)

        return bus_details
    except Exception as e:
        print(f"Error scraping bus details for {route_name}: {e}")
        return []


def _text_or_na(elements, index):
    """Return the text of ``elements[index]``, or "N/A" if the list is too short."""
    return elements[index].text if index < len(elements) else "N/A"

def scrape_all_pages():
    """Scrape bus details for every route on every listing page.

    For each listing page the route links are collected first, then each
    route page is visited in turn. Visiting a route navigates the browser
    away from the listing, so before moving on the listing is reloaded and
    the 'Next' link is clicked once per page already processed, plus once
    more to reach the following page.

    Scraping stops when:
      * the listing has no 'Next' link (normal end of pagination),
      * a page yields no route link that has not been scraped already
        (guards against a 'Next' link that never disappears), or
      * an error occurs while paginating (printed, not raised).

    In all of these cases the records collected so far are returned. The
    browser is always closed, even if an unexpected error propagates.

    Returns:
        list[dict]: Bus records from all visited routes, in visiting order.
    """
    next_link_xpath = "//a[contains(text(), 'Next')]"
    driver = initialize_driver()
    all_bus_details = []
    seen_links = set()

    try:
        load_page(driver, URL)
        pages_advanced = 0  # number of 'Next' clicks needed to reach the current page

        while True:
            route_links, route_names = scrape_bus_routes(driver)

            # Scrape bus details for each route not visited before
            found_new_route = False
            for route_url, route_name in zip(route_links, route_names):
                if route_url in seen_links:
                    continue
                seen_links.add(route_url)
                found_new_route = True
                all_bus_details.extend(scrape_bus_details(driver, route_url, route_name))

            if not found_new_route:
                break

            try:
                # The driver is now on a route page: go back to the listing
                # and walk forward to the page after the one just processed.
                load_page(driver, URL)
                reached_next_page = True
                for _ in range(pages_advanced + 1):
                    # find_elements returns an empty list when nothing
                    # matches, so the last page ends the loop quietly.
                    next_links = driver.find_elements(By.XPATH, next_link_xpath)
                    if not next_links:
                        reached_next_page = False
                        break
                    next_links[0].click()
                    time.sleep(5)  # Wait for the next page to load
            except Exception as e:
                print(f"Error during pagination: {e}")
                break

            if not reached_next_page:
                break
            pages_advanced += 1

    finally:
        driver.quit()

    return all_bus_details

if __name__ == "__main__":
    # Entry point: run the full scrape, then persist whatever came back.
    print("Scraping JKSRTC data...")
    all_data = scrape_all_pages()

    if not all_data:
        # Nothing was collected, so no CSV file is written.
        print("No data scraped.")
    else:
        # One row per scraped bus; the index is left out of the CSV.
        df = pd.DataFrame(all_data)
        df.to_csv("jk_bus_details.csv", index=False)
        print("Data successfully saved to 'jk_bus_details.csv'")


Error occurred while scraping bus details for https://www.redbus.in/bus-tickets/srinagar-to-jammu: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF75ECBEEA2+31554]
	(No symbol) [0x00007FF75EC37ED9]
	(No symbol) [0x00007FF75EAF872A]
	(No symbol) [0x00007FF75EB48434]
	(No symbol) [0x00007FF75EB4853C]
	(No symbol) [0x00007FF75EB8F6A7]
	(No symbol) [0x00007FF75EB6D06F]
	(No symbol) [0x00007FF75EB8C977]
	(No symbol) [0x00007FF75EB6CDD3]
	(No symbol) [0x00007FF75EB3A33B]
	(No symbol) [0x00007FF75EB3AED1]
	GetHandleVerifier [0x00007FF75EFC8B1D+3217341]
	GetHandleVerifier [0x00007FF75F015AE3+3532675]
	GetHandleVerifier [0x00007FF75F00B0E0+3489152]
	GetHandleVerifier [0x00007FF75ED6E776+750614]
	(No symbol) [0x00007FF75EC4375F]
	(No symbol) [0x00007FF75EC3EB14]
	(No symbol) [0x00007FF75EC3ECA2]
	(No symbol) [0x00007FF75EC2E16F]
	BaseThreadInitThunk [0x00007FFE5633257D+29]
	RtlUserThreadStart [0x00007FFE572CAF28+40]

Error occurred while scraping bus details for https://www.redbus.in/bus-tick

NameError: name 'driver' is not defined

In [2]:
# Display the DataFrame of bus details. `df` is only assigned in the previous
# cell when the scrape returned at least one record.
# NOTE(review): the saved output below shows columns such as Departing_Time,
# Reaching_Time, Star_Rating and an "Unnamed: 0" column, none of which the
# scraper above emits (it writes Departure_Time, Arrival_Time, Rating, ...).
# This `df` presumably came from a different run or CSV -- confirm by
# restarting the kernel and running all cells.
df

Unnamed: 0,Route_Name,Route_Link,Bus_Name,Bus_Type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability
0,Jammu (j and k) to Srinagar,https://www.redbus.in/bus-tickets/jammu-to-sri...,New Pal Travels,NON A/C Seater / Sleeper (2+2),20:15,10h 00m,06:15,1.5,719,33 Seats available
1,Jammu (j and k) to Srinagar,https://www.redbus.in/bus-tickets/jammu-to-sri...,Harikesh Tour N Travels,NON A/C Seater / Sleeper (2+2),20:30,09h 30m,06:00,1.1,674,40 Seats available
2,Jammu (j and k) to Srinagar,https://www.redbus.in/bus-tickets/jammu-to-sri...,North Kashmir Tour and Travels,NON A/C Sleeper (2+2),16:30,10h 00m,02:30,1.0,INR 699,40 Seats available
3,Jammu (j and k) to Srinagar,https://www.redbus.in/bus-tickets/jammu-to-sri...,North Kashmir Tour and Travels,NON A/C Sleeper (2+2),17:15,10h 00m,03:15,1.0,INR 699,40 Seats available
4,Delhi to Srinagar,https://www.redbus.in/bus-tickets/delhi-to-sri...,DIVA BUS,AC Sleeper (2+1),15:00,17h 00m,08:00,4.3,INR 2700,33 Seats available
...,...,...,...,...,...,...,...,...,...,...
244,Jammu (j and k) to Amritsar,https://www.redbus.in/bus-tickets/jammu-to-amr...,Gajraj bus service,Bharat Benz A/C Sleeper (2+1),17:35,05h 10m,22:45,3.9,INR 1000,34 Seats available
245,Jammu (j and k) to Amritsar,https://www.redbus.in/bus-tickets/jammu-to-amr...,Gajraj bus service,Bharat Benz A/C Sleeper (2+1),18:30,05h 25m,23:55,2.2,INR 899,15 Seats available
246,Jammu (j and k) to Amritsar,https://www.redbus.in/bus-tickets/jammu-to-amr...,Vijay Tour and Travels,AC Sleeper (2+1),14:30,05h 10m,19:40,2.2,INR 800,29 Seats available
247,Jammu (j and k) to Amritsar,https://www.redbus.in/bus-tickets/jammu-to-amr...,Vijay Tour and Travels,Bharat Benz A/C Sleeper (2+1),16:30,04h 30m,21:00,0,INR 700,30 Seats available
