# In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException, StaleElementReferenceException
import time
import pandas as pd

# --- Configuration: fill these in before running ---------------------------
START_URL = ""  # TODO: give the url here
pages = 1       # TODO: give the no. of pages (pages 1..pages are visited)

# Fail early, before a browser window is opened, if the URL was not filled in.
if not START_URL:
    raise ValueError("Set START_URL to the page URL before running this script.")

driver = webdriver.Chrome()
driver.maximize_window()
driver.get(START_URL)
time.sleep(3)  # Wait for the page to load
route_data = []  # One dict per route: {"Route_Link": ..., "Route_Name": ...}

# Collect the link and display name of every route, page by page.
# A timeout while waiting for a pagination tab ends the collection early;
# routes gathered up to that point stay in route_data.
try:
    for page in range(1, pages + 1):
        if page > 1:
            # Wait for the pagination tab and click it through JavaScript
            pagination_tab = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs') and text()='{page}']"))
            )
            driver.execute_script("arguments[0].click();", pagination_tab)
            time.sleep(3)  # Wait for the page to load

        # Re-fetch the route elements after navigating to the new page
        route_elements = driver.find_elements(By.CLASS_NAME, 'route')

        for route in route_elements:
            try:
                route_links = route.get_attribute('href')
                route_names = route.text.strip()
            except StaleElementReferenceException:
                print("Element became stale, skipping...")
                continue

            # get_attribute returns None when the element has no href. Such an
            # entry cannot be opened later with driver.get, so it is not recorded.
            if not route_links:
                print(f"No link found for route '{route_names}', skipping...")
                continue

            route_data.append({"Route_Link": route_links, "Route_Name": route_names})

except TimeoutException:
    print("Timed out during route extraction")

# Extract bus details from route data
bus_data = []


def _text_at(elements, index, default=''):
    """Return the text of elements[index], or default when the list is too short."""
    return elements[index].text if index < len(elements) else default


for links in route_data:
    # A route without a link cannot be opened; passing an empty value to
    # driver.get would raise an exception that nothing below handles.
    if not links["Route_Link"]:
        print(f"No link for route {links['Route_Name']!r}, skipping...")
        continue

    try:
        driver.get(links["Route_Link"])
        time.sleep(3)

        # Wait for the "View Buses" button and click it
        view_buses_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CLASS_NAME, "button"))
        )
        view_buses_button.click()
        time.sleep(3)  # Wait for buses to load
    except TimeoutException:
        # Not fatal: the extraction below still runs on whatever the page shows.
        print(f"Timeout: No button found for {links['Route_Link']}")

    try:
        # Scroll down to load all bus items
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)  # Wait for the page to load more content

        # Extract bus data as page-wide lists, one list per field
        bus_names = driver.find_elements(By.CLASS_NAME, "travels.lh-24.f-bold.d-color")
        bus_types = driver.find_elements(By.CLASS_NAME, "bus-type.f-12.m-top-16.l-color.evBus")
        departure = driver.find_elements(By.CLASS_NAME, "dp-time.f-19.d-color.f-bold")
        duration = driver.find_elements(By.CLASS_NAME, "dur.l-color.lh-24")
        reach = driver.find_elements(By.CLASS_NAME, "bp-time.f-19.d-color.disp-Inline")
        star_ratings = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price = driver.find_elements(By.CLASS_NAME, "fare.d-block")

        # Handle seat availability using XPath
        seat_available = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left m-top-16')]")

        # One record per bus name; a field whose list is shorter gets a default.
        # NOTE(review): fields are matched to buses purely by list position. If a
        # bus in the middle of the page lacks an element (e.g. a rating), later
        # values shift onto earlier buses - confirm against the page markup.
        for i, name_element in enumerate(bus_names):
            bus_detail = {
                "Route_Name": links["Route_Name"],
                "Route_Link": links["Route_Link"],
                "Bus_Name": name_element.text,
                "Bus_Type": _text_at(bus_types, i),
                "Departure": _text_at(departure, i),
                "Duration": _text_at(duration, i),
                "Reach": _text_at(reach, i),
                "Star_Ratings": _text_at(star_ratings, i, '0'),
                "Price": _text_at(price, i),
                "Seat_Availability": _text_at(seat_available, i, '0')
            }
            bus_data.append(bus_detail)
    except Exception as e:
        # Best-effort per route: report the failure and move on to the next route.
        print(f"Error extracting bus data for {links['Route_Link']}: {str(e)}")

# Scraping is finished, so release the browser before any file I/O; a failure
# while writing the CSV then cannot leave a Chrome process running.
driver.quit()

# Convert the bus_data to a pandas DataFrame
df = pd.DataFrame(bus_data)

output_file = 'ker_bus_details.csv'

# Only report a saved file when one was actually written
if df.empty:
    print("No data was extracted.")
else:
    # Dump the DataFrame to a CSV file
    df.to_csv(output_file, index=False)
    print(f"Data extraction completed and saved to {output_file}")