# In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import pandas as pd
import time

def setup_driver():
    """Start a Chrome session ready for scraping.

    The browser window is maximized and a 10-second implicit wait is
    configured for element lookups.

    Returns:
        The configured ``webdriver.Chrome`` instance.
    """
    browser = webdriver.Chrome()
    browser.maximize_window()
    browser.implicitly_wait(10)
    return browser

def scroll_to_bottom(driver, pause=2, max_scrolls=None):
    """Scroll the page down until its height stops growing.

    Repeatedly jumps to the bottom of the document, waits ``pause``
    seconds for further content to load, and stops as soon as
    ``document.body.scrollHeight`` is unchanged between two scrolls.

    Args:
        driver: Object exposing ``execute_script(script)``.
        pause: Seconds to wait after each scroll before re-measuring
            the page height. Defaults to 2.
        max_scrolls: Optional upper bound on the number of scrolls that
            grew the page. ``None`` (the default) means keep going until
            the height stabilizes, which never terminates on a page that
            grows forever.
    """
    last_height = driver.execute_script("return document.body.scrollHeight")
    grown = 0
    while max_scrolls is None or grown < max_scrolls:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            # Nothing new was loaded by the last scroll: we are at the bottom.
            break
        last_height = new_height
        grown += 1

def get_route_links(driver):
    """Collect ``(name, href)`` pairs for every route anchor on the page.

    The page is first scrolled to the bottom, then all ``<a>`` elements
    whose class contains ``"route"`` are awaited (up to 20 seconds).
    Anchors with empty text or no ``href`` are skipped; an anchor whose
    attributes cannot be read is reported and skipped.

    Args:
        driver: The Selenium driver, already on the listing page.

    Returns:
        A list of ``(route_name, route_link)`` tuples.
    """
    waiter = WebDriverWait(driver, 20)
    scroll_to_bottom(driver)

    # Locate every route anchor currently present in the DOM.
    anchors_present = EC.presence_of_all_elements_located(
        (By.XPATH, '//a[contains(@class, "route")]')
    )
    anchors = waiter.until(anchors_present)

    collected = []
    for anchor in anchors:
        try:
            label = anchor.text
            href = anchor.get_attribute('href')
        except Exception as e:
            print(f"Error extracting route data: {e}")
            continue
        if label and href:
            collected.append((label, href))

    return collected

def click_element(driver, by, value):
    """Click the element found by ``(by, value)`` using JavaScript.

    Waits up to 10 seconds for the element to become clickable, scrolls
    it into view, then dispatches the click through ``execute_script``.
    Any failure is printed rather than raised.

    Args:
        driver: The Selenium driver.
        by: Locator strategy (e.g. ``By.XPATH``).
        value: Locator expression matching the element to click.
    """
    locator = (by, value)
    # Each step is a script run against the element, followed by a short
    # pause: 1 s so the element settles in view, 2 s so the click registers.
    steps = (
        ("arguments[0].scrollIntoView(true);", 1),
        ("arguments[0].click();", 2),
    )
    try:
        target = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable(locator)
        )
        for script, delay in steps:
            driver.execute_script(script, target)
            time.sleep(delay)
    except Exception as e:
        print(f"Error clicking element: {e}")

def get_bus_details(driver, route_name, route_link):
    """Scrape every bus listed on the currently loaded route page.

    Steps:
      1. Expand each "View Buses" toggle found on the page (best effort;
         failures are printed and ignored).
      2. Scroll to the bottom so all bus items are loaded.
      3. Read the fields of each bus item into a dict.

    Args:
        driver: The Selenium driver, already on the route page.
        route_name: Name of the route, copied into every record.
        route_link: URL of the route, copied into every record.

    Returns:
        A list of dicts with the keys ``route_name``, ``route_link``,
        ``bus_name``, ``bus_type``, ``departing_time``, ``duration``,
        ``reaching_time``, ``star_rating``, ``bus_fare`` and
        ``seat_availability``. A bus for which any field cannot be read is
        reported and omitted.

    Raises:
        Whatever ``WebDriverWait.until`` raises when no bus item appears
        within 20 seconds; this is deliberately left to the caller.
    """
    wait = WebDriverWait(driver, 20)

    # Locate the "View Buses" toggles. Their absence is not an error for
    # the scrape as a whole, so a failed wait just yields no buttons.
    try:
        view_buses_buttons = wait.until(EC.presence_of_all_elements_located(
            (By.XPATH, '//i[@class="p-left-10 icon icon-down"]')))
    except Exception as e:
        print(f"Error clicking 'View Buses' buttons: {e}")
        view_buses_buttons = []

    # Click each located button itself. Re-resolving the locator on every
    # iteration would always hit the first match in the document, not the
    # button the loop is currently visiting.
    for button in view_buses_buttons:
        try:
            driver.execute_script("arguments[0].scrollIntoView(true);", button)
            time.sleep(1)  # Let the element settle in view
            driver.execute_script("arguments[0].click();", button)
            time.sleep(2)  # Give the click time to take effect
        except Exception as e:
            print(f"Error clicking element: {e}")

    scroll_to_bottom(driver)

    buses = wait.until(EC.presence_of_all_elements_located(
        (By.XPATH, '//div[contains(@class, "clearfix bus-item")]')))

    # Output key -> XPath (relative to one bus item) of the element whose
    # text holds that value. Order here is the column order of the records.
    field_xpaths = {
        'bus_name': './/div[contains(@class, "travels lh-24 f-bold d-color")]',
        'bus_type': './/div[contains(@class, "bus-type f-12 m-top-16 l-color evBus")]',
        'departing_time': './/div[contains(@class, "dp-time f-19 d-color f-bold")]',
        'duration': './/div[contains(@class, "dur l-color lh-24")]',
        'reaching_time': './/div[contains(@class, "bp-time f-19 d-color disp-Inline")]',
        'star_rating': './/div[contains(@class, "column-six p-right-10 w-10 fl")]',
        'bus_fare': './/div[contains(@class, "fare d-block")]',
        'seat_availability': './/div[contains(@class, "column-eight w-15 fl")]',
    }

    bus_details = []
    for bus in buses:
        try:
            record = {'route_name': route_name, 'route_link': route_link}
            for field, xpath in field_xpaths.items():
                record[field] = bus.find_element(By.XPATH, xpath).text
        except Exception as e:
            # One unreadable field drops the whole bus, never a partial row.
            print(f"Error extracting data for a bus: {e}")
        else:
            bus_details.append(record)

    return bus_details

def handle_pagination(driver, base_url, num_pages=2):
    """Scrape the buses of every route across the listing's pages.

    For each listing page, the route links are collected, each route page
    is visited and scraped, and then the next page tab is clicked.

    Args:
        driver: The Selenium driver.
        base_url: URL of the route listing page.
        num_pages: Number of listing pages to process. Defaults to 2.

    Returns:
        A list with the bus records of all routes that could be scraped.
        Errors are printed; a failure to read a listing page or to switch
        to the next page ends the scrape early and returns what has been
        collected so far.
    """
    wait = WebDriverWait(driver, 20)
    all_bus_details = []
    driver.get(base_url)

    for page_number in range(1, num_pages + 1):
        try:
            route_links = get_route_links(driver)
        except Exception as e:
            # Without the listing there is nothing left to visit; keep the
            # data already gathered instead of losing it to the exception.
            print(f"Error collecting routes on page {page_number}: {e}")
            break

        for route_name, route_link in route_links:
            try:
                driver.get(route_link)
                time.sleep(5)  # Wait for the new page to load

                bus_details = get_bus_details(driver, route_name, route_link)
                all_bus_details.extend(bus_details)
            except Exception as e:
                print(f"Error processing route {route_name}: {e}")

        if page_number == num_pages:
            break

        # Go back to the listing exactly once, whether or not the last
        # route succeeded, so the page tabs are present for the click below.
        driver.get(base_url)
        time.sleep(5)  # Adjust as needed for the page to load

        try:
            pagination_container = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="root"]/div/div[4]/div[12]')))
            next_page_button = pagination_container.find_element(By.XPATH, f'.//div[contains(@class, "DC_117_pageTabs") and text()="{page_number + 1}"]')

            driver.maximize_window()
            actions = ActionChains(driver)
            actions.move_to_element(next_page_button).perform()
            time.sleep(2)

            next_page_button.click()
            # Confirm the switch: the active tab must show the new number.
            wait.until(EC.text_to_be_present_in_element(
                (By.XPATH, '//div[contains(@class, "DC_117_pageTabs DC_117_pageActive")]'), str(page_number + 1)))

            print(f'Successfully navigated to page {page_number + 1}')

        except Exception as e:
            print(f"Error during pagination: {e}")
            # Still on the old page: carrying on would scrape the same
            # routes again and duplicate their rows.
            break

    return all_bus_details

def main(url):
    """Scrape all bus details reachable from ``url`` and save them as CSV.

    Args:
        url: URL of the route listing page to start from.

    The records are written to ``KSRTC_bus_details.csv`` in the current
    working directory, without an index column.
    """
    output_path = 'KSRTC_bus_details.csv'

    driver = setup_driver()
    try:
        all_bus_details = handle_pagination(driver, url)
    finally:
        # Always close the browser, even if scraping raised.
        driver.quit()

    # Save to CSV
    df = pd.DataFrame(all_bus_details)
    df.to_csv(output_path, index=False)
    print(f"Data scraping completed and saved to '{output_path}'")

# Run the scrape only when executed directly (script or notebook cell),
# not when this module is imported.
if __name__ == "__main__":
    # URL to scrape
    main('https://www.redbus.in/online-booking/ksrtc-kerala/?utm_source=rtchometile')
