In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import pandas as pd
from datetime import datetime
import time

date_format = "%H:%M"

# Listing page whose paginated table links to every route that gets scraped.
ROUTES_URL = 'https://www.redbus.in/online-booking/ktcl/?utm_source=rtchometile'

# Column order of the rows built by extract_bus_row().
columns = ['Route Title', 'Bus Name', 'Bus Type', 'Departing Time', 'Duration', 'Reaching Time', 'Star Rating', 'Price', 'Seats Available']

# Destination of the exported CSV file.
output_file = "C:/Users/USER/Desktop/RDB1/rd.csv"


def parse_clock_time(text, fmt=date_format):
    """Convert a clock string such as "21:30" into a ``datetime.time``.

    Raises:
        ValueError: if *text* does not match *fmt*.
    """
    return datetime.strptime(text.strip(), fmt).time()


def parse_star_rating(text):
    """Return the leading number of *text* as a float, or 0.0 if there is none.

    Blank text and non-numeric labels such as "New" both map to 0.0, so an
    unexpected label can no longer abort the parsing of a bus.
    """
    tokens = text.split()
    if not tokens:
        return 0.0
    try:
        return float(tokens[0])
    except ValueError:
        return 0.0


def parse_seat_count(text):
    """Return the first whitespace-separated token of *text* as an int.

    Raises:
        IndexError: if *text* is blank.
        ValueError: if the first token is not an integer.
    """
    return int(text.split()[0])


def collect_route_links(driver):
    """Walk every page of the route table and return ``(href, title)`` pairs.

    The driver must already be showing the listing page. Pages after the
    first are opened by clicking their numbered tab through JavaScript,
    followed by a fixed pause so the table can re-render.
    """
    pagination = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "div.DC_117_paginationTable"))
    )
    page_numbers = len(pagination.find_elements(By.CSS_SELECTOR, "div.DC_117_pageTabs "))
    print("Page_Name", page_numbers)

    links = []
    for page_no in range(1, page_numbers + 1):
        if page_no > 1:
            page_tab = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable(
                    (By.XPATH, f"//div[@class='DC_117_pageTabs ' and text()='{page_no}']")
                )
            )
            driver.execute_script("arguments[0].click();", page_tab)
            time.sleep(3)

        for anchor in driver.find_elements(By.CSS_SELECTOR, "div.route_details a.route"):
            links.append((anchor.get_attribute('href'), anchor.get_attribute('title')))

    print("route_urls", len(links))
    return links


def expand_view_buses_sections(driver):
    """Click every "View Buses" button on the current route page.

    If no such button appears within 10 seconds the page is left as is and a
    notice is printed.
    """
    try:
        view_buses_buttons = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located(
                (By.XPATH, "//div[@class='button' and normalize-space()='View Buses']")
            )
        )
    except TimeoutException:
        print("In this Route has No any government buses are available")
        return

    # Nudge the page down and give it time to settle before clicking.
    driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.DOWN)
    time.sleep(7)

    # Click the located buttons themselves, last one first (the order the
    # original two-button case used) -- presumably so that expanding a lower
    # section does not move an upper button; TODO confirm.
    for button in reversed(view_buses_buttons):
        button.click()
        time.sleep(3)


def scroll_to_bottom(driver):
    """Press PAGE_DOWN until the vertical scroll offset stops changing.

    The end of the page is detected by a 10 second wait in which
    ``window.pageYOffset`` does not move.
    """
    while True:
        previous_offset = driver.execute_script("return window.pageYOffset;")
        driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)
        try:
            WebDriverWait(driver, 10).until(
                lambda d: d.execute_script("return window.pageYOffset;") != previous_offset
            )
        except TimeoutException:
            return


def extract_bus_row(bus, route_title):
    """Build one output row (ordered like ``columns``) from a bus element.

    Raises:
        NoSuchElementException: if an expected child element is missing.
        ValueError, IndexError: if a time, rating or seat text cannot be parsed.
    """
    def text_of(selector):
        return bus.find_element(By.CSS_SELECTOR, selector).text

    busname = text_of("div.travels.lh-24.f-bold.d-color")
    bustype = text_of("div.bus-type.f-12.m-top-16.l-color.evBus")
    departing_time = text_of("div.dp-time.f-19.d-color.f-bold")
    duration = text_of("div.dur.l-color.lh-24")
    reaching_time = text_of("div.bp-time.f-19.d-color.disp-Inline")
    star_rating = text_of("div[class='column-six p-right-10 w-10 fl']")
    price = text_of("div.seat-fare")
    seats_available = text_of("div.column-eight.w-15.fl")

    return [
        route_title,
        busname,
        bustype,
        parse_clock_time(departing_time),
        duration,
        parse_clock_time(reaching_time),
        parse_star_rating(star_rating),
        price,
        parse_seat_count(seats_available),
    ]


def scrape_route(driver, route_url, route_title):
    """Open *route_url* and return one row per bus that could be parsed.

    A bus whose elements are missing or whose text cannot be parsed is
    reported and skipped on its own, so it never discards the rows already
    collected for the route.

    Raises:
        TimeoutException: if no bus item is present within 15 seconds.
    """
    driver.get(route_url)
    WebDriverWait(driver, 15).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "div.clearfix.bus-item-details"))
    )
    expand_view_buses_sections(driver)
    scroll_to_bottom(driver)

    bus_data = []
    for bus in driver.find_elements(By.CSS_SELECTOR, "div.clearfix.bus-item-details"):
        try:
            bus_data.append(extract_bus_row(bus, route_title))
        except NoSuchElementException as e:
            print(f"Element not found in bus data extraction: {str(e)}")
        except (ValueError, IndexError) as e:
            print(f"Skipping bus with unparseable data on route '{route_url}': {e}")
    return bus_data


def scrape_all_routes():
    """Scrape every route linked from ``ROUTES_URL`` and return all bus rows.

    A failure on one route is printed and that route is skipped; a failure
    while collecting the route links propagates to the caller. The Chrome
    driver is closed by its context manager in either case.
    """
    rows = []
    with webdriver.Chrome() as driver:
        driver.get(ROUTES_URL)
        driver.maximize_window()

        for route_url, route_title in collect_route_links(driver):
            try:
                bus_data = scrape_route(driver, route_url, route_title)
            except Exception as e:
                # Best effort: one broken route must not stop the whole run.
                print(f"Error processing route '{route_url}': {str(e)}")
                continue
            rows.extend(bus_data)
            print(f"Scraped data for route: {route_url}, Total buses: {len(bus_data)}")
    return rows


def save_routes_csv(rows, path):
    """Write *rows* to *path* as CSV (no index column) and return the DataFrame.

    The parent directory of *path* is created when missing, so a finished
    scrape is not lost to a non-existent output folder.
    """
    import os

    frame = pd.DataFrame(rows, columns=columns)
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    frame.to_csv(path, index=False)
    print(f"All routes data saved to {path}")
    return frame


if __name__ == "__main__":
    # Scrape everything, then export the result to CSV.
    all_routes_data = scrape_all_routes()
    df = save_routes_csv(all_routes_data, output_file)