In [2]:
#import all the modules required for the program
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd 
import time as time_module
from datetime import datetime


In [3]:

# Scroll to the bottom of the page
def scroll_to_bottom(driver, pause=2, max_scrolls=None):
    """Scroll the page to the bottom repeatedly until its height stops growing.

    After every scroll the function waits ``pause`` seconds and re-reads
    ``document.body.scrollHeight``; it stops as soon as two consecutive
    readings are equal.

    Args:
        driver: Object exposing ``execute_script(script)`` (e.g. a Selenium
            WebDriver) that is used to scroll and to read the page height.
        pause: Seconds to wait after each scroll before re-reading the
            height. Defaults to 2, the value that was previously hard-coded.
        max_scrolls: Optional upper bound on the number of scrolls. ``None``
            (the default) keeps scrolling until the height stabilises, so a
            page that grows without end would never return.

    Returns:
        None.
    """
    height_script = "return document.body.scrollHeight"
    last_height = driver.execute_script(height_script)
    scrolls_done = 0

    while max_scrolls is None or scrolls_done < max_scrolls:
        # Jump to the current bottom of the document.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        scrolls_done += 1

        # Give the page a chance to append more content before measuring again.
        if pause > 0:
            time_module.sleep(pause)

        # An unchanged height means the last scroll added nothing: we are done.
        new_height = driver.execute_script(height_script)
        if new_height == last_height:
            break
        last_height = new_height

In [4]:
#Function to return the route links of respective RTC Government bus services
def page_navigation(rtc_link, max_page=100):
    """Collect the route names and links listed on every page of an RTC page.

    Opens ``rtc_link`` in a new Chrome session, reads every
    ``<a class="route">`` element on the displayed page, then clicks the
    pagination tab for the next page number and repeats until that tab cannot
    be found or ``max_page`` pages have been read.

    Args:
        rtc_link: URL of the paginated route listing to open.
        max_page: Highest page number to read. Defaults to 100.

    Returns:
        A list of dicts, one per route anchor, with the keys ``"route_name"``
        (the anchor's ``title`` attribute) and ``"route_link"`` (its ``href``).
    """
    driver = webdriver.Chrome()
    rtc_list = []
    try:
        driver.get(rtc_link)
        time_module.sleep(3)
        driver.maximize_window()

        page_number = 1  # the page currently displayed in the browser
        while True:
            # Fixed wait so the route list of the current page can render.
            time_module.sleep(5)
            for path in driver.find_elements(By.XPATH, "//a[@class='route']"):
                route_name = path.get_attribute("title")
                route_link = path.get_attribute("href")
                print(route_name, route_link)
                rtc_list.append({"route_name": route_name, "route_link": route_link})

            next_page = page_number + 1
            if next_page > max_page:
                # Cap reached; the page that was just read is the last one wanted.
                break

            try:
                next_page_button = driver.find_element(
                    By.XPATH,
                    f'//*[@class="DC_117_pageTabs " and contains(text(),"{next_page}")]',
                )
            except NoSuchElementException:
                print(f"Reached the last page or could not find the next page button.{next_page}")
                break

            # Bring the tab into view before clicking it.
            driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
            time_module.sleep(3)
            next_page_button.click()
            page_number = next_page
    finally:
        # Always close the browser, even when scraping fails part-way through.
        driver.quit()
    return rtc_list

In [6]:
# Function to scrape the bus name, bus type, departure time, duration, arrival time, price, star rating
# and seat availability of every bus on a route (via its route_link) from the redbus website, using Selenium and the respective XPaths.
def bus_route_details(route_name,route_link):
    """Scrape the bus listing of one route page into a pandas DataFrame.

    Opens ``route_link`` in a new Chrome session, clicks every "View Buses"
    button until none can be found, scrolls to the bottom of the page and then
    collects one value per bus for each column, matching elements by their
    exact ``class`` attribute.

    Args:
        route_name: Label stored in the ``bus_route_name`` column of every row.
        route_link: URL of the route page to open; also stored in the
            ``bus_route_link`` column of every row.

    Returns:
        A DataFrame with the columns ``bus_route_name``, ``bus_route_link``,
        ``bus_name``, ``bus_type``, ``departing_time``, ``duration``,
        ``reaching_time``, ``start_rating``, ``price`` and
        ``seat_availability``. A bus without a rating ``<span>`` gets an empty
        string in ``start_rating``.

    Raises:
        ValueError: If the scraped columns do not all contain the same number
            of entries, in which case the rows cannot be aligned.
        NoSuchElementException: If a fare element contains no ``<span>``.
    """
    driver = webdriver.Chrome()  # Optional argument, if not specified will search path.
    try:
        driver.maximize_window()
        driver.get(route_link)
        time_module.sleep(5)

        view_buses_xpath = '//div[@class="button" and contains(text(), "View Buses")]'
        try:
            while True:
                # Wait up to 10 s for a remaining "View Buses" button and click
                # the first one; the wait raises once no such button is left.
                view_buttons = WebDriverWait(driver, 10).until(
                    EC.presence_of_all_elements_located((By.XPATH, view_buses_xpath))
                )
                view_buttons[0].click()
                time_module.sleep(2)
        except Exception:
            # Best effort: any failure ends the expansion phase, but
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            print("No More View Buses Button")

        #Scroll to bottom of page
        scroll_to_bottom(driver)

        print("Finding Bus Names")
        bus_name_list = _element_texts(driver, "//div[@class='travels lh-24 f-bold d-color']")
        # The route name and link are repeated once per bus found.
        bus_route_name_list = [route_name] * len(bus_name_list)
        bus_route_link_list = [route_link] * len(bus_name_list)

        print("Finding Bus Types")
        bus_types_list = _element_texts(driver, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")

        print("Finding Bus Departing Time")
        departing_time_list = _element_texts(driver, "//div[@class='dp-time f-19 d-color f-bold']")

        print("Finding Bus Duration")
        duration_list = _element_texts(driver, "//div[@class='dur l-color lh-24']")

        print("Finding Bus Reaching Time")
        reaching_time_list = _element_texts(driver, "//div[@class='bp-time f-19 d-color disp-Inline']")

        print("Finding Bus Ticket Price")
        price_list = [
            fare_div.find_element(By.XPATH, ".//span").text
            for fare_div in driver.find_elements(By.XPATH, "//div[@class='fare d-block']")
        ]

        print("Finding Bus Seat Availability")
        seat_availability_list = _element_texts(driver, "//div[@class='column-eight w-15 fl']")

        print("Finding Bus Rating")
        star_ratings_list = []
        for rating_div in driver.find_elements(By.XPATH, "//div[@class='column-six p-right-10 w-10 fl']"):
            try:
                star_ratings_list.append(rating_div.find_element(By.XPATH, ".//span").text)
            except NoSuchElementException:
                # No rating span inside this cell: keep the row, leave it blank.
                star_ratings_list.append('')

        # NOTE: the key "start_rating" is kept exactly as before so that
        # existing consumers of this column name keep working.
        buses = {
            "bus_route_name": bus_route_name_list,
            "bus_route_link": bus_route_link_list,
            "bus_name": bus_name_list,
            "bus_type": bus_types_list,
            "departing_time": departing_time_list,
            "duration": duration_list,
            "reaching_time": reaching_time_list,
            "start_rating": star_ratings_list,
            "price": price_list,
            "seat_availability": seat_availability_list,
        }

        # Fail with the per-column counts instead of pandas' generic
        # "All arrays must be of same length" message.
        column_sizes = {column: len(values) for column, values in buses.items()}
        if len(set(column_sizes.values())) > 1:
            raise ValueError(
                f"Scraped columns have different lengths for {route_link}: {column_sizes}"
            )

        print("Creating Pandas Dataframe")
        # All values are plain strings by now, so the frame survives driver.quit().
        return pd.DataFrame(buses)
    finally:
        # Close the browser on success and on failure alike.
        driver.quit()


def _element_texts(driver, xpath):
    """Return the ``.text`` of every element matching ``xpath``, in the order found."""
    return [element.text for element in driver.find_elements(By.XPATH, xpath)]


In [None]:
# Test function call to display the dataframe - all 46 records
# df=bus_route_details('dummy' ,'https://www.redbus.in/bus-tickets/ananthapur-to-bangalore')
# df