In [None]:
# This cell uses Selenium ActionChains to move to each numbered pagination tab and then clicks it, stepping through pages 1-5.
# The page_number loop variable holds the number of the page currently being scraped.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time

# Route records gathered across all listing pages; scrape_page() appends to it.
all_data = []

# Start a Chrome session with a shared 30-second explicit wait, then open the
# APSRTC route listing on redBus.
driver = webdriver.Chrome()
wait = WebDriverWait(driver, timeout=30)
driver.get("https://www.redbus.in/online-booking/apsrtc/?utm_source=rtchometile")

def scrape_page():
    """Scrape every route card on the listing page that is currently loaded.

    Uses the module-level ``wait`` to block until elements with class
    ``route_link`` are present, then appends one dict per card to the
    module-level ``all_data`` list. Each dict has the keys ``route_link``,
    ``title``, ``start_price``, ``bus_options``, ``first_bus`` and
    ``last_bus``.

    A card that raises while being read is reported with ``print`` and
    skipped; the remaining cards are still processed. Any exception raised by
    ``wait.until`` itself propagates to the caller.

    Returns:
        None. Results are accumulated in ``all_data``.
    """
    routes = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "route_link")))

    for route in routes:
        try:
            # Look the anchor up once: it carries both the URL and the title,
            # and every find_element call is a separate WebDriver request.
            anchor = route.find_element(By.CSS_SELECTOR, ".route_details a")
            record = {
                "route_link": anchor.get_attribute("href"),
                "title": anchor.get_attribute("title"),
                "start_price": route.find_element(
                    By.CSS_SELECTOR, ".route_details .fare").text.strip(),
                "bus_options": route.find_element(
                    By.CSS_SELECTOR, ".row2 .totalRoutes:nth-of-type(1)").text.strip(),
                "first_bus": route.find_element(
                    By.CSS_SELECTOR, ".row2 .totalRoutes:nth-of-type(2) strong").text.strip(),
                "last_bus": route.find_element(
                    By.CSS_SELECTOR, ".row2 .totalRoutes:nth-of-type(3) strong").text.strip(),
            }
        except Exception as e:
            print(f"An error occurred: {e}")
            continue

        # Only complete records reach the shared list.
        all_data.append(record)

# Scrape the first MAX_PAGES listing pages, advancing by clicking the numbered
# pagination tab of the next page.
MAX_PAGES = 5

try:
    for page_number in range(1, MAX_PAGES + 1):
        scrape_page()
        if page_number == MAX_PAGES:
            break  # last page scraped; there is no further tab to click

        next_page = page_number + 1
        try:
            # NOTE(review): this positional XPath is tied to the current page
            # layout and will stop matching if the surrounding divs change.
            pagination_container = wait.until(EC.presence_of_element_located(
                (By.XPATH, '//*[@id="root"]/div/div[4]/div[12]')
            ))

            # The tab whose visible text is the next page number.
            next_page_button = pagination_container.find_element(
                By.XPATH, f'.//div[contains(@class, "DC_117_pageTabs") and text()="{next_page}"]'
            )

            # Move the pointer to the tab before clicking it, then give the
            # page a moment to settle.
            ActionChains(driver).move_to_element(next_page_button).perform()
            time.sleep(1)

            print(f"Clicking on page {next_page}")
            next_page_button.click()

            # The click has taken effect once the active tab shows the new number.
            wait.until(EC.text_to_be_present_in_element(
                (By.XPATH, '//div[contains(@class, "DC_117_pageTabs DC_117_pageActive")]'), str(next_page)))
            print(f"Successfully navigated to page {next_page}")

            # Fixed pause so the new page's route cards can finish loading.
            time.sleep(3)
        except Exception as e:
            print(f"An error occurred while navigating to page {next_page}: {e}")
            break
finally:
    # The bus-scraping cell rebinds `driver` to a new Chrome session, so this
    # one must be closed here or its browser process is leaked.
    driver.quit()

# Print the scraped data
for entry in all_data:
    print(entry)


In [None]:
import pandas as pd
# Tabulate the scraped route records: one row per dict in all_data. Being the
# last expression of the cell, the DataFrame is what the cell displays.
pd.DataFrame(all_data)

In [None]:
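# Second approach: scroll each route page down by sending PAGE_DOWN key presses, comparing
# driver.page_source before and after each press; scrolling stops once the source no longer changes.
# page_source is a built-in property of the Selenium WebDriver.
# See the scroll_and_collect() function below for how this works.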
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from selenium.common.exceptions import TimeoutException, StaleElementReferenceException

# Per-bus records; scrape_bus_data() appends one dict per bus card.
bus_data = []

# New Chrome session for the per-route scrape, with a 30-second explicit wait.
driver = webdriver.Chrome()
wait = WebDriverWait(driver, timeout=30)

# URLs of the route pages to visit. Requires all_data from the route-listing
# cell to exist already.
route_links = [record['route_link'] for record in all_data]

def scroll_and_collect(max_scrolls: int = 500, pause: float = 2):
    """Scroll the current page to the bottom and return its bus cards.

    Sends PAGE_DOWN to the ``<body>`` element of the module-level ``driver``
    repeatedly. After each key press it sleeps ``pause`` seconds and compares
    ``driver.page_source`` with the previous snapshot; scrolling stops as soon
    as a key press leaves the source unchanged.

    Args:
        max_scrolls: Upper bound on PAGE_DOWN presses. Without a bound, a page
            whose source never stops changing would keep this loop running
            forever.
        pause: Seconds to sleep after each key press so newly requested
            content has time to load.

    Returns:
        The list of elements with class ``bus-item`` found after scrolling.
        Existing callers that ignore the return value are unaffected.
    """
    body = driver.find_element(By.TAG_NAME, 'body')
    previous_source = driver.page_source

    for _ in range(max_scrolls):
        body.send_keys(Keys.PAGE_DOWN)
        time.sleep(pause)  # Allow time for the page to load new content
        current_source = driver.page_source
        if current_source == previous_source:
            break
        # Reuse this snapshot as the next baseline instead of reading
        # page_source a second time per iteration.
        previous_source = current_source
    else:
        print(f"Stopped scrolling after {max_scrolls} PAGE_DOWN presses; the page source was still changing")

    # Get bus items after scrolling
    return driver.find_elements(By.CLASS_NAME, "bus-item")

def _expand_view_buses_sections(wait, route_link):
    """Click every visible "View Buses" button, or do nothing if none appear."""
    try:
        view_buses_buttons = wait.until(EC.visibility_of_all_elements_located(
            (By.XPATH, "//div[contains(@class, 'button') and contains(text(), 'View Buses')]")))
    except TimeoutException:
        # With no matching button the condition never becomes truthy and the
        # wait times out. That must not abort the scrape: the page's bus cards
        # are still collected by the caller.
        print(f"No 'View Buses' buttons found for {route_link}")
        return

    for button in view_buses_buttons:
        try:
            button.click()
            time.sleep(3)  # Wait for the content to load
        except Exception as e:
            print(f"An error occurred while clicking the button: {e}")
            continue


def _read_bus_record(bus, route_link):
    """Build the record dict for one ``bus-item`` element."""
    return {
        "route_link": route_link,
        "busname": bus.find_element(By.CLASS_NAME, "travels").text,
        "bustype": bus.find_element(By.CLASS_NAME, "bus-type").text,
        "departing_time": bus.find_element(By.CLASS_NAME, "dp-time").text,
        "duration": bus.find_element(By.CLASS_NAME, "dur").text,
        "reaching_time": bus.find_element(By.CLASS_NAME, "bp-time").text,
        "star_rating": bus.find_element(By.CSS_SELECTOR, ".rating-sec .rating span").text,
        "price": bus.find_element(By.CSS_SELECTOR, ".seat-fare .fare span").text,
        "seats_available": bus.find_element(By.CLASS_NAME, "seat-left").text,
    }


def scrape_bus_data(route_link):
    """Open one route page and append a record per bus card to ``bus_data``.

    Steps: load ``route_link`` in the module-level ``driver``, click any
    "View Buses" buttons, scroll to the bottom with ``scroll_and_collect()``,
    then read every element with class ``bus-item``.

    Args:
        route_link: URL of the route page to scrape.

    Returns:
        None. Records are appended to the module-level ``bus_data`` list, each
        with the keys ``route_link``, ``busname``, ``bustype``,
        ``departing_time``, ``duration``, ``reaching_time``, ``star_rating``,
        ``price`` and ``seats_available``.

    Error handling:
        A bus card that raises while being read is reported with ``print`` and
        skipped. A timeout waiting for the bus cards, or any other error after
        the page load, is reported with ``print`` and ends this route without
        raising. Exceptions from ``driver.get`` propagate.
    """
    driver.get(route_link)
    wait = WebDriverWait(driver, 30)  # 30-second explicit wait for this page

    try:
        _expand_view_buses_sections(wait, route_link)

        # Collect bus details
        scroll_and_collect()
        bus_items = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "bus-item")))
        for bus in bus_items:
            try:
                bus_data.append(_read_bus_record(bus, route_link))
            except Exception as e:
                print(f"An error occurred while scraping bus data: {e}")
                continue
    except TimeoutException as e:
        print(f"Timeout occurred while waiting for elements: {e}")
    except Exception as e:
        print(f"An error occurred during scraping: {e}")

# Scrape data for each route link, then close the browser even if a route
# raises, so the Chrome session is not left running.
try:
    for route_link in route_links:
        scrape_bus_data(route_link)
        time.sleep(4)  # pause before requesting the next route page
finally:
    driver.quit()

# Print the scraped bus data
for entry in bus_data:
    print(entry)
                