In [2]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.keys import Keys
import numpy as np
import time

In [3]:

def get_routes_and_links(driver):
    """Fetch route names and links from the page."""
    wait = WebDriverWait(driver, 30)
    elements = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//a[@class='route']")))
    
    routes, links = [], []
    for element in elements:
        name = element.text
        link = element.get_attribute('href')
        routes.append(name)
        links.append(link)
    
    return routes, links

def navigate_to_next_page(driver, page_number):
    """Navigate to the next page if available."""
    wait = WebDriverWait(driver, 30)
    
    try:
        # Locate the pagination container
        pagination_container = wait.until(EC.presence_of_element_located(
            (By.XPATH, '//*[@id="root"]/div/div[4]/div[12]')
        ))

        # Locate the next page button
        next_page_button = pagination_container.find_element(
            By.XPATH, f'.//div[contains(@class, "DC_117_pageTabs") and text()="{page_number + 1}"]'
        )

        # Scroll to the next page button
        actions = ActionChains(driver)
        actions.move_to_element(next_page_button).perform()
        time.sleep(1)  # Allow time for scroll

        print(f"Clicking on page {page_number + 1}")
        next_page_button.click()

        # Wait for the page number to update
        wait.until(EC.text_to_be_present_in_element(
            (By.XPATH, '//div[contains(@class, "DC_117_pageTabs DC_117_pageActive")]'), str(page_number + 1)
        ))

        print(f"Successfully navigated to page {page_number + 1}")
        time.sleep(3)  # Wait for the new page to load

    except Exception as e:
        print(f"An error occurred while navigating to page {page_number + 1}: {e}")

def main(url, num_pages):
    """Main function to scrape routes and links."""
    driver = webdriver.Chrome()  # Ensure the correct WebDriver is installed
    driver.get(url)

    all_routes, all_links = [], []

    for page_number in range(1, num_pages + 1):
        routes, links = get_routes_and_links(driver)
        all_routes.extend(routes)
        all_links.extend(links)

        if page_number < num_pages:  # Skip navigation on the last page
            navigate_to_next_page(driver, page_number)

    # Save the results to a CSV file
    df = pd.DataFrame({'Route_Name': all_routes, 'Link1': all_links})#read the csv file
    df.to_csv('TS1.csv', index=False)
    driver.quit()  # Close the driver


if __name__ == "__main__":
    url = 'https://www.redbus.in/online-booking/tsrtc/?utm_source=rtchometile'  # Your target URL
    main(url, 4)  # Scrape data from the first 4   pages


Clicking on page 2
Successfully navigated to page 2
Clicking on page 3
Successfully navigated to page 3
Clicking on page 4
Successfully navigated to page 4


In [5]:
df1_Ts=pd.read_csv("TS1.csv")
df1_Ts

Unnamed: 0,Route_Name,Link1
0,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
1,Hyderabad to Khammam,https://www.redbus.in/bus-tickets/hyderabad-to...
2,Khammam to Hyderabad,https://www.redbus.in/bus-tickets/khammam-to-h...
3,Hyderabad to Srisailam,https://www.redbus.in/bus-tickets/hyderabad-to...
4,Hyderabad to Karimnagar,https://www.redbus.in/bus-tickets/hyderabad-to...
5,Hyderabad to Mancherial,https://www.redbus.in/bus-tickets/hyderabad-to...
6,Hyderabad to Sathupally,https://www.redbus.in/bus-tickets/hyderabad-to...
7,Hyderabad to Adilabad,https://www.redbus.in/bus-tickets/hyderabad-to...
8,Hyderabad to Nirmal,https://www.redbus.in/bus-tickets/hyderabad-to...
9,Hyderabad to Bhadrachalam,https://www.redbus.in/bus-tickets/hyderabad-to...


In [6]:
#retrive the bus details
driver = webdriver.Chrome()
Bus_names_Ts = []
Bus_types_Ts = []
Start_Time_Ts = []
End_Time_Ts = []
Ratings_Ts = []
Total_Duration_Ts = []
Prices_Ts = []
Seats_Available_Ts = []
Route_names = []
Route_links = []

for i,r in df1_Ts.iterrows():
    link=r["Link1"]
    routes=r["Route_Name"]

# Loop through each link
    driver.get(link)
    time.sleep(2)  

    # Click on elements to reveal bus details
    elements = driver.find_elements(By.XPATH, f"//a[contains(@href, '{link}')]")
    for element in elements:
        element.click()
        time.sleep(2)
    try:
        clicks = driver.find_element(By.XPATH, "//div[@class='button']")
        clicks.click()
    except:
        continue  
    time.sleep(2)
    scrolling = True
    while scrolling:
            old_page_source = driver.page_source
            # Use ActionChains to perform a PAGE_DOWN
            ActionChains(driver).send_keys(Keys.PAGE_DOWN).perform()
            time.sleep(5)
            new_page_source = driver.page_source
            if new_page_source == old_page_source:
                scrolling = False

    # Extract bus details
    bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
    start_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
    end_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
    total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
    rating = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
    price = driver.find_elements(By.XPATH, '//*[@class="fare d-block"]')
    seats = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

    # Append data to respective lists
    for bus in bus_name:
        Bus_names_Ts.append(bus.text)
        Route_links.append(link)
        Route_names.append(routes)
    for bus_type_elem in bus_type:
        Bus_types_Ts.append(bus_type_elem.text)
    for start_time_elem in start_time:
        Start_Time_Ts.append(start_time_elem.text)
    for end_time_elem in end_time:
        End_Time_Ts.append(end_time_elem.text)
    for total_duration_elem in total_duration:
        Total_Duration_Ts.append(total_duration_elem.text)
    for ratings in rating:
        Ratings_Ts.append(ratings.text)
    for price_elem in price:
        Prices_Ts.append(price_elem.text)
    for seats_elem in seats:
        Seats_Available_Ts.append(seats_elem.text)
        
print("Successfully Completed")



Successfully Completed


In [9]:
# Proceed with creating the DataFrame
data_Ts= {
    'Bus_name': Bus_names_Ts,
    'Bus_type': Bus_types_Ts,
    'Start_time': Start_Time_Ts,
    'End_time': End_Time_Ts,
    'Total_duration': Total_Duration_Ts,
    'Price': Prices_Ts,
    "Seats_Available": Seats_Available_Ts,
    "Rating" :Ratings_Ts,
    'Route_link': Route_links,
    'Route_name': Route_names
}


# Determine the maximum length
max_len = max(len(values) for values in data_Ts.values())

# Normalize the length of each column by padding with NaN
for key in data_Ts:
    while len(data_Ts[key]) < max_len:
        data_Ts[key].append(np.nan)

# Create DataFramedf2=pd.read_csv("ap3.csv")

df_buses_1 = pd.DataFrame(data_Ts)

# Save to CSV
df_buses_1.to_csv('Ts2.csv', index=False)
print("Data saved to Ts2.csv successfully!")

Data saved to Ts2.csv successfully!


In [10]:
df2_Ts=pd.read_csv("Ts2.csv")
df2_Ts

Unnamed: 0,Bus_name,Bus_type,Start_time,End_time,Total_duration,Price,Seats_Available,Rating,Route_link,Route_name
0,MAHABAHUSRIKVRTRAVELS,Non A/C Seater / Sleeper (2+1),16:00,22:30,06h 30m,INR 800,32 Seats available,3.1,https://www.redbus.in/bus-tickets/hyderabad-to...,Hyderabad to Khammam
1,Puri Jagannadh Tours And Travels,Non A/C Seater / Sleeper (2+1),19:05,02:30,07h 25m,INR 499,36 Seats available,3.9,https://www.redbus.in/bus-tickets/hyderabad-to...,Hyderabad to Khammam
2,DMR Travels,A/C Sleeper (2+1),20:20,02:30,06h 10m,INR 700,29 Seats available,3.7,https://www.redbus.in/bus-tickets/hyderabad-to...,Hyderabad to Khammam
3,SBS Tours and Travels,Non A/C Seater / Sleeper (2+1),20:30,02:24,05h 54m,INR 333,28 Seats available,4.3,https://www.redbus.in/bus-tickets/hyderabad-to...,Hyderabad to Khammam
4,Sri Krishna Travels(VGN),Non A/C Seater / Sleeper (2+1),20:35,03:50,07h 15m,INR 550,17 Seats available,3.7,https://www.redbus.in/bus-tickets/hyderabad-to...,Hyderabad to Khammam
...,...,...,...,...,...,...,...,...,...,...
441,Indu Travels,A/C Sleeper (2+1),21:35,02:30,04h 55m,INR 799,17 Seats available,,https://www.redbus.in/bus-tickets/hyderabad-to...,Hyderabad to Kurnool
442,Blue Travels,Non A/C Seater / Sleeper (2+1),21:45,04:00,06h 15m,INR 350,33 Seats available,,https://www.redbus.in/bus-tickets/hyderabad-to...,Hyderabad to Kurnool
443,Matangi Travels,Non A/C Seater / Sleeper (2+1),21:50,01:50,04h 00m,389,20 Seats available,,https://www.redbus.in/bus-tickets/hyderabad-to...,Hyderabad to Kurnool
444,Sri Balaji Transports,Non A/C Seater / Sleeper (2+1),21:50,03:30,05h 40m,INR 600,26 Seats available,,https://www.redbus.in/bus-tickets/hyderabad-to...,Hyderabad to Kurnool
