In [3]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.keys import Keys

import time
import numpy as np

In [4]:
def get_routes_and_links(driver):
    """Collect the visible text and href of every route anchor on the page.

    Waits up to 30 seconds for ``<a class="route">`` elements to be present
    in the DOM, then returns two parallel lists ``(routes, links)``: the
    anchors' text and their ``href`` attribute values, in document order.
    """
    route_anchors = WebDriverWait(driver, 30).until(
        EC.presence_of_all_elements_located((By.XPATH, "//a[@class='route']"))
    )

    # Read text and href together for each anchor, then split the pairs into
    # the two parallel lists the caller expects.
    pairs = [(anchor.text, anchor.get_attribute('href')) for anchor in route_anchors]
    routes = [name for name, _ in pairs]
    links = [href for _, href in pairs]
    return routes, links

def navigate_to_next_page(driver, page_number):
    """Click through to page ``page_number + 1`` of the paginated route list.

    Args:
        driver: Selenium WebDriver positioned on the listing page.
        page_number: The page currently displayed (1-based); the tab whose
            text equals ``page_number + 1`` is clicked.

    Returns:
        True if the click succeeded and the active page tab shows the new
        page number; False if any step failed. Failures are reported via
        ``print`` rather than raised, so callers that ignore the return
        value keep the previous best-effort behaviour.
    """
    wait = WebDriverWait(driver, 30)
    target_page = page_number + 1

    try:
        # Locate the pagination container
        pagination_container = wait.until(EC.presence_of_element_located(
            (By.XPATH, '//*[@id="root"]/div/div[4]/div[12]')
        ))

        # Locate the tab for the target page inside the container
        next_page_button = pagination_container.find_element(
            By.XPATH, f'.//div[contains(@class, "DC_117_pageTabs") and text()="{target_page}"]'
        )

        # Move the pointer to the tab so it is in view before clicking
        ActionChains(driver).move_to_element(next_page_button).perform()
        time.sleep(1)  # Allow time for scroll

        print(f"Clicking on page {target_page}")
        next_page_button.click()

        # Wait until the active tab's text contains the target page number
        wait.until(EC.text_to_be_present_in_element(
            (By.XPATH, '//div[contains(@class, "DC_117_pageTabs DC_117_pageActive")]'), str(target_page)
        ))

        print(f"Successfully navigated to page {target_page}")
        time.sleep(3)  # Wait for the new page to load
        return True

    except Exception as e:
        # Deliberately best-effort: report the failure and let the caller
        # decide whether to continue, instead of aborting the whole scrape.
        print(f"An error occurred while navigating to page {target_page}: {e}")
        return False

def main(url, num_pages, output_path='ch1.csv'):
    """Scrape route names and links across paginated results and save them as CSV.

    Args:
        url: Page to open in a new Chrome session.
        num_pages: Number of result pages to collect, starting from page 1.
        output_path: Destination CSV file (columns ``Route_Name`` and
            ``Link1``). Defaults to ``'ch1.csv'``.

    The browser is always closed, even if scraping raises part-way through.
    If moving to the next page is reported as failed, paging stops early so
    the same page is not scraped twice; whatever was collected is still saved.
    """
    driver = webdriver.Chrome()  # Ensure the correct WebDriver is installed
    all_routes, all_links = [], []

    try:
        driver.get(url)

        for page_number in range(1, num_pages + 1):
            routes, links = get_routes_and_links(driver)
            all_routes.extend(routes)
            all_links.extend(links)

            if page_number < num_pages:  # Skip navigation on the last page
                # `is False` (not falsiness) so a navigator that returns None
                # is treated as "unknown / carry on".
                if navigate_to_next_page(driver, page_number) is False:
                    print(f"Stopping after page {page_number}: could not open page {page_number + 1}")
                    break
    finally:
        driver.quit()  # Close the driver even when scraping fails

    # Save the results to a CSV file
    df = pd.DataFrame({'Route_Name': all_routes, 'Link1': all_links})
    df.to_csv(output_path, index=False)


if __name__ == "__main__":
    # Target listing page whose route links are collected.
    url = "https://www.redbus.in/online-booking/chandigarh-transport-undertaking-ctu"
    # Collect routes from the first 3 result pages.
    main(url, num_pages=3)


Clicking on page 2
Successfully navigated to page 2
Clicking on page 3
Successfully navigated to page 3


In [6]:
# Load the route names and links saved to ch1.csv; the trailing expression
# renders the frame as the cell output.
df1_ch = pd.read_csv('ch1.csv')
df1_ch

Unnamed: 0,Route_Name,Link1
0,Chandigarh to Delhi,https://www.redbus.in/bus-tickets/chandigarh-t...
1,Delhi to Chandigarh,https://www.redbus.in/bus-tickets/delhi-to-cha...
2,Yamuna Nagar to Chandigarh,https://www.redbus.in/bus-tickets/yamuna-nagar...
3,Chandigarh to Vrindavan,https://www.redbus.in/bus-tickets/chandigarh-t...
4,Chandigarh to Shimla,https://www.redbus.in/bus-tickets/chandigarh-t...
5,Chandigarh to Dehradun,https://www.redbus.in/bus-tickets/chandigarh-t...
6,Vrindavan to Chandigarh,https://www.redbus.in/bus-tickets/vrindavan-to...
7,Chandigarh to Yamuna Nagar,https://www.redbus.in/bus-tickets/chandigarh-t...
8,Ludhiana to Chandigarh,https://www.redbus.in/bus-tickets/ludhiana-to-...
9,Dehradun to Chandigarh,https://www.redbus.in/bus-tickets/dehradun-to-...


In [7]:
# Retrieve the bus details for every route listed in df1_ch.
#
# Each list below is one output column; the DataFrame-building cell reads
# them by name, so the names must not change.
Bus_names_Ch = []
Bus_types_Ch = []
Start_Time_Ch = []
End_Time_Ch = []
Ratings_Ch = []
Total_Duration_Ch = []
Prices_Ch = []
Seats_Available_Ch = []
Route_names = []
Route_links = []

# (destination list, XPath) for each per-bus field, in scrape order.
FIELD_XPATHS = [
    (Bus_names_Ch, "//div[@class='travels lh-24 f-bold d-color']"),
    (Bus_types_Ch, "//div[@class='bus-type f-12 m-top-16 l-color evBus']"),
    (Start_Time_Ch, "//*[@class='dp-time f-19 d-color f-bold']"),
    (End_Time_Ch, "//*[@class='bp-time f-19 d-color disp-Inline']"),
    (Total_Duration_Ch, "//*[@class='dur l-color lh-24']"),
    (Ratings_Ch, "//div[@class='rating-sec lh-24']"),
    (Prices_Ch, '//*[@class="fare d-block"]'),
    (Seats_Available_Ch, "//div[contains(@class, 'seat-left')]"),
]

driver = webdriver.Chrome()
try:
    for _, row in df1_ch.iterrows():
        link = row["Link1"]
        routes = row["Route_Name"]

        # Open the route page
        driver.get(link)
        time.sleep(2)

        # Click on elements to reveal bus details
        for element in driver.find_elements(By.XPATH, f"//a[contains(@href, '{link}')]"):
            element.click()
            time.sleep(2)

        # The "view" button is optional. Catch only Selenium errors so that
        # Ctrl-C / kernel interrupt still stops the cell.
        try:
            driver.find_element(By.XPATH, "//div[@class='button']").click()
        except WebDriverException:
            print("No View Button ")
        time.sleep(2)

        # Keep paging down until the page source stops changing, i.e. no
        # further results are being lazily loaded.
        while True:
            old_page_source = driver.page_source
            ActionChains(driver).send_keys(Keys.PAGE_DOWN).perform()
            time.sleep(5)
            if driver.page_source == old_page_source:
                break

        # Extract the text of every field. find_elements returns an empty
        # list (it does not raise) when nothing matches, so no try/except
        # is needed around it.
        scraped = [
            [el.text for el in driver.find_elements(By.XPATH, xpath)]
            for _, xpath in FIELD_XPATHS
        ]

        # Pad every column to the same length *per route*. Without this, one
        # bus lacking e.g. a rating would shift that column for every route
        # scraped afterwards. When all fields have equal counts (the normal
        # case) nothing is padded.
        # NOTE(review): fields are still matched to buses by position within
        # a route; scoping the lookups to each bus's own container element
        # would make this exact - container selector needs confirming.
        n_rows = max(len(texts) for texts in scraped)
        for (column, _), texts in zip(FIELD_XPATHS, scraped):
            column.extend(texts)
            column.extend([np.nan] * (n_rows - len(texts)))
        Route_links.extend([link] * n_rows)
        Route_names.extend([routes] * n_rows)
finally:
    # Always release the browser, including when a page fails mid-scrape.
    driver.quit()

print("Successfully Completed")



No View Button 
No View Button 
No View Button 
No View Button 
No View Button 
No View Button 
No View Button 
No View Button 
Successfully Completed


In [8]:

# Gather the scraped columns into one mapping of column name -> list.
data_ch = dict(
    Bus_name=Bus_names_Ch,
    Bus_type=Bus_types_Ch,
    Start_time=Start_Time_Ch,
    End_time=End_Time_Ch,
    Total_duration=Total_Duration_Ch,
    Price=Prices_Ch,
    Seats_Available=Seats_Available_Ch,
    Rating=Ratings_Ch,
    Route_link=Route_links,
    Route_name=Route_names,
)

# pandas needs equal-length columns: find the longest one...
max_len = max(map(len, data_ch.values()))

# ...and pad the shorter lists with NaN at the end. The lists are extended
# in place, so the scraped lists themselves grow.
for key, column in data_ch.items():
    column.extend([np.nan] * (max_len - len(column)))

# Build the DataFrame from the equal-length columns.
df_buses_1 = pd.DataFrame(data_ch)

# Save to CSV
df_buses_1.to_csv('ch2.csv', index=False)
print("Data saved to ch2.csv successfully!")


Data saved to ch2.csv successfully!


In [9]:
# Load the bus details saved to ch2.csv; the trailing expression renders
# the frame as the cell output.
df2_ch = pd.read_csv("ch2.csv")
df2_ch

Unnamed: 0,Bus_name,Bus_type,Start_time,End_time,Total_duration,Price,Seats_Available,Rating,Route_link,Route_name
0,Chandigarh Transport Undertaking (CTU) - 165681,HVAC Seater (2+3),07:00,12:35,05h 35m,INR 414.3,47 Seats available,4.2,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh to Delhi
1,Chandigarh Transport Undertaking (CTU) - 165684,HVAC Seater (2+3),07:55,13:30,05h 35m,INR 414.3,46 Seats available,4.0,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh to Delhi
2,Chandigarh Transport Undertaking (CTU) - 165687,HVAC Seater (2+3),08:25,14:00,05h 35m,INR 414.3,47 Seats available,3.8,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh to Delhi
3,Chandigarh Transport Undertaking (CTU) - 165676,HVAC Seater (2+3),09:25,15:00,05h 35m,INR 414.3,47 Seats available,3.7,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh to Delhi
4,,,,,,,,,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh to Delhi
...,...,...,...,...,...,...,...,...,...,...
174,Chandigarh Transport Undertaking (CTU) - 165906,HVAC Seater (2+3),20:30,07:30,11h 00m,INR 740.6,47 Seats available,4.0,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh to Agra
175,Laxmi holidays,Bharat Benz A/C Seater /Sleeper (2+1),16:00,01:20,09h 20m,INR 711,40 Seats available,4.8,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh to Agra
176,Vijay Tour and Travels,Bharat Benz A/C Seater /Sleeper (2+1),17:30,04:30,11h 00m,989,35 Seats available,3.4,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh to Agra
177,Sanskar Travels,Bharat Benz A/C Seater /Sleeper (2+1),17:00,01:04,08h 04m,INR 799,25 Seats available,3.8,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh to Agra
