In [1]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.keys import Keys
import numpy as np
import time

In [2]:
def get_routes_and_links(driver):
    """Collect the text and href of every route anchor on the current page.

    Waits up to 30 seconds for ``<a class="route">`` elements to be present,
    then returns two parallel lists ``(routes, links)`` in page order.
    """
    anchors = WebDriverWait(driver, 30).until(
        EC.presence_of_all_elements_located((By.XPATH, "//a[@class='route']"))
    )

    # Read each anchor once (text first, then href) so both lists stay paired.
    pairs = [(anchor.text, anchor.get_attribute('href')) for anchor in anchors]
    routes = [name for name, _ in pairs]
    links = [href for _, href in pairs]
    return routes, links

def navigate_to_next_page(driver, page_number):
    """Click the pagination tab for ``page_number + 1`` and wait until it is active.

    Args:
        driver: Selenium WebDriver currently showing the paginated listing.
        page_number: Number of the page currently displayed; the tab whose
            text equals ``page_number + 1`` is clicked.

    Returns:
        True when the tab was clicked and the active-tab element showed the
        new page number; False when any step failed. Failures are printed
        rather than raised, so the caller decides whether to continue.
    """
    target_page = page_number + 1
    wait = WebDriverWait(driver, 30)

    try:
        # Locate the pagination container
        pagination_container = wait.until(EC.presence_of_element_located(
            (By.XPATH, '//*[@id="root"]/div/div[4]/div[12]')
        ))

        # Locate the next page button inside the container
        next_page_button = pagination_container.find_element(
            By.XPATH, f'.//div[contains(@class, "DC_117_pageTabs") and text()="{target_page}"]'
        )

        # Scroll to the next page button
        ActionChains(driver).move_to_element(next_page_button).perform()
        time.sleep(1)  # Allow time for scroll

        print(f"Clicking on page {target_page}")
        next_page_button.click()

        # Wait for the active tab to show the new page number
        wait.until(EC.text_to_be_present_in_element(
            (By.XPATH, '//div[contains(@class, "DC_117_pageTabs DC_117_pageActive")]'), str(target_page)
        ))

        print(f"Successfully navigated to page {target_page}")
        time.sleep(3)  # Wait for the new page to load

    except Exception as e:
        # Best effort: report the problem and signal failure instead of raising.
        print(f"An error occurred while navigating to page {target_page}: {e}")
        return False

    return True

def main(url, num_pages):
    """Scrape route names and links from the listing pages and save them to kada1.csv.

    Args:
        url: Address of the first listing page to open.
        num_pages: Maximum number of pages to scrape, starting from the first.

    The browser is always closed, even when scraping raises. The CSV is
    written only when scraping finishes without an exception, with the
    columns ``Route_Name`` and ``Link1``.
    """
    all_routes, all_links = [], []

    driver = webdriver.Chrome()  # Ensure the correct WebDriver is installed
    try:
        driver.get(url)

        for page_number in range(1, num_pages + 1):
            routes, links = get_routes_and_links(driver)
            all_routes.extend(routes)
            all_links.extend(links)

            if page_number == num_pages:  # Skip navigation on the last page
                break

            # Stop only on an explicit False from navigate_to_next_page; any
            # other result, including None, lets the scrape continue. Carrying
            # on after a failed navigation would scrape the same page again.
            if navigate_to_next_page(driver, page_number) is False:
                print(f"Stopping after page {page_number}: could not reach page {page_number + 1}")
                break
    finally:
        driver.quit()  # Close the driver even if scraping failed

    # Save the results to a CSV file
    df = pd.DataFrame({'Route_Name': all_routes, 'Link1': all_links})
    df.to_csv('kada1.csv', index=False)

if __name__ == "__main__":
    # Your target URL
    url = "https://www.redbus.in/online-booking/ktcl/?utm_source=rtchometile"
    # Scrape data from the first 5 pages
    main(url, num_pages=5)


Clicking on page 2
Successfully navigated to page 2
Clicking on page 3
Successfully navigated to page 3
Clicking on page 4
Successfully navigated to page 4
Clicking on page 5
Successfully navigated to page 5


In [3]:
# Load the route names and links from kada1.csv and display them.
df1_Ka = pd.read_csv("kada1.csv")
df1_Ka


Unnamed: 0,Route_Name,Link1
0,Pune to Goa,https://www.redbus.in/bus-tickets/pune-to-goa
1,Goa to Pune,https://www.redbus.in/bus-tickets/goa-to-pune
2,Mumbai to Goa,https://www.redbus.in/bus-tickets/mumbai-to-goa
3,Bangalore to Goa,https://www.redbus.in/bus-tickets/bangalore-to...
4,Belagavi to Goa,https://www.redbus.in/bus-tickets/belagavi-to-goa
5,Goa to Bangalore,https://www.redbus.in/bus-tickets/goa-to-banga...
6,Goa to Mumbai,https://www.redbus.in/bus-tickets/goa-to-mumbai
7,Goa to Belagavi,https://www.redbus.in/bus-tickets/goa-to-belagavi
8,Pandharpur to Goa,https://www.redbus.in/bus-tickets/pandharpur-t...
9,Goa to Pandharpur,https://www.redbus.in/bus-tickets/goa-to-pandh...


In [4]:
# Retrieve the bus details for every route in df1_Ka.
driver = webdriver.Chrome()

# Result lists: one entry per bus, kept the same length as each other so the
# rows can be assembled positionally afterwards.
Bus_names_k = []
Bus_types_k = []
Start_Time_k = []
End_Time_k = []
Ratings_k = []
Total_Duration_k = []
Prices_k = []
Seats_Available_k = []
Route_names = []
Route_links = []

# XPath of each scraped field paired with the list that collects its text.
# The bus name comes first: its match count is taken as the number of buses
# found on the route page.
field_targets_k = [
    ("//div[@class='travels lh-24 f-bold d-color']", Bus_names_k),
    ("//div[@class='bus-type f-12 m-top-16 l-color evBus']", Bus_types_k),
    ("//*[@class='dp-time f-19 d-color f-bold']", Start_Time_k),
    ("//*[@class='bp-time f-19 d-color disp-Inline']", End_Time_k),
    ("//*[@class='dur l-color lh-24']", Total_Duration_k),
    ("//div[@class='rating-sec lh-24']", Ratings_k),
    ('//*[@class="fare d-block"]', Prices_k),
    ("//div[contains(@class, 'seat-left')]", Seats_Available_k),
]

try:
    # Loop through each link
    for i, r in df1_Ka.iterrows():
        link = r["Link1"]
        routes = r["Route_Name"]

        driver.get(link)
        time.sleep(10)  # fixed pause after loading the route page

        # Click on elements to reveal bus details
        elements = driver.find_elements(By.XPATH, f"//a[contains(@href, '{link}')]")
        for element in elements:
            element.click()
            time.sleep(2)
        try:
            clicks = driver.find_element(By.XPATH, "//div[@class='button']")
            clicks.click()
        except WebDriverException:
            # Only Selenium failures (missing or unclickable button) are
            # expected here; anything else, e.g. KeyboardInterrupt, propagates.
            print("no view button")
        time.sleep(2)

        # Keep sending PAGE_DOWN until the page source stops changing.
        while True:
            old_page_source = driver.page_source
            ActionChains(driver).send_keys(Keys.PAGE_DOWN).perform()
            time.sleep(2)
            if driver.page_source == old_page_source:
                break

        # Extract the text of every field for this route.
        scraped = [
            [found.text for found in driver.find_elements(By.XPATH, xpath)]
            for xpath, _ in field_targets_k
        ]
        bus_count = len(scraped[0])

        # Pad with NaN (or truncate) every field to this route's bus count.
        # A missing value then cannot shift the column for later routes.
        # NOTE(review): within one route values are still matched by position;
        # extracting fields per bus card would be more accurate - confirm the
        # page structure before changing.
        for values, (_, target) in zip(scraped, field_targets_k):
            target.extend((values + [np.nan] * bus_count)[:bus_count])
        Route_links.extend([link] * bus_count)
        Route_names.extend([routes] * bus_count)
finally:
    driver.quit()  # always release the browser, even if a route fails

print("Successfully Completed")



no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
no view button
Successfully Completed


In [5]:
# Proceed with creating the DataFrame
scraped_columns_Ka = {
    'Bus_name': Bus_names_k,
    'Bus_type': Bus_types_k,
    'Start_time': Start_Time_k,
    'End_time': End_Time_k,
    'Total_duration': Total_Duration_k,
    'Price': Prices_k,
    "Rating": Ratings_k,
    "Seats_Available": Seats_Available_k,
    'Route_link': Route_links,
    'Route_name': Route_names
}

# Determine the maximum length
max_len = max(len(values) for values in scraped_columns_Ka.values())

# Padding hides missing values, so report any column that needs it.
short_columns = {
    key: len(values)
    for key, values in scraped_columns_Ka.items()
    if len(values) < max_len
}
if short_columns:
    print(f"Warning: padding columns shorter than {max_len} rows with NaN: {short_columns}")

# Normalize the length of each column by padding with NaN. The padding is
# applied to copies so the scraped lists themselves are left unchanged.
data_Ka = {
    key: list(values) + [np.nan] * (max_len - len(values))
    for key, values in scraped_columns_Ka.items()
}

# Create DataFrame
df_buses_1 = pd.DataFrame(data_Ka)

# Save to CSV
df_buses_1.to_csv('kada2.csv', index=False)
print("Data saved to kada2.csv successfully!")


Data saved to kada2.csv successfully!


In [6]:
# Load the bus details from kada2.csv and display them.
df2_Ka = pd.read_csv("kada2.csv")
df2_Ka

Unnamed: 0,Bus_name,Bus_type,Start_time,End_time,Total_duration,Price,Rating,Seats_Available,Route_link,Route_name
0,Kadamba Transport Corporation Limited (KTCL) -...,A/C Sleeper (2+1),19:15,05:50,10h 35m,INR 1000,3.8,14 Seats available,https://www.redbus.in/bus-tickets/pune-to-goa,Pune to Goa
1,Atmaram Gobus,VE A/C Sleeper (2+1),21:00,07:30,10h 30m,INR 900,4.8,16 Seats available,https://www.redbus.in/bus-tickets/pune-to-goa,Pune to Goa
2,Atmaram Gobus,Bharat Benz A/C Sleeper (2+1),23:30,08:45,09h 15m,INR 950,4.8,4 Seats available,https://www.redbus.in/bus-tickets/pune-to-goa,Pune to Goa
3,AdAshray Grand,Volvo Multi-Axle A/C Sleeper (2+1),23:00,10:00,11h 00m,INR 1000,4.1,23 Seats available,https://www.redbus.in/bus-tickets/pune-to-goa,Pune to Goa
4,Atmaram Gobus,VE A/C Sleeper (2+1),22:30,08:45,10h 15m,INR 900,4.6,11 Seats available,https://www.redbus.in/bus-tickets/pune-to-goa,Pune to Goa
...,...,...,...,...,...,...,...,...,...,...
234,Humsafar Travels,Bharat Benz A/C Sleeper (2+1),14:30,09:30,19h 00m,INR 1350,3.9,19 Seats available,https://www.redbus.in/bus-tickets/shirdi-to-goa,Shirdi to Goa
235,PSR Travels Goa (Parrikar),VE A/C Sleeper (2+1),18:00,09:00,15h 00m,INR 1299,3.9,25 Seats available,https://www.redbus.in/bus-tickets/shirdi-to-goa,Shirdi to Goa
236,Kadamba Transport Corporation Limited (KTCL) -...,Volvo AC Seater 2+2,13:30,14:01,00h 31m,INR 100,,38 Seats available,https://www.redbus.in/bus-tickets/calangute-go...,Calangute (goa) to Panaji
237,Kadamba Transport Corporation Limited (KTCL) -...,AC Seater (2+2),15:30,16:01,00h 31m,INR 100,,41 Seats available,https://www.redbus.in/bus-tickets/calangute-go...,Calangute (goa) to Panaji


In [33]:
# Show the last 20 rows of the bus details.
df2_Ka.tail(n=20)

Unnamed: 0,Bus_name,Bus_type,Start_time,End_time,Total_duration,Price,Rating,Seats_Available,Route_link,Route_name
166,Kadamba Transport Corporation Limited (KTCL) -...,A/C Seater (2+3),17:30,18:00,00h 30m,INR 150,4.7,47 Seats available,https://www.redbus.in/bus-tickets/mopa-airport...,Mopa Airport to Goa
167,Ashray Travels,Bharat Benz A/C Sleeper (2+1),22:30,08:00,09h 30m,INR 900,4.8,9 Seats available,https://www.redbus.in/bus-tickets/goa-to-satara,Goa to Satara
168,IntrCity SmartBus,Bharat Benz A/C Sleeper (2+1),21:30,05:40,08h 10m,INR 900,4.7,3 Seats available,https://www.redbus.in/bus-tickets/goa-to-satara,Goa to Satara
169,Atmaram Gobus,VE A/C Sleeper (2+1),22:30,07:00,08h 30m,INR 900,4.5,6 Seats available,https://www.redbus.in/bus-tickets/goa-to-satara,Goa to Satara
170,Ashray Grand,Volvo Multi-Axle A/C Sleeper (2+1),22:00,07:30,09h 30m,INR 1050,4.6,6 Seats available,https://www.redbus.in/bus-tickets/goa-to-satara,Goa to Satara
171,IntrCity SmartBus,A/C Sleeper (2+1),19:45,03:55,08h 10m,INR 900,4.3,7 Seats available,https://www.redbus.in/bus-tickets/goa-to-satara,Goa to Satara
172,Atmaram Gobus,NON A/C Sleeper (2+1),20:01,05:16,09h 15m,INR 750,4.4,17 Seats available,https://www.redbus.in/bus-tickets/goa-to-satara,Goa to Satara
173,IntrCity SmartBus,Bharat Benz A/C Seater /Sleeper (2+1),20:30,04:40,08h 10m,INR 600,3.7,7 Seats available,https://www.redbus.in/bus-tickets/goa-to-satara,Goa to Satara
174,Atmaram Travels ( Gogte ),NON A/C Sleeper (2+1),21:01,05:31,08h 30m,INR 800,4.5,17 Seats available,https://www.redbus.in/bus-tickets/goa-to-satara,Goa to Satara
175,Giroba Travels (Anand Royal),Non A/C Seater / Sleeper (2+1),18:30,04:45,10h 15m,INR 500,4.3,25 Seats available,https://www.redbus.in/bus-tickets/goa-to-satara,Goa to Satara
