In [1]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.keys import Keys
import numpy as np
import time

In [2]:
def get_routes_and_links(driver):
    """Collect the text and href of every route anchor on the current page.

    Waits up to 30 seconds for at least one ``<a class="route">`` element to
    be present, then returns two parallel lists: the anchors' visible text
    and their ``href`` attribute values, in document order.
    """
    anchors = WebDriverWait(driver, 30).until(
        EC.presence_of_all_elements_located((By.XPATH, "//a[@class='route']"))
    )

    # Read text and href together per anchor so both lists stay index-aligned.
    pairs = [(anchor.text, anchor.get_attribute('href')) for anchor in anchors]

    routes = [name for name, _ in pairs]
    links = [href for _, href in pairs]
    return routes, links

def navigate_to_next_page(driver, page_number):
    """Click the pagination tab for page ``page_number + 1`` and wait for it to load.

    Args:
        driver: Selenium WebDriver currently displaying page ``page_number``.
        page_number: Number of the page currently displayed; the tab whose
            text equals ``page_number + 1`` is clicked.

    Returns:
        True if the tab was clicked and the active-tab element showed the new
        page number within the timeout; False if any step failed. Failures are
        printed rather than raised, so callers that want to avoid re-scraping
        the same page should check the return value.
    """
    wait = WebDriverWait(driver, 30)
    target_page = page_number + 1

    try:
        # Locate the pagination container
        pagination_container = wait.until(EC.presence_of_element_located(
            (By.XPATH, '//*[@id="root"]/div/div[4]/div[12]')
        ))

        # Locate the tab for the next page inside the container
        next_page_button = pagination_container.find_element(
            By.XPATH, f'.//div[contains(@class, "DC_117_pageTabs") and text()="{target_page}"]'
        )

        # Move to the button so it is scrolled into view before clicking
        actions = ActionChains(driver)
        actions.move_to_element(next_page_button).perform()
        time.sleep(1)  # Allow time for scroll

        print(f"Clicking on page {target_page}")
        next_page_button.click()

        # Wait until the active tab shows the new page number
        wait.until(EC.text_to_be_present_in_element(
            (By.XPATH, '//div[contains(@class, "DC_117_pageTabs DC_117_pageActive")]'), str(target_page)
        ))

        print(f"Successfully navigated to page {target_page}")
        time.sleep(3)  # Wait for the new page to load
        return True

    except Exception as e:
        # Best-effort navigation: report the problem and signal failure to the caller.
        print(f"An error occurred while navigating to page {target_page}: {e}")
        return False

def main(url, num_pages, output_path='j&k1.csv'):
    """Scrape route names and links from ``num_pages`` pages and save them to CSV.

    Args:
        url: Address of the first listing page to open.
        num_pages: Number of pages to scrape, starting from the first.
        output_path: Destination CSV file; defaults to ``'j&k1.csv'``.

    The browser is always closed, even when scraping raises. In that case the
    exception propagates and no CSV is written.
    """
    driver = webdriver.Chrome()  # Ensure the correct WebDriver is installed
    all_routes, all_links = [], []

    try:
        driver.get(url)

        for page_number in range(1, num_pages + 1):
            routes, links = get_routes_and_links(driver)
            all_routes.extend(routes)
            all_links.extend(links)

            if page_number < num_pages:  # Skip navigation on the last page
                navigate_to_next_page(driver, page_number)
    finally:
        # Release the browser process regardless of how scraping ended.
        driver.quit()

    # Save the results to a CSV file
    df = pd.DataFrame({'Route_Name': all_routes, 'Link1': all_links})
    df.to_csv(output_path, index=False)

if __name__ == "__main__":
    # Your target URL
    url = "https://www.redbus.in/online-booking/jkrtc"
    # Scrape data from the first 2 pages
    main(url=url, num_pages=2)


Clicking on page 2
Successfully navigated to page 2


In [3]:
# Load the route names and links saved by the route scraper
df1_jk = pd.read_csv(filepath_or_buffer="j&k1.csv")
df1_jk


Unnamed: 0,Route_Name,Link1
0,Srinagar to Jammu (j and k),https://www.redbus.in/bus-tickets/srinagar-to-...
1,Jammu (j and k) to Srinagar,https://www.redbus.in/bus-tickets/jammu-to-sri...
2,Delhi to Srinagar,https://www.redbus.in/bus-tickets/delhi-to-sri...
3,Jammu (j and k) to Delhi,https://www.redbus.in/bus-tickets/jammu-to-delhi
4,Delhi to Jammu (j and k),https://www.redbus.in/bus-tickets/delhi-to-jammu
5,Jammu (j and k) to Amritsar,https://www.redbus.in/bus-tickets/jammu-to-amr...
6,Jaipur (Rajasthan) to Delhi,https://www.redbus.in/bus-tickets/jaipur-to-delhi
7,Amritsar to Jammu (j and k),https://www.redbus.in/bus-tickets/amritsar-to-...
8,Chandigarh to Jammu (j and k),https://www.redbus.in/bus-tickets/chandigarh-t...
9,Jammu (j and k) to Chandigarh,https://www.redbus.in/bus-tickets/jammu-to-cha...


In [4]:
# Retrieve the bus details for every route listed in df1_jk.
# Results accumulate in the parallel lists below (one entry per bus).
driver = webdriver.Chrome()
Bus_names_jk = []
Bus_types_jk = []
Start_Time_jk = []
End_Time_jk = []
Ratings_jk = []
Total_Duration_jk = []
Prices_jk = []
Seats_Available_jk = []
Route_names = []
Route_links = []


def _element_texts(xpath):
    """Return the text of every element on the current page that matches ``xpath``."""
    return [element.text for element in driver.find_elements(By.XPATH, xpath)]


def _fit_to_length(values, size, label, route):
    """Return ``values`` padded with None (or truncated) to exactly ``size`` entries.

    A mismatch means some buses lacked this field (or extra elements matched),
    so the values may not line up with the right bus inside this route; a
    warning is printed so the affected route can be checked. Keeping every
    list the same length stops the shift from spilling into later routes and
    keeps the lists usable as DataFrame columns.
    """
    if len(values) != size:
        print(f"Warning: {route}: found {len(values)} {label} values for {size} buses")
    return (values + [None] * size)[:size]


try:
    # Loop through each route link
    for i, r in df1_jk.iterrows():
        link = r["Link1"]
        routes = r["Route_Name"]

        driver.get(link)
        time.sleep(2)

        # Click on elements to reveal bus details
        elements = driver.find_elements(By.XPATH, f"//a[contains(@href, '{link}')]")
        for element in elements:
            element.click()
            time.sleep(2)

        # The view button is optional; only Selenium errors (e.g. element not
        # found) are treated as "no button" so Ctrl-C still interrupts the run.
        try:
            driver.find_element(By.XPATH, "//div[@class='button']").click()
        except WebDriverException:
            print("No View Button")
        time.sleep(2)

        # Keep paging down until the page source stops changing, so that
        # lazily loaded bus entries are present before extraction.
        while True:
            old_page_source = driver.page_source
            ActionChains(driver).send_keys(Keys.PAGE_DOWN).perform()
            time.sleep(5)
            if driver.page_source == old_page_source:
                break

        # Extract bus details; the bus names define how many rows this route has.
        names = _element_texts("//div[@class='travels lh-24 f-bold d-color']")
        bus_count = len(names)
        Bus_names_jk.extend(names)
        Route_links.extend([link] * bus_count)
        Route_names.extend([routes] * bus_count)

        # find_elements returns an empty list when nothing matches, so no
        # try/except is needed around any of these lookups.
        fields = (
            ("bus type", "//div[@class='bus-type f-12 m-top-16 l-color evBus']", Bus_types_jk),
            ("start time", "//*[@class='dp-time f-19 d-color f-bold']", Start_Time_jk),
            ("end time", "//*[@class='bp-time f-19 d-color disp-Inline']", End_Time_jk),
            ("duration", "//*[@class='dur l-color lh-24']", Total_Duration_jk),
            ("rating", "//div[@class='rating-sec lh-24']", Ratings_jk),
            ("price", '//*[@class="fare d-block"]', Prices_jk),
            ("seats", "//div[contains(@class, 'seat-left')]", Seats_Available_jk),
        )
        for label, xpath, target in fields:
            target.extend(_fit_to_length(_element_texts(xpath), bus_count, label, routes))
finally:
    # Always release the browser, even if a route fails part-way through.
    driver.quit()

print("Successfully Completed")



No View Button
No View Button
No View Button
No View Button
No View Button
No View Button
No View Button
No View Button
No View Button
No View Button
No View Button
No View Button
No View Button
No View Button
Successfully Completed


In [5]:
# Gather the scraped lists into named columns (one row per bus)
data_jk = dict(
    Bus_name=Bus_names_jk,
    Bus_type=Bus_types_jk,
    Start_time=Start_Time_jk,
    End_time=End_Time_jk,
    Total_duration=Total_Duration_jk,
    Price=Prices_jk,
    Seats_Available=Seats_Available_jk,
    Rating=Ratings_jk,
    Route_link=Route_links,
    Route_name=Route_names,
)

# Build the table from the column mapping
df_buses_1 = pd.DataFrame(data=data_jk)

# Persist the table without the index column
df_buses_1.to_csv(path_or_buf='j&K2.csv', index=False)
print("Data saved to j&K2.csv successfully!")


Data saved to j&K2.csv successfully!


In [6]:
# Reload the saved bus details. The name must match the 'j&K2.csv' written by
# the save step exactly, because file names are case-sensitive on Linux.
df2_jk = pd.read_csv("j&K2.csv")
df2_jk

Unnamed: 0,Bus_name,Bus_type,Start_time,End_time,Total_duration,Price,Seats_Available,Rating,Route_link,Route_name
0,Jamna Travels-Jammu,Non AC Seater (2+2),19:00,06:45,11h 45m,INR 549,31 Seats available,1.6,https://www.redbus.in/bus-tickets/srinagar-to-...,Srinagar to Jammu (j and k)
1,New Pal Travels,NON A/C Seater / Sleeper (2+2),21:30,07:30,10h 00m,INR 799,36 Seats available,1.8,https://www.redbus.in/bus-tickets/jammu-to-sri...,Jammu (j and k) to Srinagar
2,Jamna Travels-Jammu,Non AC Seater (2+2),21:30,08:00,10h 30m,INR 549,21 Seats available,2.3,https://www.redbus.in/bus-tickets/jammu-to-sri...,Jammu (j and k) to Srinagar
3,North Kashmir Tour and Travels,NON A/C Sleeper (2+2),17:15,03:15,10h 00m,INR 699,40 Seats available,1.6,https://www.redbus.in/bus-tickets/jammu-to-sri...,Jammu (j and k) to Srinagar
4,North Kashmir Tour and Travels,NON A/C Sleeper (2+2),16:30,02:30,10h 00m,INR 699,40 Seats available,1.6,https://www.redbus.in/bus-tickets/jammu-to-sri...,Jammu (j and k) to Srinagar
...,...,...,...,...,...,...,...,...,...,...
117,JKRTC - 178990,NON A/C Seater (2+3),17:30,05:30,12h 00m,INR 828,42 Seats available,3.6,https://www.redbus.in/bus-tickets/jammu-to-har...,Jammu (j and k) to Haridwar
118,Laxmi holidays,Bharat Benz A/C Seater /Sleeper (2+1),20:50,09:30,12h 40m,INR 576,28 Seats available,4.4,https://www.redbus.in/bus-tickets/jammu-to-har...,Jammu (j and k) to Haridwar
119,Laxmi holidays,Bharat Benz A/C Seater /Sleeper (2+1),17:45,06:00,12h 15m,INR 599,30 Seats available,4.6,https://www.redbus.in/bus-tickets/jammu-to-har...,Jammu (j and k) to Haridwar
120,City Land Travels,A/C Seater/Sleeper (2+1),20:30,09:00,12h 30m,529,20 Seats available,3.6,https://www.redbus.in/bus-tickets/jammu-to-har...,Jammu (j and k) to Haridwar
