In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver import ActionChains
import time
import pandas as pd
from selenium.webdriver.support import expected_conditions as EC
import mysql.connector

# Connect Database
db_connection = mysql.connector.connect(
    host="localhost",
    user="root",
    password="boot",
    database="redbus"
)
cursor = db_connection.cursor()
print("Database connected Successfully")

# SQL to create table
create_table_query = """
CREATE TABLE IF NOT EXISTS BusDetails(
    id INT PRIMARY KEY AUTO_INCREMENT,
    state VARCHAR(100) DEFAULT 'Rajasthan',
    route_name VARCHAR(255),
    route_link VARCHAR(255),
    bus_name VARCHAR(255),
    bus_type VARCHAR(255),
    departing_time TEXT,
    duration VARCHAR(50),
    reaching_time TEXT,
    star_rating TEXT,
    price VARCHAR(50),
    seat_availability TEXT
);
"""
cursor.execute(create_table_query)
print("Table created successfully")

# Initialize the WebDriver
driver = webdriver.Chrome()
driver.implicitly_wait(5)
wait = WebDriverWait(driver, 30)

# Navigate to the RSRTC Bus Routes page
driver.get('https://www.redbus.in/online-booking/rsrtc/?utm_source=rtchometile')
driver.maximize_window()
time.sleep(2)

# Scrape route data
route_data = []
def scrape_routes():
    routes = driver.find_elements(By.XPATH, "//a[@class='route']")
    for route in routes:
        route_data.append({
            'Route Name': route.text,
            'Route Link': route.get_attribute('href')
        })
scrape_routes()
print("scraped routes successfully")

# Scroll to load more routes
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

# Pagination to collect routes from multiple pages
for page_number in range(1, 6):
    try:
        pagination_container = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="root"]/div/div[4]/div[12]')))
        next_page_button = pagination_container.find_element(By.XPATH, f'.//div[contains(@class,"DC_117_pageTabs") and text()="{page_number + 1}"]')

        actions = ActionChains(driver)
        actions.move_to_element(next_page_button).perform()
        next_page_button.click()
        time.sleep(1)

        wait.until(EC.text_to_be_present_in_element((By.XPATH, f'.//div[contains(@class,"DC_117_pageTabs DC_117_pageActive")]'), str(page_number + 1)))
        time.sleep(2)

        scrape_routes()

    except Exception as e:
        print(e)
        break

def collect_bus_details():
    alldetails = []
    for data in route_data:
        link = data['Route Link']
        route_name = data['Route Name']
        try:
            driver.get(link)
            time.sleep(2)

            # Click the 'View Buses' button if it exists
            try:
                # Wait for the button to be clickable
                view_buses_buttons = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//div[@class='button']")))
                
                for view_buses_button in view_buses_buttons:
                    wait.until(EC.element_to_be_clickable(view_buses_button)).click()
                    time.sleep(5)
            except Exception as e:
                print(e)

            # Scroll to load more buses
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(5)

            bus_containers = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//div[contains(@class, 'clearfix bus-item')]")))
            
            for bus_container in bus_containers:
                try:
                    busname = bus_container.find_element(By.XPATH, ".//div[@class='travels lh-24 f-bold d-color']").text
                    bustype = bus_container.find_element(By.XPATH, ".//div[@class='bus-type f-12 m-top-16 l-color evBus']").text
                    departuretiming = bus_container.find_element(By.XPATH, ".//div[@class='dp-time f-19 d-color f-bold']").text
                    duration = bus_container.find_element(By.XPATH, ".//div[@class='dur l-color lh-24']").text
                    reachtiming = bus_container.find_element(By.XPATH, ".//div[@class='bp-time f-19 d-color disp-Inline']").text
                    rating = bus_container.find_element(By.XPATH, ".//div[@class='rating-sec lh-24']").text
                    price = bus_container.find_element(By.XPATH, ".//span[contains(@class,'f-19 f-bold') or contains(@class,'f-bold f-19')]").text
                    seat = bus_container.find_element(By.XPATH, ".//div[contains(@class, 'seat-left m-top-30') or contains(@class,'seat-left m-top-16')]").text
                    
                    details = {
                        'State':"Rajasthan",
                        'Route Name': route_name,
                        'Route Link': link,
                        'Bus Name': busname,
                        'Bus Type': bustype,
                        'Departing Time': departuretiming,
                        'Duration': duration,
                        'Reaching Time': reachtiming,
                        'Star Rating': rating,
                        'Price': price,
                        'Seat Availability': seat,
                    }

                    # Check if the details already exist in the list
                    if details not in alldetails:
                        print(details)
                        alldetails.append(details)

                        # Insert data into the database
                        insert_query = """
                        INSERT IGNORE INTO BusDetails (state, route_name, route_link, bus_name, bus_type, departing_time, duration, reaching_time, star_rating, price, seat_availability)
                        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                        """
                        cursor.execute(insert_query, (
                            details['State'],
                            details['Route Name'],
                            details['Route Link'],
                            details['Bus Name'],
                            details['Bus Type'],
                            details['Departing Time'],
                            details['Duration'],
                            details['Reaching Time'],
                            details['Star Rating'],
                            details['Price'],
                            details['Seat Availability']
                        ))
                        db_connection.commit()

                except Exception as e:
                    print(e)

        except Exception as e:
            print(e)
    return alldetails

# Collect bus details and save to CSV
route_details = collect_bus_details()
df = pd.DataFrame(route_details)
df.to_csv("Rajasthan.csv", index=False)

# Close the WebDriver
driver.quit()

# Close the Database Connection
cursor.close()
db_connection.close()

Database connected Successfully
Table created successfully
scraped routes successfully
Message: no such element: Unable to locate element: {"method":"xpath","selector":".//div[contains(@class,"DC_117_pageTabs") and text()="3"]"}
  (Session info: chrome=131.0.6778.86); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
	GetHandleVerifier [0x00007FF7B1966CB5+28821]
	(No symbol) [0x00007FF7B18D3840]
	(No symbol) [0x00007FF7B177578A]
	(No symbol) [0x00007FF7B17C91BE]
	(No symbol) [0x00007FF7B17C94AC]
	(No symbol) [0x00007FF7B17BC52C]
	(No symbol) [0x00007FF7B17EF33F]
	(No symbol) [0x00007FF7B17BC3F6]
	(No symbol) [0x00007FF7B17EF510]
	(No symbol) [0x00007FF7B180F412]
	(No symbol) [0x00007FF7B17EF0A3]
	(No symbol) [0x00007FF7B17BA778]
	(No symbol) [0x00007FF7B17BB8E1]
	GetHandleVerifier [0x00007FF7B1C9FCAD+3408013]
	GetHandleVerifier [0x00007FF7B1CB741F+3504127]
	GetHandleVerifier [0x00