In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import time
import mysql.connector

# Connect Database
db_connection = mysql.connector.connect(
    host="localhost",
    user="root",
    password="1234",
    database="redbus"
)
cursor = db_connection.cursor()
print("Database connected Successfully")

# SQL to create table
create_table_query = """
CREATE TABLE IF NOT EXISTS BusDetails(
    id INT PRIMARY KEY AUTO_INCREMENT,
    state VARCHAR(100) DEFAULT 'Andhra Pradesh',
    route_name VARCHAR(255),
    route_link VARCHAR(255),
    bus_name VARCHAR(255),
    bus_type VARCHAR(255),
    departing_time TEXT,
    duration VARCHAR(50),
    reaching_time TEXT,
    star_rating TEXT,
    price VARCHAR(50),
    seat_availability TEXT
);
"""
cursor.execute(create_table_query)
print("Table created successfully")

# Initialize WebDriver and WebDriverWait
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)

# Open the webpage
driver.get("")
driver.maximize_window()
time.sleep(2)

# Function to scrape route data
def scrape_page():
    routescontainer = wait.until(EC.presence_of_all_elements_located((By.XPATH, '')))
    route_data = []  # List to store the scraped route data
    for route in routescontainer:
        route_data.append({
            'Route Name': route.text,
            'Route Link': route.get_attribute('href')
        })
    return route_data  # Return the scraped data

# Function to navigate pages
def navigate_pages():
    all_routes = []
    for page_number in range(1, 6):  # Limit to 5 pages
        try:
            routes = scrape_page()
            all_routes.extend(routes)
            pagination_container = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="root"]/div/div[4]/div[12]')))
            next_page_button = pagination_container.find_element(By.XPATH, f'.//div[contains(@class,"DC_117_pageTabs") and text()="{page_number + 1}"]')
            next_page_button.click()
            wait.until(EC.text_to_be_present_in_element(
                (By.XPATH, '//div[contains(@class, "DC_117_pageTabs DC_117_pageActive")]'), str(page_number + 1))
            )
            time.sleep(2)  # Allow page to load
        except Exception as e:
            print(f"Pagination error at page {page_number + 1}: {e}")
            break
    return all_routes

# Function to scrape bus details
def collect_bus_details(route_data):
    alldetails = []
    for data in route_data:
        link = data['Route Link']
        route_name = data['Route Name']
        try:
            driver.get(link)
            time.sleep(2)
            try:
                view_buses_buttons = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//div[@class='button']")))
                for button in view_buses_buttons:
                    wait.until(EC.element_to_be_clickable(button)).click()
                    time.sleep(3)  # Wait for the buses to load
            except Exception as e:
                print(f"Error clicking 'View Buses' button for {route_name}: {e}")

            bus_containers = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//div[contains(@class, 'clearfix row-one')]")))
            for bus_container in bus_containers:
                try:
                    busname = bus_container.find_element(By.XPATH, ".//div[@class='travels lh-24 f-bold d-color']").text
                    bustype = bus_container.find_element(By.XPATH, ".//div[@class='bus-type f-12 m-top-16 l-color evBus']").text
                    departuretiming = bus_container.find_element(By.XPATH, ".//div[@class='dp-time f-19 d-color f-bold']").text
                    duration = bus_container.find_element(By.XPATH, ".//div[@class='dur l-color lh-24']").text
                    reachtiming = bus_container.find_element(By.XPATH, ".//div[@class='bp-time f-19 d-color disp-Inline']").text
                    rating = bus_container.find_element(By.XPATH, ".//div[@class='rating-sec lh-24']").text
                    price = bus_container.find_element(By.XPATH, ".//span[contains(@class,'f-19 f-bold') or contains(@class,'f-bold f-19')]").text
                    seat = bus_container.find_element(By.XPATH, ".//div[contains(@class, 'seat-left m-top-30') or contains(@class,'seat-left m-top-16')]").text

                    details = {
                        'State': "WESTBENGAL",
                        'Route Name': route_name,
                        'Route Link': link,
                        'Bus Name': busname,
                        'Bus Type': bustype,
                        'Departing Time': departuretiming,
                        'Duration': duration,
                        'Reaching Time': reachtiming,
                        'Star Rating': rating,
                        'Price': price,
                        'Seat Availability': seat,
                    }
                     # Check if the details already exist in the list
                    if details not in alldetails:
                        print(details)
                        alldetails.append(details)

                        # Insert data into the database
                        insert_query = """
                        INSERT IGNORE INTO BusDetails (state, route_name, route_link, bus_name, bus_type, departing_time, duration, reaching_time, star_rating, price, seat_availability)
                        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                        """
                        cursor.execute(insert_query, (
                            details['State'],
                            details['Route Name'],
                            details['Route Link'],
                            details['Bus Name'],
                            details['Bus Type'],
                            details['Departing Time'],
                            details['Duration'],
                            details['Reaching Time'],
                            details['Star Rating'],
                            details['Price'],
                            details['Seat Availability']
                        ))
                        db_connection.commit()
                except Exception as e:
                    print(f"Error scraping bus details for {route_name}: {e}")
        except Exception as e:
            print(f"Error accessing route link {link}: {e}")
    return alldetails

# Main execution
routes = navigate_pages()
print("Scraped Routes:", routes)
bus_details = collect_bus_details(routes)
print("Collected Bus Details:", bus_details)





Pagination error at page 2: Message: element click intercepted: Element is not clickable at point (739, 2096)
  (Session info: chrome=131.0.6778.265)
Stacktrace:
	GetHandleVerifier [0x00007FF7E47F80D5+2992373]
	(No symbol) [0x00007FF7E448BFD0]
	(No symbol) [0x00007FF7E432590A]
	(No symbol) [0x00007FF7E4380F2E]
	(No symbol) [0x00007FF7E437E9CC]
	(No symbol) [0x00007FF7E437BBA6]
	(No symbol) [0x00007FF7E437AB01]
	(No symbol) [0x00007FF7E436CD40]
	(No symbol) [0x00007FF7E439F36A]
	(No symbol) [0x00007FF7E436C596]
	(No symbol) [0x00007FF7E439F580]
	(No symbol) [0x00007FF7E43BF584]
	(No symbol) [0x00007FF7E439F113]
	(No symbol) [0x00007FF7E436A918]
	(No symbol) [0x00007FF7E436BA81]
	GetHandleVerifier [0x00007FF7E4856A2D+3379789]
	GetHandleVerifier [0x00007FF7E486C32D+3468109]
	GetHandleVerifier [0x00007FF7E4860043+3418211]
	GetHandleVerifier [0x00007FF7E45EC78B+847787]
	(No symbol) [0x00007FF7E449757F]
	(No symbol) [0x00007FF7E4492FC4]
	(No symbol) [0x00007FF7E449315D]
	(No symbol) [0x00007

In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver import ActionChains
import time
import mysql.connector
import pandas as pd
# Connect Database
db_connection = mysql.connector.connect(
    host="localhost",
    user="root",
    password="1234",
    database="redbus"
)
cursor = db_connection.cursor()
print("Database connected Successfully")

# SQL to create table
create_table_query = """
CREATE TABLE IF NOT EXISTS BusDetails(
    id INT PRIMARY KEY AUTO_INCREMENT,
    state VARCHAR(100) DEFAULT 'Andhra Pradesh',
    route_name VARCHAR(255),
    route_link VARCHAR(255),
    bus_name VARCHAR(255),
    bus_type VARCHAR(255),
    departing_time TEXT,
    duration VARCHAR(50),
    reaching_time TEXT,
    star_rating TEXT,
    price VARCHAR(50),
    seat_availability TEXT
);
"""
cursor.execute(create_table_query)
print("Table created successfully")

# Initialize the WebDriver
driver = webdriver.Chrome()
driver.implicitly_wait(5)
wait = WebDriverWait(driver, 10)

# Navigate to the upsrtc Bus Routes page
driver.get("https://www.redbus.in/online-booking/south-bengal-state-transport-corporation-sbstc/?utm_source=rtchometile")
driver.maximize_window()
time.sleep(2)
route_data = []
# Function to scrape route data
def scrape_page():
    routescontainer = wait.until(EC.presence_of_all_elements_located((By.XPATH, '//*[@id="root"]/div/div[4]/div[2]/div[1]/a')))
      
    for route in routescontainer:
        route_data.append({
            'Route Name': route.text,
            'Route Link': route.get_attribute('href')
        })
    return route_data  # Return the scraped data

# Pagination to collect routes from multiple pages
for page_number in range(1, 6):
    try:
        pagination_container = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="root"]/div/div[4]/div[12]')))
        next_page_button = pagination_container.find_element(By.XPATH, f'.//div[contains(@class,"DC_117_pageTabs") and text()="{page_number + 1}"]')

        actions = ActionChains(driver)
        actions.move_to_element(next_page_button).perform()
        next_page_button.click()
        time.sleep(1)

        wait.until(EC.text_to_be_present_in_element((By.XPATH, f'.//div[contains(@class,"DC_117_pageTabs DC_117_pageActive")]'), str(page_number + 1)))
        time.sleep(2)

        scrape_page()

    except Exception as e:
        print(e)
        break


# Function to scrape bus details
def collect_bus_details():
    alldetails = []
    for data in route_data:
        link = data['Route Link']
        route_name = data['Route Name']
        try:
            driver.get(link)
            time.sleep(2)
            try:
                view_buses_buttons = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//div[@class='button']")))
                for button in view_buses_buttons:
                    wait.until(EC.element_to_be_clickable(button)).click()
                    time.sleep(3)  # Wait for the buses to load
            except Exception as e:
                print(f"Error clicking 'View Buses' button for {route_name}: {e}")

            bus_containers = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//div[contains(@class, 'clearfix bus-item')]")))
            for bus_container in bus_containers:
                try:
                    busname = bus_container.find_element(By.XPATH, ".//div[@class='travels lh-24 f-bold d-color']").text
                    bustype = bus_container.find_element(By.XPATH, ".//div[@class='bus-type f-12 m-top-16 l-color evBus']").text
                    departuretiming = bus_container.find_element(By.XPATH, ".//div[@class='dp-time f-19 d-color f-bold']").text
                    duration = bus_container.find_element(By.XPATH, ".//div[@class='dur l-color lh-24']").text
                    reachtiming = bus_container.find_element(By.XPATH, ".//div[@class='bp-time f-19 d-color disp-Inline']").text
                    rating = bus_container.find_element(By.XPATH, ".//div[@class='rating-sec lh-24']").text
                    price = bus_container.find_element(By.XPATH, ".//span[contains(@class,'f-19 f-bold') or contains(@class,'f-bold f-19')]").text
                    seat = bus_container.find_element(By.XPATH, ".//div[contains(@class, 'seat-left m-top-30') or contains(@class,'seat-left m-top-16')]").text

                    details = {
                        'State': "Southbengal",
                        'Route Name': route_name,
                        'Route Link': link,
                        'Bus Name': busname,
                        'Bus Type': bustype,
                        'Departing Time': departuretiming,
                        'Duration': duration,
                        'Reaching Time': reachtiming,
                        'Star Rating': rating,
                        'Price': price,
                        'Seat Availability': seat,
                    }
                    # Check if the details already exist in the list
                    if details not in alldetails:
                        print(details)
                        alldetails.append(details)

                        # Insert data into the database
                        insert_query = """
                        INSERT IGNORE INTO BusDetails (state, route_name, route_link, bus_name, bus_type, departing_time, duration, reaching_time, star_rating, price, seat_availability)
                        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                        """
                        cursor.execute(insert_query, (
                            details['State'],
                            details['Route Name'],
                            details['Route Link'],
                            details['Bus Name'],
                            details['Bus Type'],
                            details['Departing Time'],
                            details['Duration'],
                            details['Reaching Time'],
                            details['Star Rating'],
                            details['Price'],
                            details['Seat Availability']
                        ))
                        db_connection.commit()
                except Exception as e:
                    print(f"Error scraping bus details for {route_name}: {e}")
        except Exception as e:
            print(f"Error accessing route link {link}: {e}")
    return alldetails

# Main execution


bus_details = collect_bus_details()
print("Collected Bus Details:", bus_details)


df = pd.DataFrame(bus_details)
df.to_csv("Southbengal.csv", index=False)

# Close the WebDriver
driver.quit()

# Close the Database Connection
cursor.close()
db_connection.close()

Database connected Successfully
Table created successfully
Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF6859502F5+28725]
	(No symbol) [0x00007FF6858B2AE0]
	(No symbol) [0x00007FF68574510A]
	(No symbol) [0x00007FF6857993D2]
	(No symbol) [0x00007FF6857995FC]
	(No symbol) [0x00007FF6857E3407]
	(No symbol) [0x00007FF6857BFFEF]
	(No symbol) [0x00007FF6857E0181]
	(No symbol) [0x00007FF6857BFD53]
	(No symbol) [0x00007FF68578A0E3]
	(No symbol) [0x00007FF68578B471]
	GetHandleVerifier [0x00007FF685C7F30D+3366989]
	GetHandleVerifier [0x00007FF685C912F0+3440688]
	GetHandleVerifier [0x00007FF685C878FD+3401277]
	GetHandleVerifier [0x00007FF685A1AAAB+858091]
	(No symbol) [0x00007FF6858BE74F]
	(No symbol) [0x00007FF6858BA304]
	(No symbol) [0x00007FF6858BA49D]
	(No symbol) [0x00007FF6858A8B69]
	BaseThreadInitThunk [0x00007FFFFD2CE8D7+23]
	RtlUserThreadStart [0x00007FFFFE93FBCC+44]

{'State': 'Southbengal', 'Route Name': 'Kolkata to Asansol (West Bengal)', 'Route Link': 'https://www.redbus.in/bus-