In [4]:
pip install selenium





In [5]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [6]:
pip install pymysql




In [8]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

# redBus listing page for UPSRTC (Uttar Pradesh State Road Transport Corporation);
# scrape_all_pages() loads this address to discover the operator's routes.
URL = "https://www.redbus.in/online-booking/uttar-pradesh-state-road-transport-corporation-upsrtc/?utm_source=rtchometile"

def initialize_driver():
    """Start a Chrome WebDriver session, maximize its window and return it."""
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url):
    """Navigate ``driver`` to ``url`` and then pause for five seconds."""
    settle_seconds = 5
    driver.get(url)
    time.sleep(settle_seconds)

def scrape_bus_routes(driver):
    """Collect the link and label of every element with class ``route``.

    Returns:
        tuple: ``(links, names)`` — two lists in page order, holding each
        element's ``href`` attribute and its stripped text respectively.
    """
    anchors = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href'))

    names = []
    for anchor in anchors:
        names.append(anchor.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Open one route page and build a record for every bus listed on it.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Address of the route page; also stored as ``Route_Link``.
        route_name: Route label stored as ``Route_Name`` in every record.

    Returns:
        list[dict]: One dict per bus with the keys ``Route_Name``,
        ``Route_Link``, ``Bus_Name``, ``Bus_Type``, ``Departing_Time``,
        ``Duration``, ``Reaching_Time``, ``Star_Rating``, ``Price`` and
        ``Seat_Availability``. ``Price`` is the first number in the fare text
        as a string (thousands separators removed, decimal part kept), or
        ``None`` when the fare text contains no number. An empty list is
        returned when the page itself could not be processed.
    """
    import re  # needed only for the fare parsing below

    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # Best effort: a page without the button is still scraped. Catching
            # Exception (not a bare except) lets a kernel interrupt propagate.
            print(f"No 'View Buses' button found for {url}")

        # Scroll to the bottom of the page before collecting the elements.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")

        # NOTE(review): the columns are matched purely by position, so a bus
        # that lacks a rating or seat element shifts those values for every
        # bus after it — confirm against the page markup.
        bus_details = []

        for i in range(len(bus_name_elements)):
            try:
                # Take the first number and keep its decimal part; joining every
                # digit would turn "1,299.50" into "129950".
                fare_match = re.search(r"\d[\d,]*(?:\.\d+)?", price_elements[i].text)
                price_numeric = fare_match.group(0).replace(",", "") if fare_match else None

                bus_detail = {
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_elements[i].text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                }
                bus_details.append(bus_detail)
            except Exception as e:
                # One malformed entry must not discard the rest of the page.
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages():
    """Scrape bus details for the routes on listing pages 1 to 5 of ``URL``.

    A new browser is started for every listing page. For pages after the
    first, the matching pagination tab is clicked before the route links are
    read. Each route is then scraped with ``scrape_bus_details``. A failure on
    one page is printed and the loop moves on to the next page.

    Returns:
        list: The bus-detail records gathered from every page that could be
        processed.
    """
    all_bus_details = []
    for page in range(1, 6):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Always release the browser, including when the page failed, so
            # that browser sessions do not pile up across iterations.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error occurred while closing the browser for page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Create ``upsrtc_bus_details`` if needed and insert the scraped records.

    The MySQL password is read from the ``REDBUS_DB_PASSWORD`` environment
    variable, falling back to an interactive prompt, so it is no longer stored
    in the notebook.
    NOTE(review): the password that used to be hard-coded here should be rotated.

    Args:
        data: Iterable of dicts containing the keys ``Route_Name``,
            ``Route_Link``, ``Bus_Name``, ``Bus_Type``, ``Departing_Time``,
            ``Duration``, ``Reaching_Time``, ``Star_Rating``, ``Price`` and
            ``Seat_Availability``.

    Returns:
        None. A success message is printed, or the MySQL error if a database
        call fails.
    """
    import getpass
    import os

    columns = (
        'Route_Name', 'Route_Link', 'Bus_Name', 'Bus_Type', 'Departing_Time',
        'Duration', 'Reaching_Time', 'Star_Rating', 'Price', 'Seat_Availability',
    )
    price_at = columns.index('Price')

    rows = []
    for detail in data:
        row = [detail[column] for column in columns]
        # MySQL in strict mode rejects '' for a DECIMAL column; store NULL instead.
        if row[price_at] == '':
            row[price_at] = None
        rows.append(tuple(row))

    db_password = os.environ.get('REDBUS_DB_PASSWORD') or getpass.getpass('MySQL password for root: ')

    try:
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password=db_password,
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS upsrtc_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                if rows:
                    # One batched call instead of one round trip per record.
                    cursor.executemany("""
                    INSERT INTO upsrtc_bus_details (
                        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time,
                        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

# Run the full pipeline for this cell: scrape every listing page, store the
# records in MySQL, then export the same records to a CSV file.
if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    # insert_data_into_mysql() prints MySQL errors instead of raising them, so
    # the CSV export below still runs when the database insert fails.
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    # Relative path: the file is written to the kernel's current working directory.
    df.to_csv('upsrtc_bus_details.csv', index=False)
    # Printed unconditionally; it does not confirm that the insert succeeded.
    print("Scraping completed. Data saved to 'upsrtc_bus_details.csv' and MySQL database.")

No 'View Buses' button found for https://www.redbus.in/bus-tickets/lucknow-to-aligarh
Data inserted successfully into MySQL database.
Scraping completed. Data saved to 'upsrtc_bus_details.csv' and MySQL database.


In [9]:
import os
# Show the kernel's working directory: the relative CSV filenames passed to
# to_csv() in the scraping cells are resolved against it.
print(os.getcwd())


C:\windows\system32


In [11]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

# redBus online-booking page for the RSRTC operator; scrape_all_pages() loads
# this address to discover the operator's routes. Rebinds the global URL.
URL = "https://redbus.in/online-booking/rsrtc"

def initialize_driver():
    """Start a Chrome WebDriver session, maximize its window and return it."""
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url):
    """Navigate ``driver`` to ``url`` and then pause for five seconds."""
    settle_seconds = 5
    driver.get(url)
    time.sleep(settle_seconds)

def scrape_bus_routes(driver):
    """Collect the link and label of every element with class ``route``.

    Returns:
        tuple: ``(links, names)`` — two lists in page order, holding each
        element's ``href`` attribute and its stripped text respectively.
    """
    anchors = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href'))

    names = []
    for anchor in anchors:
        names.append(anchor.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Open one route page and build a record for every bus listed on it.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Address of the route page; also stored as ``Route_Link``.
        route_name: Route label stored as ``Route_Name`` in every record.

    Returns:
        list[dict]: One dict per bus with the keys ``Route_Name``,
        ``Route_Link``, ``Bus_Name``, ``Bus_Type``, ``Departing_Time``,
        ``Duration``, ``Reaching_Time``, ``Star_Rating``, ``Price`` and
        ``Seat_Availability``. ``Price`` is the first number in the fare text
        as a string (thousands separators removed, decimal part kept), or
        ``None`` when the fare text contains no number. An empty list is
        returned when the page itself could not be processed.
    """
    import re  # needed only for the fare parsing below

    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # Best effort: a page without the button is still scraped. Catching
            # Exception (not a bare except) lets a kernel interrupt propagate.
            print(f"No 'View Buses' button found for {url}")

        # Scroll to the bottom of the page before collecting the elements.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")

        # NOTE(review): the columns are matched purely by position, so a bus
        # that lacks a rating or seat element shifts those values for every
        # bus after it — confirm against the page markup.
        bus_details = []

        for i in range(len(bus_name_elements)):
            try:
                # Take the first number and keep its decimal part; joining every
                # digit would turn "1,299.50" into "129950".
                fare_match = re.search(r"\d[\d,]*(?:\.\d+)?", price_elements[i].text)
                price_numeric = fare_match.group(0).replace(",", "") if fare_match else None

                bus_detail = {
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_elements[i].text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                }
                bus_details.append(bus_detail)
            except Exception as e:
                # One malformed entry must not discard the rest of the page.
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages():
    """Scrape bus details for the routes on listing pages 1 to 5 of ``URL``.

    A new browser is started for every listing page. For pages after the
    first, the matching pagination tab is clicked before the route links are
    read. Each route is then scraped with ``scrape_bus_details``. A failure on
    one page is printed and the loop moves on to the next page.

    Returns:
        list: The bus-detail records gathered from every page that could be
        processed.
    """
    all_bus_details = []
    for page in range(1, 6):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Always release the browser, including when the page failed, so
            # that browser sessions do not pile up across iterations.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error occurred while closing the browser for page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Create ``rsrtc_bus_details`` if needed and insert the scraped records.

    The MySQL password is read from the ``REDBUS_DB_PASSWORD`` environment
    variable, falling back to an interactive prompt, so it is no longer stored
    in the notebook.
    NOTE(review): the password that used to be hard-coded here should be rotated.

    Args:
        data: Iterable of dicts containing the keys ``Route_Name``,
            ``Route_Link``, ``Bus_Name``, ``Bus_Type``, ``Departing_Time``,
            ``Duration``, ``Reaching_Time``, ``Star_Rating``, ``Price`` and
            ``Seat_Availability``.

    Returns:
        None. A success message is printed, or the MySQL error if a database
        call fails.
    """
    import getpass
    import os

    columns = (
        'Route_Name', 'Route_Link', 'Bus_Name', 'Bus_Type', 'Departing_Time',
        'Duration', 'Reaching_Time', 'Star_Rating', 'Price', 'Seat_Availability',
    )
    price_at = columns.index('Price')

    rows = []
    for detail in data:
        row = [detail[column] for column in columns]
        # MySQL in strict mode rejects '' for a DECIMAL column; store NULL instead.
        if row[price_at] == '':
            row[price_at] = None
        rows.append(tuple(row))

    db_password = os.environ.get('REDBUS_DB_PASSWORD') or getpass.getpass('MySQL password for root: ')

    try:
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password=db_password,
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS rsrtc_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                if rows:
                    # One batched call instead of one round trip per record.
                    cursor.executemany("""
                    INSERT INTO rsrtc_bus_details (
                        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time,
                        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

# Run the full pipeline for this cell: scrape every listing page, store the
# records in MySQL, then export the same records to a CSV file.
if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    # insert_data_into_mysql() prints MySQL errors instead of raising them, so
    # the CSV export below still runs when the database insert fails.
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    # Relative path: the file is written to the kernel's current working directory.
    df.to_csv('rsrtc_bus_details.csv', index=False)
    # Printed unconditionally; it does not confirm that the insert succeeded.
    print("Scraping completed. Data saved to 'rsrtc_bus_details.csv' and MySQL database.")

Error occurred while accessing page 3: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF60A379412+29090]
	(No symbol) [0x00007FF60A2EE239]
	(No symbol) [0x00007FF60A1AB1DA]
	(No symbol) [0x00007FF60A1FEFE7]
	(No symbol) [0x00007FF60A1FF23C]
	(No symbol) [0x00007FF60A2497C7]
	(No symbol) [0x00007FF60A22672F]
	(No symbol) [0x00007FF60A2465A2]
	(No symbol) [0x00007FF60A226493]
	(No symbol) [0x00007FF60A1F09D1]
	(No symbol) [0x00007FF60A1F1B31]
	GetHandleVerifier [0x00007FF60A69871D+3302573]
	GetHandleVerifier [0x00007FF60A6E4243+3612627]
	GetHandleVerifier [0x00007FF60A6DA417+3572135]
	GetHandleVerifier [0x00007FF60A435EB6+801862]
	(No symbol) [0x00007FF60A2F945F]
	(No symbol) [0x00007FF60A2F4FB4]
	(No symbol) [0x00007FF60A2F5140]
	(No symbol) [0x00007FF60A2E461F]
	BaseThreadInitThunk [0x00007FFC553D7374+20]
	RtlUserThreadStart [0x00007FFC566DCC91+33]

Error occurred while accessing page 4: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF60A379412+29090]
	(No symbol) [0x00007FF60A2EE

In [12]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

# redBus online-booking page for the PEPSU operator; scrape_all_pages() loads
# this address to discover the operator's routes. Rebinds the global URL.
URL = "https://www.redbus.in/online-booking/pepsu"

def initialize_driver():
    """Start a Chrome WebDriver session, maximize its window and return it."""
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url):
    """Navigate ``driver`` to ``url`` and then pause for five seconds."""
    settle_seconds = 5
    driver.get(url)
    time.sleep(settle_seconds)

def scrape_bus_routes(driver):
    """Collect the link and label of every element with class ``route``.

    Returns:
        tuple: ``(links, names)`` — two lists in page order, holding each
        element's ``href`` attribute and its stripped text respectively.
    """
    anchors = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href'))

    names = []
    for anchor in anchors:
        names.append(anchor.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Open one route page and build a record for every bus listed on it.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Address of the route page; also stored as ``Route_Link``.
        route_name: Route label stored as ``Route_Name`` in every record.

    Returns:
        list[dict]: One dict per bus with the keys ``Route_Name``,
        ``Route_Link``, ``Bus_Name``, ``Bus_Type``, ``Departing_Time``,
        ``Duration``, ``Reaching_Time``, ``Star_Rating``, ``Price`` and
        ``Seat_Availability``. ``Price`` is the first number in the fare text
        as a string (thousands separators removed, decimal part kept), or
        ``None`` when the fare text contains no number. An empty list is
        returned when the page itself could not be processed.
    """
    import re  # needed only for the fare parsing below

    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # Best effort: a page without the button is still scraped. Catching
            # Exception (not a bare except) lets a kernel interrupt propagate.
            print(f"No 'View Buses' button found for {url}")

        # Scroll to the bottom of the page before collecting the elements.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")

        # NOTE(review): the columns are matched purely by position, so a bus
        # that lacks a rating or seat element shifts those values for every
        # bus after it — confirm against the page markup.
        bus_details = []

        for i in range(len(bus_name_elements)):
            try:
                # Take the first number and keep its decimal part; joining every
                # digit would turn "1,299.50" into "129950".
                fare_match = re.search(r"\d[\d,]*(?:\.\d+)?", price_elements[i].text)
                price_numeric = fare_match.group(0).replace(",", "") if fare_match else None

                bus_detail = {
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_elements[i].text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                }
                bus_details.append(bus_detail)
            except Exception as e:
                # One malformed entry must not discard the rest of the page.
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages():
    """Scrape bus details for the routes on listing pages 1 to 5 of ``URL``.

    A new browser is started for every listing page. For pages after the
    first, the matching pagination tab is clicked before the route links are
    read. Each route is then scraped with ``scrape_bus_details``. A failure on
    one page is printed and the loop moves on to the next page.

    Returns:
        list: The bus-detail records gathered from every page that could be
        processed.
    """
    all_bus_details = []
    for page in range(1, 6):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Always release the browser, including when the page failed, so
            # that browser sessions do not pile up across iterations.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error occurred while closing the browser for page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Create ``pepsu_bus_details`` if needed and insert the scraped records.

    The MySQL password is read from the ``REDBUS_DB_PASSWORD`` environment
    variable, falling back to an interactive prompt, so it is no longer stored
    in the notebook.
    NOTE(review): the password that used to be hard-coded here should be rotated.

    Args:
        data: Iterable of dicts containing the keys ``Route_Name``,
            ``Route_Link``, ``Bus_Name``, ``Bus_Type``, ``Departing_Time``,
            ``Duration``, ``Reaching_Time``, ``Star_Rating``, ``Price`` and
            ``Seat_Availability``.

    Returns:
        None. A success message is printed, or the MySQL error if a database
        call fails.
    """
    import getpass
    import os

    columns = (
        'Route_Name', 'Route_Link', 'Bus_Name', 'Bus_Type', 'Departing_Time',
        'Duration', 'Reaching_Time', 'Star_Rating', 'Price', 'Seat_Availability',
    )
    price_at = columns.index('Price')

    rows = []
    for detail in data:
        row = [detail[column] for column in columns]
        # MySQL in strict mode rejects '' for a DECIMAL column; store NULL instead.
        if row[price_at] == '':
            row[price_at] = None
        rows.append(tuple(row))

    db_password = os.environ.get('REDBUS_DB_PASSWORD') or getpass.getpass('MySQL password for root: ')

    try:
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password=db_password,
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS pepsu_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                if rows:
                    # One batched call instead of one round trip per record.
                    cursor.executemany("""
                    INSERT INTO pepsu_bus_details (
                        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time,
                        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

# Run the full pipeline for this cell: scrape every listing page, store the
# records in MySQL, then export the same records to a CSV file.
if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    # insert_data_into_mysql() prints MySQL errors instead of raising them, so
    # the CSV export below still runs when the database insert fails.
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    # Relative path: the file is written to the kernel's current working directory.
    df.to_csv('pepsu_bus_details.csv', index=False)
    # Printed unconditionally; it does not confirm that the insert succeeded.
    print("Scraping completed. Data saved to 'pepsu_bus_details.csv' and MySQL database.")

No 'View Buses' button found for https://www.redbus.in/bus-tickets/jalandhar-to-delhi-airport
Error occurred while accessing page 3: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF60A379412+29090]
	(No symbol) [0x00007FF60A2EE239]
	(No symbol) [0x00007FF60A1AB1DA]
	(No symbol) [0x00007FF60A1FEFE7]
	(No symbol) [0x00007FF60A1FF23C]
	(No symbol) [0x00007FF60A2497C7]
	(No symbol) [0x00007FF60A22672F]
	(No symbol) [0x00007FF60A2465A2]
	(No symbol) [0x00007FF60A226493]
	(No symbol) [0x00007FF60A1F09D1]
	(No symbol) [0x00007FF60A1F1B31]
	GetHandleVerifier [0x00007FF60A69871D+3302573]
	GetHandleVerifier [0x00007FF60A6E4243+3612627]
	GetHandleVerifier [0x00007FF60A6DA417+3572135]
	GetHandleVerifier [0x00007FF60A435EB6+801862]
	(No symbol) [0x00007FF60A2F945F]
	(No symbol) [0x00007FF60A2F4FB4]
	(No symbol) [0x00007FF60A2F5140]
	(No symbol) [0x00007FF60A2E461F]
	BaseThreadInitThunk [0x00007FFC553D7374+20]
	RtlUserThreadStart [0x00007FFC566DCC91+33]

Error occurred while accessing page 4: Mes

In [13]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

# redBus online-booking page for the APSRTC operator; scrape_all_pages() loads
# this address to discover the operator's routes. Rebinds the global URL.
URL = "https://www.redbus.in/online-booking/apsrtc"

def initialize_driver():
    """Start a Chrome WebDriver session, maximize its window and return it."""
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url):
    """Navigate ``driver`` to ``url`` and then pause for five seconds."""
    settle_seconds = 5
    driver.get(url)
    time.sleep(settle_seconds)

def scrape_bus_routes(driver):
    """Collect the link and label of every element with class ``route``.

    Returns:
        tuple: ``(links, names)`` — two lists in page order, holding each
        element's ``href`` attribute and its stripped text respectively.
    """
    anchors = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href'))

    names = []
    for anchor in anchors:
        names.append(anchor.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Open one route page and build a record for every bus listed on it.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Address of the route page; also stored as ``Route_Link``.
        route_name: Route label stored as ``Route_Name`` in every record.

    Returns:
        list[dict]: One dict per bus with the keys ``Route_Name``,
        ``Route_Link``, ``Bus_Name``, ``Bus_Type``, ``Departing_Time``,
        ``Duration``, ``Reaching_Time``, ``Star_Rating``, ``Price`` and
        ``Seat_Availability``. ``Price`` is the first number in the fare text
        as a string (thousands separators removed, decimal part kept), or
        ``None`` when the fare text contains no number. An empty list is
        returned when the page itself could not be processed.
    """
    import re  # needed only for the fare parsing below

    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # Best effort: a page without the button is still scraped. Catching
            # Exception (not a bare except) lets a kernel interrupt propagate.
            print(f"No 'View Buses' button found for {url}")

        # Scroll to the bottom of the page before collecting the elements.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")

        # NOTE(review): the columns are matched purely by position, so a bus
        # that lacks a rating or seat element shifts those values for every
        # bus after it — confirm against the page markup.
        bus_details = []

        for i in range(len(bus_name_elements)):
            try:
                # Take the first number and keep its decimal part; joining every
                # digit would turn "1,299.50" into "129950".
                fare_match = re.search(r"\d[\d,]*(?:\.\d+)?", price_elements[i].text)
                price_numeric = fare_match.group(0).replace(",", "") if fare_match else None

                bus_detail = {
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_elements[i].text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                }
                bus_details.append(bus_detail)
            except Exception as e:
                # One malformed entry must not discard the rest of the page.
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages():
    """Scrape bus details for the routes on listing pages 1 to 5 of ``URL``.

    A new browser is started for every listing page. For pages after the
    first, the matching pagination tab is clicked before the route links are
    read. Each route is then scraped with ``scrape_bus_details``. A failure on
    one page is printed and the loop moves on to the next page.

    Returns:
        list: The bus-detail records gathered from every page that could be
        processed.
    """
    all_bus_details = []
    for page in range(1, 6):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Always release the browser, including when the page failed, so
            # that browser sessions do not pile up across iterations.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error occurred while closing the browser for page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Create ``apsrtc_bus_details`` if needed and insert the scraped records.

    The MySQL password is read from the ``REDBUS_DB_PASSWORD`` environment
    variable, falling back to an interactive prompt, so it is no longer stored
    in the notebook.
    NOTE(review): the password that used to be hard-coded here should be rotated.

    Args:
        data: Iterable of dicts containing the keys ``Route_Name``,
            ``Route_Link``, ``Bus_Name``, ``Bus_Type``, ``Departing_Time``,
            ``Duration``, ``Reaching_Time``, ``Star_Rating``, ``Price`` and
            ``Seat_Availability``.

    Returns:
        None. A success message is printed, or the MySQL error if a database
        call fails.
    """
    import getpass
    import os

    columns = (
        'Route_Name', 'Route_Link', 'Bus_Name', 'Bus_Type', 'Departing_Time',
        'Duration', 'Reaching_Time', 'Star_Rating', 'Price', 'Seat_Availability',
    )
    price_at = columns.index('Price')

    rows = []
    for detail in data:
        row = [detail[column] for column in columns]
        # MySQL in strict mode rejects '' for a DECIMAL column; store NULL instead.
        if row[price_at] == '':
            row[price_at] = None
        rows.append(tuple(row))

    db_password = os.environ.get('REDBUS_DB_PASSWORD') or getpass.getpass('MySQL password for root: ')

    try:
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password=db_password,
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS apsrtc_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                if rows:
                    # One batched call instead of one round trip per record.
                    cursor.executemany("""
                    INSERT INTO apsrtc_bus_details (
                        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time,
                        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

# Run the full pipeline for this cell: scrape every listing page, store the
# records in MySQL, then export the same records to a CSV file.
if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    # insert_data_into_mysql() prints MySQL errors instead of raising them, so
    # the CSV export below still runs when the database insert fails.
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    # Relative path: the file is written to the kernel's current working directory.
    df.to_csv('apsrtc_bus_details.csv', index=False)
    # Printed unconditionally; it does not confirm that the insert succeeded.
    print("Scraping completed. Data saved to 'apsrtc_bus_details.csv' and MySQL database.")

Data inserted successfully into MySQL database.
Scraping completed. Data saved to 'apsrtc_bus_details.csv' and MySQL database.


In [14]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

# redBus online-booking page whose path names "ksrtc-kerala"; load_page() is
# given this address elsewhere in the notebook. Rebinds the global URL.
URL = "https://www.redbus.in/online-booking/ksrtc-kerala"

def initialize_driver():
    """Start a Chrome WebDriver session, maximize its window and return it."""
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url):
    """Navigate ``driver`` to ``url`` and then pause for five seconds."""
    settle_seconds = 5
    driver.get(url)
    time.sleep(settle_seconds)

def scrape_bus_routes(driver):
    """Collect the link and label of every element with class ``route``.

    Returns:
        tuple: ``(links, names)`` — two lists in page order, holding each
        element's ``href`` attribute and its stripped text respectively.
    """
    anchors = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href'))

    names = []
    for anchor in anchors:
        names.append(anchor.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Open one route page and build a record for every bus listed on it.

    The page is loaded, the "View Buses" button is clicked when one becomes
    clickable, the page is scrolled to the bottom, and the per-bus fields are
    read from separate element lists that are matched purely by position.

    Args:
        driver: Selenium WebDriver used for navigation and element lookup.
        url: Route page to open; stored as ``Route_Link`` in each record.
        route_name: Stored as ``Route_Name`` in each record.

    Returns:
        A list of dicts with the keys ``Route_Name``, ``Route_Link``,
        ``Bus_Name``, ``Bus_Type``, ``Departing_Time``, ``Duration``,
        ``Reaching_Time``, ``Star_Rating``, ``Price`` and
        ``Seat_Availability``. ``[]`` is returned when the page as a whole
        fails; a single bus whose fields cannot be read is skipped with a
        printed message.
    """
    import re  # local import: only needed here, for price parsing

    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # Narrowed from a bare ``except:`` so Ctrl-C / kernel interrupts
            # are no longer swallowed; a missing button stays non-fatal.
            print(f"No 'View Buses' button found for {url}")

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")

        bus_details = []

        # Only the rating and seat lists are length-guarded; a shorter list
        # for any other field raises IndexError and that bus is skipped below.
        for i in range(len(bus_name_elements)):
            try:
                price_text = price_elements[i].text
                # Keep the decimal point: joining every digit would turn
                # "1,234.50" into "123450". Thousands separators are dropped
                # first; no number at all yields '' as before.
                price_match = re.search(r"\d+(?:\.\d+)?", price_text.replace(",", ""))
                price_numeric = price_match.group(0) if price_match else ''

                bus_detail = {
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_elements[i].text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    # Stored as the raw element text; the unused int conversion
                    # that used to precede this dict was dead code and is gone.
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                }
                bus_details.append(bus_detail)
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages():
    """Scrape bus details for every route on listing pages 1 through 5.

    For each page number a fresh browser is started and ``URL`` is loaded;
    for pages after the first, the pagination tab whose text equals the page
    number is clicked. The route links are collected up front because
    ``scrape_bus_details`` navigates the same driver away from the listing.

    Returns:
        A flat list of the bus-detail dicts gathered from all pages. A page
        that fails (for example because its pagination tab is not found
        within 10 seconds) is reported with a printed message and skipped.
    """
    all_bus_details = []
    for page in range(1, 6):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Always release the browser: previously quit() was skipped when
            # anything above raised, leaving one Chrome process per failed page.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as quit_error:
                    print(f"Error occurred while closing browser for page {page}: {str(quit_error)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Create ``ksrtc_bus_details`` if needed and insert the scraped rows.

    The MySQL password is no longer embedded in the notebook: it is read from
    the ``MYSQL_PASSWORD`` environment variable, and requested interactively
    through ``getpass`` when that variable is not set. Host, user and database
    are unchanged (``127.0.0.1``, ``root``, ``redbus``).

    Args:
        data: Iterable of dicts carrying the keys ``Route_Name``,
            ``Route_Link``, ``Bus_Name``, ``Bus_Type``, ``Departing_Time``,
            ``Duration``, ``Reaching_Time``, ``Star_Rating``, ``Price`` and
            ``Seat_Availability``.

    MySQL errors are caught and printed rather than raised.
    """
    import getpass
    import os

    password = os.environ.get('MYSQL_PASSWORD')
    if password is None:
        password = getpass.getpass('MySQL password: ')

    rows = [
        (
            detail['Route_Name'], detail['Route_Link'], detail['Bus_Name'],
            detail['Bus_Type'], detail['Departing_Time'], detail['Duration'],
            detail['Reaching_Time'], detail['Star_Rating'],
            # An empty price string is not a valid DECIMAL; store NULL instead.
            detail['Price'] or None,
            detail['Seat_Availability'],
        )
        for detail in data
    ]

    try:
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password=password,
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS ksrtc_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                # One batched call instead of one execute() per row.
                if rows:
                    cursor.executemany("""
                    INSERT INTO ksrtc_bus_details (
                        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time,
                        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

# Entry point for this cell: scrape the listing pages, store the rows in
# MySQL, then write the same rows to a CSV file.
if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    # insert_data_into_mysql prints MySQL errors instead of raising, so the CSV
    # export and the final message below run even if the database step failed.
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    df.to_csv('ksrtc_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'ksrtc_bus_details.csv' and MySQL database.")

Error occurred while accessing page 3: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF60A379412+29090]
	(No symbol) [0x00007FF60A2EE239]
	(No symbol) [0x00007FF60A1AB1DA]
	(No symbol) [0x00007FF60A1FEFE7]
	(No symbol) [0x00007FF60A1FF23C]
	(No symbol) [0x00007FF60A2497C7]
	(No symbol) [0x00007FF60A22672F]
	(No symbol) [0x00007FF60A2465A2]
	(No symbol) [0x00007FF60A226493]
	(No symbol) [0x00007FF60A1F09D1]
	(No symbol) [0x00007FF60A1F1B31]
	GetHandleVerifier [0x00007FF60A69871D+3302573]
	GetHandleVerifier [0x00007FF60A6E4243+3612627]
	GetHandleVerifier [0x00007FF60A6DA417+3572135]
	GetHandleVerifier [0x00007FF60A435EB6+801862]
	(No symbol) [0x00007FF60A2F945F]
	(No symbol) [0x00007FF60A2F4FB4]
	(No symbol) [0x00007FF60A2F5140]
	(No symbol) [0x00007FF60A2E461F]
	BaseThreadInitThunk [0x00007FFC553D7374+20]
	RtlUserThreadStart [0x00007FFC566DCC91+33]

Error occurred while accessing page 4: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF60A379412+29090]
	(No symbol) [0x00007FF60A2EE

In [15]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

# redbus.in online-booking page for TSRTC, per the URL slug.
# scrape_all_pages() in this cell reloads it once for every pagination tab.
URL = "https://www.redbus.in/online-booking/tsrtc"

def initialize_driver():
    """Start a Chrome WebDriver session and maximize its window.

    Returns:
        The new ``webdriver.Chrome`` instance. The caller owns it and is
        expected to call ``quit()`` when done.
    """
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url, wait_seconds=5):
    """Navigate ``driver`` to ``url`` and pause so the page can render.

    Args:
        driver: Object exposing a Selenium-style ``get(url)`` method.
        url: Address to open.
        wait_seconds: Seconds to sleep after navigation. Defaults to 5, the
            delay that used to be hard-coded; pass 0 to skip the pause.
    """
    driver.get(url)
    # A fixed pause is kept (no specific element is known here to wait on),
    # but it is now tunable; non-positive values skip the sleep entirely.
    if wait_seconds > 0:
        time.sleep(wait_seconds)

def scrape_bus_routes(driver):
    """Collect the link and label of every element with class ``route``.

    Args:
        driver: Selenium WebDriver positioned on the page to inspect.

    Returns:
        A ``(links, names)`` pair of equally long lists: each element's
        ``href`` attribute and its whitespace-stripped text.
    """
    links, names = [], []
    for anchor in driver.find_elements(By.CLASS_NAME, 'route'):
        links.append(anchor.get_attribute('href'))
        names.append(anchor.text.strip())
    return links, names

def scrape_bus_details(driver, url, route_name):
    """Open one route page and build a record for every bus listed on it.

    The page is loaded, the "View Buses" button is clicked when one becomes
    clickable, the page is scrolled to the bottom, and the per-bus fields are
    read from separate element lists that are matched purely by position.

    Args:
        driver: Selenium WebDriver used for navigation and element lookup.
        url: Route page to open; stored as ``Route_Link`` in each record.
        route_name: Stored as ``Route_Name`` in each record.

    Returns:
        A list of dicts with the keys ``Route_Name``, ``Route_Link``,
        ``Bus_Name``, ``Bus_Type``, ``Departing_Time``, ``Duration``,
        ``Reaching_Time``, ``Star_Rating``, ``Price`` and
        ``Seat_Availability``. ``[]`` is returned when the page as a whole
        fails; a single bus whose fields cannot be read is skipped with a
        printed message.
    """
    import re  # local import: only needed here, for price parsing

    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # Narrowed from a bare ``except:`` so Ctrl-C / kernel interrupts
            # are no longer swallowed; a missing button stays non-fatal.
            print(f"No 'View Buses' button found for {url}")

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")

        bus_details = []

        # Only the rating and seat lists are length-guarded; a shorter list
        # for any other field raises IndexError and that bus is skipped below.
        for i in range(len(bus_name_elements)):
            try:
                price_text = price_elements[i].text
                # Keep the decimal point: joining every digit would turn
                # "1,234.50" into "123450". Thousands separators are dropped
                # first; no number at all yields '' as before.
                price_match = re.search(r"\d+(?:\.\d+)?", price_text.replace(",", ""))
                price_numeric = price_match.group(0) if price_match else ''

                bus_detail = {
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_elements[i].text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    # Stored as the raw element text; the unused int conversion
                    # that used to precede this dict was dead code and is gone.
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                }
                bus_details.append(bus_detail)
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages():
    """Scrape bus details for every route on listing pages 1 through 5.

    For each page number a fresh browser is started and ``URL`` is loaded;
    for pages after the first, the pagination tab whose text equals the page
    number is clicked. The route links are collected up front because
    ``scrape_bus_details`` navigates the same driver away from the listing.

    Returns:
        A flat list of the bus-detail dicts gathered from all pages. A page
        that fails (for example because its pagination tab is not found
        within 10 seconds) is reported with a printed message and skipped.
    """
    all_bus_details = []
    for page in range(1, 6):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Always release the browser: previously quit() was skipped when
            # anything above raised, leaving one Chrome process per failed page.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as quit_error:
                    print(f"Error occurred while closing browser for page {page}: {str(quit_error)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Create ``tsrtc_bus_details`` if needed and insert the scraped rows.

    The MySQL password is no longer embedded in the notebook: it is read from
    the ``MYSQL_PASSWORD`` environment variable, and requested interactively
    through ``getpass`` when that variable is not set. Host, user and database
    are unchanged (``127.0.0.1``, ``root``, ``redbus``).

    Args:
        data: Iterable of dicts carrying the keys ``Route_Name``,
            ``Route_Link``, ``Bus_Name``, ``Bus_Type``, ``Departing_Time``,
            ``Duration``, ``Reaching_Time``, ``Star_Rating``, ``Price`` and
            ``Seat_Availability``.

    MySQL errors are caught and printed rather than raised.
    """
    import getpass
    import os

    password = os.environ.get('MYSQL_PASSWORD')
    if password is None:
        password = getpass.getpass('MySQL password: ')

    rows = [
        (
            detail['Route_Name'], detail['Route_Link'], detail['Bus_Name'],
            detail['Bus_Type'], detail['Departing_Time'], detail['Duration'],
            detail['Reaching_Time'], detail['Star_Rating'],
            # An empty price string is not a valid DECIMAL; store NULL instead.
            detail['Price'] or None,
            detail['Seat_Availability'],
        )
        for detail in data
    ]

    try:
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password=password,
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS tsrtc_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                # One batched call instead of one execute() per row.
                if rows:
                    cursor.executemany("""
                    INSERT INTO tsrtc_bus_details (
                        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time,
                        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

# Entry point for this cell: scrape the listing pages, store the rows in
# MySQL, then write the same rows to a CSV file.
if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    # insert_data_into_mysql prints MySQL errors instead of raising, so the CSV
    # export and the final message below run even if the database step failed.
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    df.to_csv('tsrtc_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'tsrtc_bus_details.csv' and MySQL database.")

Error occurred while accessing page 4: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF60A379412+29090]
	(No symbol) [0x00007FF60A2EE239]
	(No symbol) [0x00007FF60A1AB1DA]
	(No symbol) [0x00007FF60A1FEFE7]
	(No symbol) [0x00007FF60A1FF23C]
	(No symbol) [0x00007FF60A2497C7]
	(No symbol) [0x00007FF60A22672F]
	(No symbol) [0x00007FF60A2465A2]
	(No symbol) [0x00007FF60A226493]
	(No symbol) [0x00007FF60A1F09D1]
	(No symbol) [0x00007FF60A1F1B31]
	GetHandleVerifier [0x00007FF60A69871D+3302573]
	GetHandleVerifier [0x00007FF60A6E4243+3612627]
	GetHandleVerifier [0x00007FF60A6DA417+3572135]
	GetHandleVerifier [0x00007FF60A435EB6+801862]
	(No symbol) [0x00007FF60A2F945F]
	(No symbol) [0x00007FF60A2F4FB4]
	(No symbol) [0x00007FF60A2F5140]
	(No symbol) [0x00007FF60A2E461F]
	BaseThreadInitThunk [0x00007FFC553D7374+20]
	RtlUserThreadStart [0x00007FFC566DCC91+33]

Error occurred while accessing page 5: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF60A379412+29090]
	(No symbol) [0x00007FF60A2EE

In [16]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

# redbus.in online-booking page for KAAC Transport, per the URL slug.
# scrape_all_pages() in this cell reloads it once for every pagination tab.
URL = "https://www.redbus.in/online-booking/kaac-transport"

def initialize_driver():
    """Start a Chrome WebDriver session and maximize its window.

    Returns:
        The new ``webdriver.Chrome`` instance. The caller owns it and is
        expected to call ``quit()`` when done.
    """
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url, wait_seconds=5):
    """Navigate ``driver`` to ``url`` and pause so the page can render.

    Args:
        driver: Object exposing a Selenium-style ``get(url)`` method.
        url: Address to open.
        wait_seconds: Seconds to sleep after navigation. Defaults to 5, the
            delay that used to be hard-coded; pass 0 to skip the pause.
    """
    driver.get(url)
    # A fixed pause is kept (no specific element is known here to wait on),
    # but it is now tunable; non-positive values skip the sleep entirely.
    if wait_seconds > 0:
        time.sleep(wait_seconds)

def scrape_bus_routes(driver):
    """Collect the link and label of every element with class ``route``.

    Args:
        driver: Selenium WebDriver positioned on the page to inspect.

    Returns:
        A ``(links, names)`` pair of equally long lists: each element's
        ``href`` attribute and its whitespace-stripped text.
    """
    links, names = [], []
    for anchor in driver.find_elements(By.CLASS_NAME, 'route'):
        links.append(anchor.get_attribute('href'))
        names.append(anchor.text.strip())
    return links, names

def scrape_bus_details(driver, url, route_name):
    """Open one route page and build a record for every bus listed on it.

    The page is loaded, the "View Buses" button is clicked when one becomes
    clickable, the page is scrolled to the bottom, and the per-bus fields are
    read from separate element lists that are matched purely by position.

    Args:
        driver: Selenium WebDriver used for navigation and element lookup.
        url: Route page to open; stored as ``Route_Link`` in each record.
        route_name: Stored as ``Route_Name`` in each record.

    Returns:
        A list of dicts with the keys ``Route_Name``, ``Route_Link``,
        ``Bus_Name``, ``Bus_Type``, ``Departing_Time``, ``Duration``,
        ``Reaching_Time``, ``Star_Rating``, ``Price`` and
        ``Seat_Availability``. ``[]`` is returned when the page as a whole
        fails; a single bus whose fields cannot be read is skipped with a
        printed message.
    """
    import re  # local import: only needed here, for price parsing

    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # Narrowed from a bare ``except:`` so Ctrl-C / kernel interrupts
            # are no longer swallowed; a missing button stays non-fatal.
            print(f"No 'View Buses' button found for {url}")

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")

        bus_details = []

        # Only the rating and seat lists are length-guarded; a shorter list
        # for any other field raises IndexError and that bus is skipped below.
        for i in range(len(bus_name_elements)):
            try:
                price_text = price_elements[i].text
                # Keep the decimal point: joining every digit would turn
                # "1,234.50" into "123450". Thousands separators are dropped
                # first; no number at all yields '' as before.
                price_match = re.search(r"\d+(?:\.\d+)?", price_text.replace(",", ""))
                price_numeric = price_match.group(0) if price_match else ''

                bus_detail = {
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_elements[i].text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    # Stored as the raw element text; the unused int conversion
                    # that used to precede this dict was dead code and is gone.
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                }
                bus_details.append(bus_detail)
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages():
    """Scrape bus details for every route on listing pages 1 through 5.

    For each page number a fresh browser is started and ``URL`` is loaded;
    for pages after the first, the pagination tab whose text equals the page
    number is clicked. The route links are collected up front because
    ``scrape_bus_details`` navigates the same driver away from the listing.

    Returns:
        A flat list of the bus-detail dicts gathered from all pages. A page
        that fails (for example because its pagination tab is not found
        within 10 seconds) is reported with a printed message and skipped.
    """
    all_bus_details = []
    for page in range(1, 6):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Always release the browser: previously quit() was skipped when
            # anything above raised, leaving one Chrome process per failed page.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as quit_error:
                    print(f"Error occurred while closing browser for page {page}: {str(quit_error)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Create ``kaac_bus_details`` if needed and insert the scraped rows.

    The MySQL password is no longer embedded in the notebook: it is read from
    the ``MYSQL_PASSWORD`` environment variable, and requested interactively
    through ``getpass`` when that variable is not set. Host, user and database
    are unchanged (``127.0.0.1``, ``root``, ``redbus``).

    Args:
        data: Iterable of dicts carrying the keys ``Route_Name``,
            ``Route_Link``, ``Bus_Name``, ``Bus_Type``, ``Departing_Time``,
            ``Duration``, ``Reaching_Time``, ``Star_Rating``, ``Price`` and
            ``Seat_Availability``.

    MySQL errors are caught and printed rather than raised.
    """
    import getpass
    import os

    password = os.environ.get('MYSQL_PASSWORD')
    if password is None:
        password = getpass.getpass('MySQL password: ')

    rows = [
        (
            detail['Route_Name'], detail['Route_Link'], detail['Bus_Name'],
            detail['Bus_Type'], detail['Departing_Time'], detail['Duration'],
            detail['Reaching_Time'], detail['Star_Rating'],
            # An empty price string is not a valid DECIMAL; store NULL instead.
            detail['Price'] or None,
            detail['Seat_Availability'],
        )
        for detail in data
    ]

    try:
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password=password,
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS kaac_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                # One batched call instead of one execute() per row.
                if rows:
                    cursor.executemany("""
                    INSERT INTO kaac_bus_details (
                        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time,
                        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

# Entry point for this cell: scrape the listing pages, store the rows in
# MySQL, then write the same rows to a CSV file.
if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    # insert_data_into_mysql prints MySQL errors instead of raising, so the CSV
    # export and the final message below run even if the database step failed.
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    df.to_csv('kaac_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'kaac_bus_details.csv' and MySQL database.")

No 'View Buses' button found for https://www.redbus.in/bus-tickets/guwahati-to-hamren
Error occurred while accessing page 3: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF60A379412+29090]
	(No symbol) [0x00007FF60A2EE239]
	(No symbol) [0x00007FF60A1AB1DA]
	(No symbol) [0x00007FF60A1FEFE7]
	(No symbol) [0x00007FF60A1FF23C]
	(No symbol) [0x00007FF60A2497C7]
	(No symbol) [0x00007FF60A22672F]
	(No symbol) [0x00007FF60A2465A2]
	(No symbol) [0x00007FF60A226493]
	(No symbol) [0x00007FF60A1F09D1]
	(No symbol) [0x00007FF60A1F1B31]
	GetHandleVerifier [0x00007FF60A69871D+3302573]
	GetHandleVerifier [0x00007FF60A6E4243+3612627]
	GetHandleVerifier [0x00007FF60A6DA417+3572135]
	GetHandleVerifier [0x00007FF60A435EB6+801862]
	(No symbol) [0x00007FF60A2F945F]
	(No symbol) [0x00007FF60A2F4FB4]
	(No symbol) [0x00007FF60A2F5140]
	(No symbol) [0x00007FF60A2E461F]
	BaseThreadInitThunk [0x00007FFC553D7374+20]
	RtlUserThreadStart [0x00007FFC566DCC91+33]

Error occurred while accessing page 4: Message: 
S

In [17]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

# redbus.in online-booking page for Sikkim Nationalised Transport (SNT), per
# the URL slug. scrape_all_pages() in this cell reloads it once for every
# pagination tab.
URL = "https://www.redbus.in/online-booking/sikkim-nationalised-transport-snt"

def initialize_driver():
    """Start a Chrome WebDriver session and maximize its window.

    Returns:
        The new ``webdriver.Chrome`` instance. The caller owns it and is
        expected to call ``quit()`` when done.
    """
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url, wait_seconds=5):
    """Navigate ``driver`` to ``url`` and pause so the page can render.

    Args:
        driver: Object exposing a Selenium-style ``get(url)`` method.
        url: Address to open.
        wait_seconds: Seconds to sleep after navigation. Defaults to 5, the
            delay that used to be hard-coded; pass 0 to skip the pause.
    """
    driver.get(url)
    # A fixed pause is kept (no specific element is known here to wait on),
    # but it is now tunable; non-positive values skip the sleep entirely.
    if wait_seconds > 0:
        time.sleep(wait_seconds)

def scrape_bus_routes(driver):
    """Collect the link and label of every element with class ``route``.

    Args:
        driver: Selenium WebDriver positioned on the page to inspect.

    Returns:
        A ``(links, names)`` pair of equally long lists: each element's
        ``href`` attribute and its whitespace-stripped text.
    """
    links, names = [], []
    for anchor in driver.find_elements(By.CLASS_NAME, 'route'):
        links.append(anchor.get_attribute('href'))
        names.append(anchor.text.strip())
    return links, names

def scrape_bus_details(driver, url, route_name):
    """Open one route page and build a record for every bus listed on it.

    The page is loaded, the "View Buses" button is clicked when one becomes
    clickable, the page is scrolled to the bottom, and the per-bus fields are
    read from separate element lists that are matched purely by position.

    Args:
        driver: Selenium WebDriver used for navigation and element lookup.
        url: Route page to open; stored as ``Route_Link`` in each record.
        route_name: Stored as ``Route_Name`` in each record.

    Returns:
        A list of dicts with the keys ``Route_Name``, ``Route_Link``,
        ``Bus_Name``, ``Bus_Type``, ``Departing_Time``, ``Duration``,
        ``Reaching_Time``, ``Star_Rating``, ``Price`` and
        ``Seat_Availability``. ``[]`` is returned when the page as a whole
        fails; a single bus whose fields cannot be read is skipped with a
        printed message.
    """
    import re  # local import: only needed here, for price parsing

    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # Narrowed from a bare ``except:`` so Ctrl-C / kernel interrupts
            # are no longer swallowed; a missing button stays non-fatal.
            print(f"No 'View Buses' button found for {url}")

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")

        bus_details = []

        # Only the rating and seat lists are length-guarded; a shorter list
        # for any other field raises IndexError and that bus is skipped below.
        for i in range(len(bus_name_elements)):
            try:
                price_text = price_elements[i].text
                # Keep the decimal point: joining every digit would turn
                # "1,234.50" into "123450". Thousands separators are dropped
                # first; no number at all yields '' as before.
                price_match = re.search(r"\d+(?:\.\d+)?", price_text.replace(",", ""))
                price_numeric = price_match.group(0) if price_match else ''

                bus_detail = {
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_elements[i].text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    # Stored as the raw element text; the unused int conversion
                    # that used to precede this dict was dead code and is gone.
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                }
                bus_details.append(bus_detail)
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages():
    """Scrape bus details for every route on listing pages 1 through 5.

    For each page number a fresh browser is started and ``URL`` is loaded;
    for pages after the first, the pagination tab whose text equals the page
    number is clicked. The route links are collected up front because
    ``scrape_bus_details`` navigates the same driver away from the listing.

    Returns:
        A flat list of the bus-detail dicts gathered from all pages. A page
        that fails (for example because its pagination tab is not found
        within 10 seconds) is reported with a printed message and skipped.
    """
    all_bus_details = []
    for page in range(1, 6):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Always release the browser: previously quit() was skipped when
            # anything above raised, leaving one Chrome process per failed page.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as quit_error:
                    print(f"Error occurred while closing browser for page {page}: {str(quit_error)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Create ``snt_bus_details`` if needed and insert the scraped rows.

    The MySQL password is no longer embedded in the notebook: it is read from
    the ``MYSQL_PASSWORD`` environment variable, and requested interactively
    through ``getpass`` when that variable is not set. Host, user and database
    are unchanged (``127.0.0.1``, ``root``, ``redbus``).

    Args:
        data: Iterable of dicts carrying the keys ``Route_Name``,
            ``Route_Link``, ``Bus_Name``, ``Bus_Type``, ``Departing_Time``,
            ``Duration``, ``Reaching_Time``, ``Star_Rating``, ``Price`` and
            ``Seat_Availability``.

    MySQL errors are caught and printed rather than raised.
    """
    import getpass
    import os

    password = os.environ.get('MYSQL_PASSWORD')
    if password is None:
        password = getpass.getpass('MySQL password: ')

    rows = [
        (
            detail['Route_Name'], detail['Route_Link'], detail['Bus_Name'],
            detail['Bus_Type'], detail['Departing_Time'], detail['Duration'],
            detail['Reaching_Time'], detail['Star_Rating'],
            # An empty price string is not a valid DECIMAL; store NULL instead.
            detail['Price'] or None,
            detail['Seat_Availability'],
        )
        for detail in data
    ]

    try:
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password=password,
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS snt_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                # One batched call instead of one execute() per row.
                if rows:
                    cursor.executemany("""
                    INSERT INTO snt_bus_details (
                        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time,
                        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

# Entry point for this cell: scrape the listing pages, store the rows in
# MySQL, then write the same rows to a CSV file.
if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    # insert_data_into_mysql prints MySQL errors instead of raising, so the CSV
    # export and the final message below run even if the database step failed.
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    df.to_csv('snt_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'snt_bus_details.csv' and MySQL database.")

Error occurred while accessing page 2: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF60A379412+29090]
	(No symbol) [0x00007FF60A2EE239]
	(No symbol) [0x00007FF60A1AB1DA]
	(No symbol) [0x00007FF60A1FEFE7]
	(No symbol) [0x00007FF60A1FF23C]
	(No symbol) [0x00007FF60A2497C7]
	(No symbol) [0x00007FF60A22672F]
	(No symbol) [0x00007FF60A2465A2]
	(No symbol) [0x00007FF60A226493]
	(No symbol) [0x00007FF60A1F09D1]
	(No symbol) [0x00007FF60A1F1B31]
	GetHandleVerifier [0x00007FF60A69871D+3302573]
	GetHandleVerifier [0x00007FF60A6E4243+3612627]
	GetHandleVerifier [0x00007FF60A6DA417+3572135]
	GetHandleVerifier [0x00007FF60A435EB6+801862]
	(No symbol) [0x00007FF60A2F945F]
	(No symbol) [0x00007FF60A2F4FB4]
	(No symbol) [0x00007FF60A2F5140]
	(No symbol) [0x00007FF60A2E461F]
	BaseThreadInitThunk [0x00007FFC553D7374+20]
	RtlUserThreadStart [0x00007FFC566DCC91+33]

Error occurred while accessing page 3: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF60A379412+29090]
	(No symbol) [0x00007FF60A2EE

In [18]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

# redbus.in online-booking page for Meghalaya Transport Corporation (MTC),
# per the URL slug. Rebinds the module-level URL set by earlier cells.
URL = "https://www.redbus.in/online-booking/meghalaya-transport-corporation-mtc"

def initialize_driver():
    """Start a Chrome WebDriver session and maximize its window.

    Returns:
        The new ``webdriver.Chrome`` instance. The caller owns it and is
        expected to call ``quit()`` when done.
    """
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url, wait_seconds=5):
    """Navigate ``driver`` to ``url`` and pause so the page can render.

    Args:
        driver: Object exposing a Selenium-style ``get(url)`` method.
        url: Address to open.
        wait_seconds: Seconds to sleep after navigation. Defaults to 5, the
            delay that used to be hard-coded; pass 0 to skip the pause.
    """
    driver.get(url)
    # A fixed pause is kept (no specific element is known here to wait on),
    # but it is now tunable; non-positive values skip the sleep entirely.
    if wait_seconds > 0:
        time.sleep(wait_seconds)

def scrape_bus_routes(driver):
    """Collect the link and label of every element with class ``route``.

    Args:
        driver: Selenium WebDriver positioned on the page to inspect.

    Returns:
        A ``(links, names)`` pair of equally long lists: each element's
        ``href`` attribute and its whitespace-stripped text.
    """
    links, names = [], []
    for anchor in driver.find_elements(By.CLASS_NAME, 'route'):
        links.append(anchor.get_attribute('href'))
        names.append(anchor.text.strip())
    return links, names

def scrape_bus_details(driver, url, route_name):
    """Open a route page and build one record per bus listed on it.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Link of the route page; also stored as ``Route_Link``.
        route_name: Route label stored as ``Route_Name`` in every record.

    Returns:
        list[dict]: One dict per bus with the keys Route_Name, Route_Link,
        Bus_Name, Bus_Type, Departing_Time, Duration, Reaching_Time,
        Star_Rating, Price and Seat_Availability. A bus whose fields cannot
        be read is skipped with a printed message; if the page itself cannot
        be processed, an empty list is returned.
    """
    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            # The click is issued through JavaScript rather than element.click().
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # Best effort: the page is still scraped without the click. Catching
            # Exception instead of using a bare except lets KeyboardInterrupt and
            # SystemExit propagate, so the run can still be interrupted.
            print(f"No 'View Buses' button found for {url}")

        # Scroll to the bottom before reading the listings
        # (presumably to trigger lazy loading -- TODO confirm).
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        # contains(@class, 'seat-left') already matches every element the longer
        # 'seat-left m-top-30' test matched, so a single predicate is sufficient.
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        bus_details = []

        # The element lists are paired up by position. Ratings and seat counts
        # get an explicit fallback when their list is shorter than the name list;
        # a shortfall in any other list raises and skips that bus.
        for i in range(len(bus_name_elements)):
            try:
                seat_text = seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'

                # Keep only the digits of the fare text.
                # NOTE(review): this also drops a decimal point ("120.50" -> "12050");
                # confirm that fares on the page are always whole numbers.
                price_numeric = ''.join(filter(str.isdigit, price_elements[i].text))

                bus_details.append({
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_elements[i].text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_text,
                })
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages(max_pages=5):
    """Scrape the bus records of every route found on the first listing pages.

    For each page number a fresh browser is started, ``URL`` is loaded, the
    pagination tab for that page is clicked (pages after the first), and every
    route link found is passed to ``scrape_bus_details``.

    Args:
        max_pages: Number of pagination tabs to visit, starting at page 1.
            Defaults to 5, the value that used to be hard-coded.

    Returns:
        list[dict]: All records collected across the pages. A page that fails
        is reported with ``print`` and skipped.
    """
    all_bus_details = []
    for page in range(1, max_pages + 1):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            # Collect every link first: scrape_bus_details navigates the same
            # driver away from the listing page.
            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Always close the browser, including when the page failed (for
            # example because the pagination tab does not exist); otherwise
            # every failed page leaves a Chrome process running.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Create the ``mtc_bus_details`` table if needed and insert the scraped rows.

    The MySQL password is not embedded in the notebook: it is read from the
    ``MYSQL_PASSWORD`` environment variable, and when that variable is unset
    the user is prompted for it with ``getpass``.

    Args:
        data: Iterable of dicts as produced by ``scrape_bus_details``; each must
            contain the keys Route_Name, Route_Link, Bus_Name, Bus_Type,
            Departing_Time, Duration, Reaching_Time, Star_Rating, Price and
            Seat_Availability.

    Returns:
        None. A ``pymysql.MySQLError`` is caught and reported with ``print``.
    """
    # Local imports keep this function usable without touching the import cell.
    import getpass
    import os

    password = os.environ.get('MYSQL_PASSWORD')
    if password is None:
        password = getpass.getpass("MySQL password for user 'root': ")

    columns = (
        'Route_Name', 'Route_Link', 'Bus_Name', 'Bus_Type', 'Departing_Time',
        'Duration', 'Reaching_Time', 'Star_Rating', 'Price', 'Seat_Availability',
    )

    try:
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password=password,
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS mtc_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                # One parameter tuple per record, in the column order of the INSERT.
                rows = [tuple(detail[column] for column in columns) for detail in data]

                # executemany sends the rows as one batched call instead of one
                # execute per row; values are still passed as bound parameters.
                cursor.executemany("""
                INSERT INTO mtc_bus_details (
                    Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time,
                    Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

# Cell entry point: scrape every listing page, then persist the records to MySQL and to CSV.
if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    # insert_data_into_mysql prints MySQL errors instead of raising them, so the
    # lines below (including the final message) still run if the insert fails.
    insert_data_into_mysql(all_bus_details)
    # Write the same records to a CSV file; the relative path resolves against
    # the current working directory.
    df = pd.DataFrame(all_bus_details)
    df.to_csv('mtc_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'mtc_bus_details.csv' and MySQL database.")

No 'View Buses' button found for https://www.redbus.in/bus-tickets/shillong-to-ramkrishnanagar
No 'View Buses' button found for https://www.redbus.in/bus-tickets/ramkrishnanagar-to-shillong
Error occurred while accessing page 3: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF60A379412+29090]
	(No symbol) [0x00007FF60A2EE239]
	(No symbol) [0x00007FF60A1AB1DA]
	(No symbol) [0x00007FF60A1FEFE7]
	(No symbol) [0x00007FF60A1FF23C]
	(No symbol) [0x00007FF60A2497C7]
	(No symbol) [0x00007FF60A22672F]
	(No symbol) [0x00007FF60A2465A2]
	(No symbol) [0x00007FF60A226493]
	(No symbol) [0x00007FF60A1F09D1]
	(No symbol) [0x00007FF60A1F1B31]
	GetHandleVerifier [0x00007FF60A69871D+3302573]
	GetHandleVerifier [0x00007FF60A6E4243+3612627]
	GetHandleVerifier [0x00007FF60A6DA417+3572135]
	GetHandleVerifier [0x00007FF60A435EB6+801862]
	(No symbol) [0x00007FF60A2F945F]
	(No symbol) [0x00007FF60A2F4FB4]
	(No symbol) [0x00007FF60A2F5140]
	(No symbol) [0x00007FF60A2E461F]
	BaseThreadInitThunk [0x00007FFC553D

In [19]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

# redBus page that scrape_all_pages() opens on every iteration to collect the route links.
URL = "https://www.redbus.in/online-booking/bihar-state-road-transport-corporation-bsrtc"

def initialize_driver():
    """Start a Chrome WebDriver session with a maximized window.

    Returns:
        The newly created ``webdriver.Chrome`` instance.
    """
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url, wait_seconds=5):
    """Navigate ``driver`` to ``url`` and then pause for a fixed delay.

    Args:
        driver: Object exposing a Selenium-style ``get(url)`` method.
        url: Address to open.
        wait_seconds: Length of the pause after navigation, in seconds.
            Defaults to 5, the value that used to be hard-coded.

    Returns:
        None.
    """
    driver.get(url)
    # Fixed pause after navigation (presumably to let dynamic content load).
    time.sleep(wait_seconds)

def scrape_bus_routes(driver):
    """Read the link and label of every ``route`` element on the current page.

    Args:
        driver: Selenium WebDriver positioned on the page to inspect.

    Returns:
        A ``(links, names)`` pair of parallel lists: the ``href`` attribute and
        the stripped text of each element with class ``route``.
    """
    anchors = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href'))

    names = []
    for anchor in anchors:
        names.append(anchor.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Open a route page and build one record per bus listed on it.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Link of the route page; also stored as ``Route_Link``.
        route_name: Route label stored as ``Route_Name`` in every record.

    Returns:
        list[dict]: One dict per bus with the keys Route_Name, Route_Link,
        Bus_Name, Bus_Type, Departing_Time, Duration, Reaching_Time,
        Star_Rating, Price and Seat_Availability. A bus whose fields cannot
        be read is skipped with a printed message; if the page itself cannot
        be processed, an empty list is returned.
    """
    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            # The click is issued through JavaScript rather than element.click().
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # Best effort: the page is still scraped without the click. Catching
            # Exception instead of using a bare except lets KeyboardInterrupt and
            # SystemExit propagate, so the run can still be interrupted.
            print(f"No 'View Buses' button found for {url}")

        # Scroll to the bottom before reading the listings
        # (presumably to trigger lazy loading -- TODO confirm).
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        # contains(@class, 'seat-left') already matches every element the longer
        # 'seat-left m-top-30' test matched, so a single predicate is sufficient.
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        bus_details = []

        # The element lists are paired up by position. Ratings and seat counts
        # get an explicit fallback when their list is shorter than the name list;
        # a shortfall in any other list raises and skips that bus.
        for i in range(len(bus_name_elements)):
            try:
                seat_text = seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'

                # Keep only the digits of the fare text.
                # NOTE(review): this also drops a decimal point ("120.50" -> "12050");
                # confirm that fares on the page are always whole numbers.
                price_numeric = ''.join(filter(str.isdigit, price_elements[i].text))

                bus_details.append({
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_elements[i].text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_text,
                })
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages(max_pages=5):
    """Scrape the bus records of every route found on the first listing pages.

    For each page number a fresh browser is started, ``URL`` is loaded, the
    pagination tab for that page is clicked (pages after the first), and every
    route link found is passed to ``scrape_bus_details``.

    Args:
        max_pages: Number of pagination tabs to visit, starting at page 1.
            Defaults to 5, the value that used to be hard-coded.

    Returns:
        list[dict]: All records collected across the pages. A page that fails
        is reported with ``print`` and skipped.
    """
    all_bus_details = []
    for page in range(1, max_pages + 1):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            # Collect every link first: scrape_bus_details navigates the same
            # driver away from the listing page.
            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Always close the browser, including when the page failed (for
            # example because the pagination tab does not exist); otherwise
            # every failed page leaves a Chrome process running.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Create the ``bsrtc_bus_details`` table if needed and insert the scraped rows.

    The MySQL password is not embedded in the notebook: it is read from the
    ``MYSQL_PASSWORD`` environment variable, and when that variable is unset
    the user is prompted for it with ``getpass``.

    Args:
        data: Iterable of dicts as produced by ``scrape_bus_details``; each must
            contain the keys Route_Name, Route_Link, Bus_Name, Bus_Type,
            Departing_Time, Duration, Reaching_Time, Star_Rating, Price and
            Seat_Availability.

    Returns:
        None. A ``pymysql.MySQLError`` is caught and reported with ``print``.
    """
    # Local imports keep this function usable without touching the import cell.
    import getpass
    import os

    password = os.environ.get('MYSQL_PASSWORD')
    if password is None:
        password = getpass.getpass("MySQL password for user 'root': ")

    columns = (
        'Route_Name', 'Route_Link', 'Bus_Name', 'Bus_Type', 'Departing_Time',
        'Duration', 'Reaching_Time', 'Star_Rating', 'Price', 'Seat_Availability',
    )

    try:
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password=password,
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS bsrtc_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                # One parameter tuple per record, in the column order of the INSERT.
                rows = [tuple(detail[column] for column in columns) for detail in data]

                # executemany sends the rows as one batched call instead of one
                # execute per row; values are still passed as bound parameters.
                cursor.executemany("""
                INSERT INTO bsrtc_bus_details (
                    Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time,
                    Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

# Cell entry point: scrape every listing page, then persist the records to MySQL and to CSV.
if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    # insert_data_into_mysql prints MySQL errors instead of raising them, so the
    # lines below (including the final message) still run if the insert fails.
    insert_data_into_mysql(all_bus_details)
    # Write the same records to a CSV file; the relative path resolves against
    # the current working directory.
    df = pd.DataFrame(all_bus_details)
    df.to_csv('bsrtc_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'bsrtc_bus_details.csv' and MySQL database.")

Error occurred while accessing page 5: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF60A379412+29090]
	(No symbol) [0x00007FF60A2EE239]
	(No symbol) [0x00007FF60A1AB1DA]
	(No symbol) [0x00007FF60A1FEFE7]
	(No symbol) [0x00007FF60A1FF23C]
	(No symbol) [0x00007FF60A2497C7]
	(No symbol) [0x00007FF60A22672F]
	(No symbol) [0x00007FF60A2465A2]
	(No symbol) [0x00007FF60A226493]
	(No symbol) [0x00007FF60A1F09D1]
	(No symbol) [0x00007FF60A1F1B31]
	GetHandleVerifier [0x00007FF60A69871D+3302573]
	GetHandleVerifier [0x00007FF60A6E4243+3612627]
	GetHandleVerifier [0x00007FF60A6DA417+3572135]
	GetHandleVerifier [0x00007FF60A435EB6+801862]
	(No symbol) [0x00007FF60A2F945F]
	(No symbol) [0x00007FF60A2F4FB4]
	(No symbol) [0x00007FF60A2F5140]
	(No symbol) [0x00007FF60A2E461F]
	BaseThreadInitThunk [0x00007FFC553D7374+20]
	RtlUserThreadStart [0x00007FFC566DCC91+33]

Data inserted successfully into MySQL database.
Scraping completed. Data saved to 'bsrtc_bus_details.csv' and MySQL database.


In [20]:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

# redBus page that scrape_all_pages() opens on every iteration to collect the route links.
URL = "https://www.redbus.in/online-booking/ktcl"

def initialize_driver():
    """Start a Chrome WebDriver session with a maximized window.

    Returns:
        The newly created ``webdriver.Chrome`` instance.
    """
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url, wait_seconds=5):
    """Navigate ``driver`` to ``url`` and then pause for a fixed delay.

    Args:
        driver: Object exposing a Selenium-style ``get(url)`` method.
        url: Address to open.
        wait_seconds: Length of the pause after navigation, in seconds.
            Defaults to 5, the value that used to be hard-coded.

    Returns:
        None.
    """
    driver.get(url)
    # Fixed pause after navigation (presumably to let dynamic content load).
    time.sleep(wait_seconds)

def scrape_bus_routes(driver):
    """Read the link and label of every ``route`` element on the current page.

    Args:
        driver: Selenium WebDriver positioned on the page to inspect.

    Returns:
        A ``(links, names)`` pair of parallel lists: the ``href`` attribute and
        the stripped text of each element with class ``route``.
    """
    anchors = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href'))

    names = []
    for anchor in anchors:
        names.append(anchor.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Open a route page and build one record per bus listed on it.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Link of the route page; also stored as ``Route_Link``.
        route_name: Route label stored as ``Route_Name`` in every record.

    Returns:
        list[dict]: One dict per bus with the keys Route_Name, Route_Link,
        Bus_Name, Bus_Type, Departing_Time, Duration, Reaching_Time,
        Star_Rating, Price and Seat_Availability. A bus whose fields cannot
        be read is skipped with a printed message; if the page itself cannot
        be processed, an empty list is returned.
    """
    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            # The click is issued through JavaScript rather than element.click().
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # Best effort: the page is still scraped without the click. Catching
            # Exception instead of using a bare except lets KeyboardInterrupt and
            # SystemExit propagate, so the run can still be interrupted.
            print(f"No 'View Buses' button found for {url}")

        # Scroll to the bottom before reading the listings
        # (presumably to trigger lazy loading -- TODO confirm).
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        # contains(@class, 'seat-left') already matches every element the longer
        # 'seat-left m-top-30' test matched, so a single predicate is sufficient.
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        bus_details = []

        # The element lists are paired up by position. Ratings and seat counts
        # get an explicit fallback when their list is shorter than the name list;
        # a shortfall in any other list raises and skips that bus.
        for i in range(len(bus_name_elements)):
            try:
                seat_text = seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'

                # Keep only the digits of the fare text.
                # NOTE(review): this also drops a decimal point ("120.50" -> "12050");
                # confirm that fares on the page are always whole numbers.
                price_numeric = ''.join(filter(str.isdigit, price_elements[i].text))

                bus_details.append({
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_elements[i].text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_text,
                })
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages(max_pages=5):
    """Scrape the bus records of every route found on the first listing pages.

    For each page number a fresh browser is started, ``URL`` is loaded, the
    pagination tab for that page is clicked (pages after the first), and every
    route link found is passed to ``scrape_bus_details``.

    Args:
        max_pages: Number of pagination tabs to visit, starting at page 1.
            Defaults to 5, the value that used to be hard-coded.

    Returns:
        list[dict]: All records collected across the pages. A page that fails
        is reported with ``print`` and skipped.
    """
    all_bus_details = []
    for page in range(1, max_pages + 1):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            # Collect every link first: scrape_bus_details navigates the same
            # driver away from the listing page.
            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Always close the browser, including when the page failed (for
            # example because the pagination tab does not exist); otherwise
            # every failed page leaves a Chrome process running.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Create the ``ktcl_bus_details`` table if needed and insert the scraped rows.

    The MySQL password is not embedded in the notebook: it is read from the
    ``MYSQL_PASSWORD`` environment variable, and when that variable is unset
    the user is prompted for it with ``getpass``.

    Args:
        data: Iterable of dicts as produced by ``scrape_bus_details``; each must
            contain the keys Route_Name, Route_Link, Bus_Name, Bus_Type,
            Departing_Time, Duration, Reaching_Time, Star_Rating, Price and
            Seat_Availability.

    Returns:
        None. A ``pymysql.MySQLError`` is caught and reported with ``print``.
    """
    # Local imports keep this function usable without touching the import cell.
    import getpass
    import os

    password = os.environ.get('MYSQL_PASSWORD')
    if password is None:
        password = getpass.getpass("MySQL password for user 'root': ")

    columns = (
        'Route_Name', 'Route_Link', 'Bus_Name', 'Bus_Type', 'Departing_Time',
        'Duration', 'Reaching_Time', 'Star_Rating', 'Price', 'Seat_Availability',
    )

    try:
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password=password,
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS ktcl_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                # One parameter tuple per record, in the column order of the INSERT.
                rows = [tuple(detail[column] for column in columns) for detail in data]

                # executemany sends the rows as one batched call instead of one
                # execute per row; values are still passed as bound parameters.
                cursor.executemany("""
                INSERT INTO ktcl_bus_details (
                    Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time,
                    Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

# Cell entry point: scrape every listing page, then persist the records to MySQL and to CSV.
if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    # insert_data_into_mysql prints MySQL errors instead of raising them, so the
    # lines below (including the final message) still run if the insert fails.
    insert_data_into_mysql(all_bus_details)
    # Write the same records to a CSV file; the relative path resolves against
    # the current working directory.
    df = pd.DataFrame(all_bus_details)
    df.to_csv('ktcl_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'ktcl_bus_details.csv' and MySQL database.")

Error occurred while accessing page 5: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF60A379412+29090]
	(No symbol) [0x00007FF60A2EE239]
	(No symbol) [0x00007FF60A1AB1DA]
	(No symbol) [0x00007FF60A1FEFE7]
	(No symbol) [0x00007FF60A1FF23C]
	(No symbol) [0x00007FF60A2497C7]
	(No symbol) [0x00007FF60A22672F]
	(No symbol) [0x00007FF60A2465A2]
	(No symbol) [0x00007FF60A226493]
	(No symbol) [0x00007FF60A1F09D1]
	(No symbol) [0x00007FF60A1F1B31]
	GetHandleVerifier [0x00007FF60A69871D+3302573]
	GetHandleVerifier [0x00007FF60A6E4243+3612627]
	GetHandleVerifier [0x00007FF60A6DA417+3572135]
	GetHandleVerifier [0x00007FF60A435EB6+801862]
	(No symbol) [0x00007FF60A2F945F]
	(No symbol) [0x00007FF60A2F4FB4]
	(No symbol) [0x00007FF60A2F5140]
	(No symbol) [0x00007FF60A2E461F]
	BaseThreadInitThunk [0x00007FFC553D7374+20]
	RtlUserThreadStart [0x00007FFC566DCC91+33]

Data inserted successfully into MySQL database.
Scraping completed. Data saved to 'ktcl_bus_details.csv' and MySQL database.


In [21]:
pwd

'C:\\windows\\system32'