In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

URL = "https://www.redbus.in/online-booking/uttar-pradesh-state-road-transport-corporation-upsrtc/?utm_source=rtchometile"

def initialize_driver():
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)

def scrape_bus_routes(driver):
    route_elements = driver.find_elements(By.CLASS_NAME, 'route')
    bus_routes_link = [route.get_attribute('href') for route in route_elements]
    bus_routes_name = [route.text.strip() for route in route_elements]
    return bus_routes_link, bus_routes_name

def scrape_bus_details(driver, url, route_name):
    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except:
            print(f"No 'View Buses' button found for {url}")

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")
        
        bus_details = []

        for i in range(len(bus_name_elements)):
            try:
                seat_availability = seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                seat_availability = int(seat_availability) if seat_availability.isdigit() else 0

                price_text = price_elements[i].text
                price_numeric = ''.join(filter(str.isdigit, price_text))

                bus_detail = {
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_elements[i].text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                }
                bus_details.append(bus_detail)
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages():
    all_bus_details = []
    for page in range(1, 6):
        try:
            driver = initialize_driver()
            load_page(driver, URL)
            
            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
            driver.quit()
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    try:
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='6381167213',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )
        
        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS upsrtc_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                for detail in data:
                    cursor.execute("""
                    INSERT INTO upsrtc_bus_details (
                        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time, 
                        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, (
                        detail['Route_Name'], detail['Route_Link'], detail['Bus_Name'], 
                        detail['Bus_Type'], detail['Departing_Time'], detail['Duration'], 
                        detail['Reaching_Time'], detail['Star_Rating'], detail['Price'], 
                        detail['Seat_Availability']
                    ))

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    df.to_csv('upsrtc_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'upsrtc_bus_details.csv' and MySQL database.")


No 'View Buses' button found for https://www.redbus.in/bus-tickets/delhi-to-aligarh-uttar-pradesh
No 'View Buses' button found for https://www.redbus.in/bus-tickets/lucknow-to-allahabad
Error occurred while accessing https://www.redbus.in/bus-tickets/lucknow-to-allahabad: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=109.0.5414.168)
Stacktrace:
Backtrace:
	(No symbol) [0x012B6643]
	(No symbol) [0x0124BE21]
	(No symbol) [0x0114DA9D]
	(No symbol) [0x0112EF6A]
	(No symbol) [0x011A3AAB]
	(No symbol) [0x011B61B6]
	(No symbol) [0x0119FB76]
	(No symbol) [0x011749C1]
	(No symbol) [0x01175E5D]
	GetHandleVerifier [0x0152A142+2497106]
	GetHandleVerifier [0x015585D3+2686691]
	GetHandleVerifier [0x0155BB9C+2700460]
	GetHandleVerifier [0x01363B10+635936]
	(No symbol) [0x01254A1F]
	(No symbol) [0x0125A418]
	(No symbol) [0x0125A505]
	(No symbol) [0x0126508B]
	BaseThreadInitThunk [0x76C46A24+36]
	RtlInitializeExceptionChain [0x770CA

In [None]:
import pandas as pd 
df=pd.read_csv("upsrtc_bus_details.csv")
df

In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

URL = "https://www.redbus.in/online-booking/ksrtc-kerala/?utm_source=rtchometile"

def initialize_driver():
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)

def scrape_bus_routes(driver):
    route_elements = driver.find_elements(By.CLASS_NAME, 'route')
    bus_routes_link = [route.get_attribute('href') for route in route_elements]
    bus_routes_name = [route.text.strip() for route in route_elements]
    return bus_routes_link, bus_routes_name

def scrape_bus_details(driver, url, route_name):
    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except:
            print(f"No 'View Buses' button found for {url}")

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")
        
        bus_details = []

        for i in range(len(bus_name_elements)):
            try:
                seat_availability = seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                seat_availability = int(seat_availability) if seat_availability.isdigit() else 0

                price_text = price_elements[i].text
                price_numeric = ''.join(filter(str.isdigit, price_text))

                bus_detail = {
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_elements[i].text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                }
                bus_details.append(bus_detail)
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages():
    all_bus_details = []
    for page in range(1, 6):
        try:
            driver = initialize_driver()
            load_page(driver, URL)
            
            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
            driver.quit()
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    try:
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='6381167213',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )
        
        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS ksrtc_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                for detail in data:
                    cursor.execute("""
                    INSERT INTO ksrtc_bus_details (
                        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time, 
                        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, (
                        detail['Route_Name'], detail['Route_Link'], detail['Bus_Name'], 
                        detail['Bus_Type'], detail['Departing_Time'], detail['Duration'], 
                        detail['Reaching_Time'], detail['Star_Rating'], detail['Price'], 
                        detail['Seat_Availability']
                    ))

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    df.to_csv('ksrtc_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'ksrtc_bus_details.csv' and MySQL database.")


Error occurred while accessing https://www.redbus.in/bus-tickets/kozhikode-to-bangalore: Message: timeout: Timed out receiving message from renderer: 299.869
  (Session info: chrome=109.0.5414.168)
Stacktrace:
Backtrace:
	(No symbol) [0x012B6643]
	(No symbol) [0x0124BE21]
	(No symbol) [0x0114DA9D]
	(No symbol) [0x0113F55A]
	(No symbol) [0x0113F2D8]
	(No symbol) [0x0113DC68]
	(No symbol) [0x0113E647]
	(No symbol) [0x01148568]
	(No symbol) [0x01154956]
	(No symbol) [0x011581C6]
	(No symbol) [0x0113E9F1]
	(No symbol) [0x011546D5]
	(No symbol) [0x011B7057]
	(No symbol) [0x0119FB76]
	(No symbol) [0x011749C1]
	(No symbol) [0x01175E5D]
	GetHandleVerifier [0x0152A142+2497106]
	GetHandleVerifier [0x015585D3+2686691]
	GetHandleVerifier [0x0155BB9C+2700460]
	GetHandleVerifier [0x01363B10+635936]
	(No symbol) [0x01254A1F]
	(No symbol) [0x0125A418]
	(No symbol) [0x0125A505]
	(No symbol) [0x0126508B]
	BaseThreadInitThunk [0x76C46A24+36]
	RtlInitializeExceptionChain [0x770CA9FF+143]
	RtlInitializeExc

In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

URL = "https://www.redbus.in/online-booking/apsrtc/?utm_source=rtchometile"

def initialize_driver():
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)

def scrape_bus_routes(driver):
    route_elements = driver.find_elements(By.CLASS_NAME, 'route')
    bus_routes_link = [route.get_attribute('href') for route in route_elements]
    bus_routes_name = [route.text.strip() for route in route_elements]
    return bus_routes_link, bus_routes_name

def scrape_bus_details(driver, url, route_name):
    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except:
            print(f"No 'View Buses' button found for {url}")

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")
        
        bus_details = []

        for i in range(len(bus_name_elements)):
            try:
                seat_availability = seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                seat_availability = int(seat_availability) if seat_availability.isdigit() else 0

                price_text = price_elements[i].text
                price_numeric = ''.join(filter(str.isdigit, price_text))

                bus_detail = {
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_elements[i].text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                }
                bus_details.append(bus_detail)
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages():
    all_bus_details = []
    for page in range(1, 6):
        try:
            driver = initialize_driver()
            load_page(driver, URL)
            
            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
            driver.quit()
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    try:
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='6381167213',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )
        
        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS apsrtc_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                for detail in data:
                    cursor.execute("""
                    INSERT INTO apsrtc_bus_details (
                        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time, 
                        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, (
                        detail['Route_Name'], detail['Route_Link'], detail['Bus_Name'], 
                        detail['Bus_Type'], detail['Departing_Time'], detail['Duration'], 
                        detail['Reaching_Time'], detail['Star_Rating'], detail['Price'], 
                        detail['Seat_Availability']
                    ))

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    df.to_csv('apsrtc_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'apsrtc_bus_details.csv' and MySQL database.")


No 'View Buses' button found for https://www.redbus.in/bus-tickets/vijayawada-to-hyderabad
No 'View Buses' button found for https://www.redbus.in/bus-tickets/visakhapatnam-to-vijayawada
Error occurred while accessing https://www.redbus.in/bus-tickets/visakhapatnam-to-vijayawada: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=109.0.5414.168)
Stacktrace:
Backtrace:
	(No symbol) [0x012B6643]
	(No symbol) [0x0124BE21]
	(No symbol) [0x0114DA9D]
	(No symbol) [0x0112EF6A]
	(No symbol) [0x011A3AAB]
	(No symbol) [0x011B61B6]
	(No symbol) [0x0119FB76]
	(No symbol) [0x011749C1]
	(No symbol) [0x01175E5D]
	GetHandleVerifier [0x0152A142+2497106]
	GetHandleVerifier [0x015585D3+2686691]
	GetHandleVerifier [0x0155BB9C+2700460]
	GetHandleVerifier [0x01363B10+635936]
	(No symbol) [0x01254A1F]
	(No symbol) [0x0125A418]
	(No symbol) [0x0125A505]
	(No symbol) [0x0126508B]
	BaseThreadInitThunk [0x76C46A24+36]
	RtlInitializeExceptionChain [

In [5]:
import pandas as pd 
df=pd.read_csv("apsrtc_bus_details.csv")
df

Unnamed: 0,Route_Name,Route_Link,Bus_Name,Bus_Type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability
0,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...,APSRTC - 3360,AMARAVATHI (VOLVO / SCANIA A.C Multi Axle),18:25,07h 05m,01:30,3.8,720,39 Seats available
1,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...,APSRTC - 4916,Super Luxury (Non AC Seater 2+2 Push Back),19:25,06h 50m,02:15,4.4,469,19 Seats available
2,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...,APSRTC - 2563,INDRA(A.C. Seater),19:30,05h 55m,01:25,4.5,528,31 Seats available
3,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...,APSRTC - 2680,VENNELA (A.C. SLEEPER),19:45,06h 15m,02:00,4.6,839,13 Seats available
4,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...,APSRTC - 2798,INDRA(A.C. Seater),19:55,06h 35m,02:30,4.3,567,32 Seats available
...,...,...,...,...,...,...,...,...,...,...
1374,Bangalore to Kadiri,https://www.redbus.in/bus-tickets/bangalore-to...,AR & BCVR Travels,Non A/C Seater / Sleeper (2+1),20:30,06h 30m,03:00,4.0,500,28 Seats available
1375,Bangalore to Kadiri,https://www.redbus.in/bus-tickets/bangalore-to...,Vasavi travels,Non A/C Seater / Sleeper (2+1),20:30,05h 30m,02:00,3.7,450,16 Seats available
1376,Bangalore to Kadiri,https://www.redbus.in/bus-tickets/bangalore-to...,Ahobila Travels,Non A/C Seater / Sleeper (2+1),20:35,05h 55m,02:30,3.1,399,23 Seats available
1377,Bangalore to Kadiri,https://www.redbus.in/bus-tickets/bangalore-to...,AR & BCVR Travels,Non A/C Seater / Sleeper (2+1),21:00,06h 00m,03:00,4.0,450,19 Seats available


In [4]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

URL = "https://www.redbus.in/online-booking/ktcl/?utm_source=rtchometile"

def initialize_driver():
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)

def scrape_bus_routes(driver):
    route_elements = driver.find_elements(By.CLASS_NAME, 'route')
    bus_routes_link = [route.get_attribute('href') for route in route_elements]
    bus_routes_name = [route.text.strip() for route in route_elements]
    return bus_routes_link, bus_routes_name

def scrape_bus_details(driver, url, route_name):
    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except:
            print(f"No 'View Buses' button found for {url}")

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")
        
        bus_details = []

        for i in range(len(bus_name_elements)):
            try:
                seat_availability = seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                seat_availability = int(seat_availability) if seat_availability.isdigit() else 0

                price_text = price_elements[i].text
                price_numeric = ''.join(filter(str.isdigit, price_text))

                bus_detail = {
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_elements[i].text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                }
                bus_details.append(bus_detail)
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages():
    all_bus_details = []
    for page in range(1, 6):
        try:
            driver = initialize_driver()
            load_page(driver, URL)
            
            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
            driver.quit()
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    try:
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='6381167213',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )
        
        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS ktcl_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                for detail in data:
                    cursor.execute("""
                    INSERT INTO ktcl_bus_details (
                        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time, 
                        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, (
                        detail['Route_Name'], detail['Route_Link'], detail['Bus_Name'], 
                        detail['Bus_Type'], detail['Departing_Time'], detail['Duration'], 
                        detail['Reaching_Time'], detail['Star_Rating'], detail['Price'], 
                        detail['Seat_Availability']
                    ))

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    df.to_csv('ktcl_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'ktcl_bus_details.csv' and MySQL database.")


No 'View Buses' button found for https://www.redbus.in/bus-tickets/goa-to-pune
No 'View Buses' button found for https://www.redbus.in/bus-tickets/pandharpur-to-goa
Error occurred while accessing https://www.redbus.in/bus-tickets/belagavi-to-goa: Message: timeout: Timed out receiving message from renderer: 299.721
  (Session info: chrome=109.0.5414.168)
Stacktrace:
Backtrace:
	(No symbol) [0x012B6643]
	(No symbol) [0x0124BE21]
	(No symbol) [0x0114DA9D]
	(No symbol) [0x0113F55A]
	(No symbol) [0x0113F2D8]
	(No symbol) [0x0113DC68]
	(No symbol) [0x0113E647]
	(No symbol) [0x01148568]
	(No symbol) [0x01154956]
	(No symbol) [0x011581C6]
	(No symbol) [0x0113E9F1]
	(No symbol) [0x011546D5]
	(No symbol) [0x011B7057]
	(No symbol) [0x0119FB76]
	(No symbol) [0x011749C1]
	(No symbol) [0x01175E5D]
	GetHandleVerifier [0x0152A142+2497106]
	GetHandleVerifier [0x015585D3+2686691]
	GetHandleVerifier [0x0155BB9C+2700460]
	GetHandleVerifier [0x01363B10+635936]
	(No symbol) [0x01254A1F]
	(No symbol) [0x0125A

In [4]:
import pandas as pd 
df=pd.read_csv("ktcl_bus_details.csv")
df

Unnamed: 0,Route_Name,Route_Link,Bus_Name,Bus_Type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability
0,Pune to Goa,https://www.redbus.in/bus-tickets/pune-to-goa,Kadamba Transport Corporation Limited (KTCL) -...,A/C Sleeper (2+1),19:00,10h 50m,05:50,3.8,800,2 Seats available
1,Pune to Goa,https://www.redbus.in/bus-tickets/pune-to-goa,Atmaram Gobus,VE A/C Sleeper (2+1),20:00,12h 45m,08:45,4.6,600,12 Seats available
2,Pune to Goa,https://www.redbus.in/bus-tickets/pune-to-goa,Ashray Travels,Bharat Benz A/C Sleeper (2+1),18:05,14h 25m,08:30,4.6,647,15 Seats available
3,Pune to Goa,https://www.redbus.in/bus-tickets/pune-to-goa,AdIntrCity SmartBus,AC Sleeper (2+1),20:20,11h 20m,07:40,4.1,679,7 Seats available
4,Pune to Goa,https://www.redbus.in/bus-tickets/pune-to-goa,Atmaram Gobus,VE A/C Sleeper (2+1),18:30,13h 00m,07:30,4.4,626,13 Seats available
...,...,...,...,...,...,...,...,...,...,...
234,Goa to Satara,https://www.redbus.in/bus-tickets/goa-to-satara,VRL Travels,A/C Sleeper (2+1),20:00,09h 00m,05:00,0.0,800,35 Seats available
235,Goa to Satara,https://www.redbus.in/bus-tickets/goa-to-satara,VRL Travels,A/C Sleeper (2+1),19:30,08h 30m,04:00,0.0,900,27 Seats available
236,Shirdi to Goa,https://www.redbus.in/bus-tickets/shirdi-to-goa,PSR Travels Goa,A/C Sleeper (2+1),18:20,14h 40m,09:00,3.6,1299,6 Seats available
237,Goa to Shirdi,https://www.redbus.in/bus-tickets/goa-to-shirdi,PSR Travels Goa,A/C Sleeper (2+1),19:15,14h 45m,10:00,3.8,1099,15 Seats available


In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

URL = "https://www.redbus.in/online-booking/kaac-transport"

def initialize_driver():
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)

def scrape_bus_routes(driver):
    route_elements = driver.find_elements(By.CLASS_NAME, 'route')
    bus_routes_link = [route.get_attribute('href') for route in route_elements]
    bus_routes_name = [route.text.strip() for route in route_elements]
    return bus_routes_link, bus_routes_name

def scrape_bus_details(driver, url, route_name):
    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except:
            print(f"No 'View Buses' button found for {url}")

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")
        
        bus_details = []

        for i in range(len(bus_name_elements)):
            try:
                seat_availability = seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                seat_availability = int(seat_availability) if seat_availability.isdigit() else 0

                price_text = price_elements[i].text
                price_numeric = ''.join(filter(str.isdigit, price_text))

                bus_detail = {
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_elements[i].text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                }
                bus_details.append(bus_detail)
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages():
    all_bus_details = []
    for page in range(1, 6):
        try:
            driver = initialize_driver()
            load_page(driver, URL)
            
            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
            driver.quit()
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    try:
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='6381167213',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )
        
        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS kaac_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                for detail in data:
                    cursor.execute("""
                    INSERT INTO kaac_bus_details (
                        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time, 
                        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, (
                        detail['Route_Name'], detail['Route_Link'], detail['Bus_Name'], 
                        detail['Bus_Type'], detail['Departing_Time'], detail['Duration'], 
                        detail['Reaching_Time'], detail['Star_Rating'], detail['Price'], 
                        detail['Seat_Availability']
                    ))

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    df.to_csv('kaac_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'kaac_bus_details.csv' and MySQL database.")


No 'View Buses' button found for https://www.redbus.in/bus-tickets/dokmoka-to-guwahati
No 'View Buses' button found for https://www.redbus.in/bus-tickets/guwahati-to-dokmoka
No 'View Buses' button found for https://www.redbus.in/bus-tickets/guwahati-to-bokolia-assam
Error occurred while accessing https://www.redbus.in/bus-tickets/langhin-assam-to-guwahati: Message: timeout: Timed out receiving message from renderer: -0.000
  (Session info: chrome=109.0.5414.168)
Stacktrace:
Backtrace:
	(No symbol) [0x00C16643]
	(No symbol) [0x00BABE21]
	(No symbol) [0x00AADA9D]
	(No symbol) [0x00A9F55A]
	(No symbol) [0x00A9F2D8]
	(No symbol) [0x00A9DC68]
	(No symbol) [0x00A9E647]
	(No symbol) [0x00AA8568]
	(No symbol) [0x00AB4956]
	(No symbol) [0x00AB81C6]
	(No symbol) [0x00A9E9F1]
	(No symbol) [0x00AB46D5]
	(No symbol) [0x00B169B5]
	(No symbol) [0x00AFFB76]
	(No symbol) [0x00AD49C1]
	(No symbol) [0x00AD5E5D]
	GetHandleVerifier [0x00E8A142+2497106]
	GetHandleVerifier [0x00EB85D3+2686691]
	GetHandleVeri

In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

URL = "https://www.redbus.in/online-booking/chandigarh-transport-undertaking-ctu"

def initialize_driver():
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)

def scrape_bus_routes(driver):
    route_elements = driver.find_elements(By.CLASS_NAME, 'route')
    bus_routes_link = [route.get_attribute('href') for route in route_elements]
    bus_routes_name = [route.text.strip() for route in route_elements]
    return bus_routes_link, bus_routes_name

def scrape_bus_details(driver, url, route_name):
    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except:
            print(f"No 'View Buses' button found for {url}")

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")
        
        bus_details = []

        for i in range(len(bus_name_elements)):
            try:
                seat_availability = seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                seat_availability = int(seat_availability) if seat_availability.isdigit() else 0

                price_text = price_elements[i].text
                price_numeric = ''.join(filter(str.isdigit, price_text))

                bus_detail = {
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_elements[i].text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                }
                bus_details.append(bus_detail)
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages():
    all_bus_details = []
    for page in range(1, 6):
        try:
            driver = initialize_driver()
            load_page(driver, URL)
            
            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
            driver.quit()
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    try:
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='6381167213',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )
        
        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS ctu_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                for detail in data:
                    cursor.execute("""
                    INSERT INTO ctu_bus_details (
                        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time, 
                        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, (
                        detail['Route_Name'], detail['Route_Link'], detail['Bus_Name'], 
                        detail['Bus_Type'], detail['Departing_Time'], detail['Duration'], 
                        detail['Reaching_Time'], detail['Star_Rating'], detail['Price'], 
                        detail['Seat_Availability']
                    ))

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    df.to_csv('ctu_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'ctu_bus_details.csv' and MySQL database.")


No 'View Buses' button found for https://www.redbus.in/bus-tickets/yamuna-nagar-to-chandigarh
No 'View Buses' button found for https://www.redbus.in/bus-tickets/chandigarh-to-vrindavan
No 'View Buses' button found for https://www.redbus.in/bus-tickets/ludhiana-to-chandigarh
No 'View Buses' button found for https://www.redbus.in/bus-tickets/hamirpur-himachal-pradesh-to-chandigarh
No 'View Buses' button found for https://www.redbus.in/bus-tickets/vrindavan-to-chandigarh
No 'View Buses' button found for https://www.redbus.in/bus-tickets/sujanpur-to-chandigarh
No 'View Buses' button found for https://www.redbus.in/bus-tickets/shimla-to-chandigarh
No 'View Buses' button found for https://www.redbus.in/bus-tickets/chandigarh-to-dehradun
No 'View Buses' button found for https://www.redbus.in/bus-tickets/pathankot-to-chandigarh
Error occurred while accessing https://www.redbus.in/bus-tickets/chandigarh-to-haridwar: Message: timeout: Timed out receiving message from renderer: -0.000
  (Session 

In [3]:
import pandas as pd 
df=pd.read_csv("ctu_bus_details.csv")
df

Unnamed: 0,Route_Name,Route_Link,Bus_Name,Bus_Type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability
0,Chandigarh to Delhi,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh Transport Undertaking (CTU) - 165691,HVAC Seater (2+3),11:20,05h 35m,16:55,3.9,40476,47 Seats available
1,Chandigarh to Delhi,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh Transport Undertaking (CTU) - 165693,HVAC Seater (2+3),12:30,05h 35m,18:05,4.1,40476,46 Seats available
2,Chandigarh to Delhi,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh Transport Undertaking (CTU) - 165697,HVAC Seater (2+3),14:00,05h 35m,19:35,3.4,40476,47 Seats available
3,Chandigarh to Delhi,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh Transport Undertaking (CTU) - 165709,HVAC Seater (2+3),14:30,05h 35m,20:05,3.9,40476,47 Seats available
4,Chandigarh to Delhi,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh Transport Undertaking (CTU) - 165720,HVAC Seater (2+3),15:30,05h 35m,21:05,3.4,40476,46 Seats available
...,...,...,...,...,...,...,...,...,...,...
236,Chandigarh to Katra (jammu and kashmir),https://www.redbus.in/bus-tickets/chandigarh-t...,City Land Travels,A/C Seater/Sleeper (2+1),23:15,08h 25m,07:40,2.3,584,24 Seats available
237,Chandigarh to Katra (jammu and kashmir),https://www.redbus.in/bus-tickets/chandigarh-t...,Northern Travels,Bharat Benz A/C Sleeper (2+1),22:30,08h 30m,07:00,1.7,899,24 Seats available
238,Chandigarh to Katra (jammu and kashmir),https://www.redbus.in/bus-tickets/chandigarh-t...,Big Bus,A/C Seater / Sleeper (2+2),20:20,08h 10m,04:30,0.0,599,41 Seats available
239,Chandigarh to Katra (jammu and kashmir),https://www.redbus.in/bus-tickets/chandigarh-t...,RAO TRAVELS INDIA PVT LTD.,Mercedes Benz A/C (2+2),23:00,05h 15m,04:15,0.0,3000,48 Seats available


In [7]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

URL = "https://www.redbus.in/online-booking/rsrtc/?utm_source=rtchometile"

def initialize_driver():
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)

def scrape_bus_routes(driver):
    route_elements = driver.find_elements(By.CLASS_NAME, 'route')
    bus_routes_link = [route.get_attribute('href') for route in route_elements]
    bus_routes_name = [route.text.strip() for route in route_elements]
    return bus_routes_link, bus_routes_name

def scrape_bus_details(driver, url, route_name):
    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except:
            print(f"No 'View Buses' button found for {url}")

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")
        
        bus_details = []

        for i in range(len(bus_name_elements)):
            try:
                seat_availability = seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                seat_availability = int(seat_availability) if seat_availability.isdigit() else 0

                price_text = price_elements[i].text
                price_numeric = ''.join(filter(str.isdigit, price_text))

                bus_detail = {
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_elements[i].text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                }
                bus_details.append(bus_detail)
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages():
    all_bus_details = []
    for page in range(1, 6):
        try:
            driver = initialize_driver()
            load_page(driver, URL)
            
            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
            driver.quit()
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    try:
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='6381167213',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )
        
        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS rsrtc_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                for detail in data:
                    cursor.execute("""
                    INSERT INTO rsrtc_bus_details (
                        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time, 
                        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, (
                        detail['Route_Name'], detail['Route_Link'], detail['Bus_Name'], 
                        detail['Bus_Type'], detail['Departing_Time'], detail['Duration'], 
                        detail['Reaching_Time'], detail['Star_Rating'], detail['Price'], 
                        detail['Seat_Availability']
                    ))

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    df.to_csv('rsrtc_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'rsrtc_bus_details.csv' and MySQL database.")


No 'View Buses' button found for https://www.redbus.in/bus-tickets/jodhpur-to-ajmer
No 'View Buses' button found for https://www.redbus.in/bus-tickets/beawer-to-jaipur
No 'View Buses' button found for https://www.redbus.in/bus-tickets/udaipur-to-jodhpur
No 'View Buses' button found for https://www.redbus.in/bus-tickets/jaipur-to-jodhpur
No 'View Buses' button found for https://www.redbus.in/bus-tickets/sikar-to-jaipur
No 'View Buses' button found for https://www.redbus.in/bus-tickets/kishangarh-to-jaipur
No 'View Buses' button found for https://www.redbus.in/bus-tickets/aligarh-uttar-pradesh-to-jaipur
Error occurred while accessing https://www.redbus.in/bus-tickets/aligarh-uttar-pradesh-to-jaipur: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=109.0.5414.168)
Stacktrace:
Backtrace:
	(No symbol) [0x00C16643]
	(No symbol) [0x00BABE21]
	(No symbol) [0x00AADA9D]
	(No symbol) [0x00A8EF6A]
	(No symbol) [0x00B03AAB]
	(No sy

In [8]:
import pandas as pd 
df=pd.read_csv("rsrtc_bus_details.csv")
df

EmptyDataError: No columns to parse from file

In [9]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

URL = "https://www.redbus.in/online-booking/north-bengal-state-transport-corporation"

def initialize_driver():
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)

def scrape_bus_routes(driver):
    route_elements = driver.find_elements(By.CLASS_NAME, 'route')
    bus_routes_link = [route.get_attribute('href') for route in route_elements]
    bus_routes_name = [route.text.strip() for route in route_elements]
    return bus_routes_link, bus_routes_name

def scrape_bus_details(driver, url, route_name):
    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except:
            print(f"No 'View Buses' button found for {url}")

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")
        
        bus_details = []

        for i in range(len(bus_name_elements)):
            try:
                seat_availability = seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                seat_availability = int(seat_availability) if seat_availability.isdigit() else 0

                price_text = price_elements[i].text
                price_numeric = ''.join(filter(str.isdigit, price_text))

                bus_detail = {
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_elements[i].text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                }
                bus_details.append(bus_detail)
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages():
    all_bus_details = []
    for page in range(1, 6):
        try:
            driver = initialize_driver()
            load_page(driver, URL)
            
            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
            driver.quit()
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    try:
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='6381167213',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )
        
        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS nbstc_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                for detail in data:
                    cursor.execute("""
                    INSERT INTO nbstc_bus_details (
                        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time, 
                        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, (
                        detail['Route_Name'], detail['Route_Link'], detail['Bus_Name'], 
                        detail['Bus_Type'], detail['Departing_Time'], detail['Duration'], 
                        detail['Reaching_Time'], detail['Star_Rating'], detail['Price'], 
                        detail['Seat_Availability']
                    ))

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    df.to_csv('nbstc_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'nbstc_bus_details.csv' and MySQL database.")


No 'View Buses' button found for https://www.redbus.in/bus-tickets/kolkata-to-siliguri
No 'View Buses' button found for https://www.redbus.in/bus-tickets/siliguri-to-kolkata
No 'View Buses' button found for https://www.redbus.in/bus-tickets/siliguri-to-darjeeling
No 'View Buses' button found for https://www.redbus.in/bus-tickets/kolkata-to-raiganj
No 'View Buses' button found for https://www.redbus.in/bus-tickets/raiganj-to-kolkata
No 'View Buses' button found for https://www.redbus.in/bus-tickets/kolkata-to-malda
No 'View Buses' button found for https://www.redbus.in/bus-tickets/cooch-behar-west-bengal-to-berhampore
No 'View Buses' button found for https://www.redbus.in/bus-tickets/kolkata-to-cooch-behar-west-bengal
No 'View Buses' button found for https://www.redbus.in/bus-tickets/malda-to-kolkata
No 'View Buses' button found for https://www.redbus.in/bus-tickets/berhampore-to-cooch-behar-west-bengal
No 'View Buses' button found for https://www.redbus.in/bus-tickets/cooch-behar-west-