# APSRTC

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

# redBus landing page for APSRTC; scrape_all_pages() loads it to collect the route links.
URL = "https://www.redbus.in/online-booking/apsrtc/?utm_source=rtchometile"

def initialize_driver():
    """Start a Chrome WebDriver session, maximize its window and return it."""
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url):
    """Navigate ``driver`` to ``url`` and pause so the page can finish rendering.

    Args:
        driver: Selenium WebDriver that performs the navigation.
        url: Address to open.
    """
    settle_seconds = 5
    driver.get(url)
    time.sleep(settle_seconds)

def scrape_bus_routes(driver):
    """Read the route links and route names shown on the current page.

    Args:
        driver: Selenium WebDriver already positioned on a route listing page.

    Returns:
        A ``(links, names)`` tuple of two equally long lists built from the
        elements with class ``route``: each element's ``href`` attribute and
        its stripped visible text.
    """
    anchors = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href'))

    names = []
    for anchor in anchors:
        names.append(anchor.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Collect one record per bus listed on a single route page.

    Opens ``url``, clicks the first clickable element with class ``button``
    if one shows up within 10 seconds, scrolls to the bottom of the page and
    reads the listing columns by CSS class.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Address of the route page; copied into each record as Route_Link.
        route_name: Route label; copied into each record as Route_Name.

    Returns:
        A list of dicts keyed by Route_Name, Route_Link, Bus_Name, Bus_Type,
        Departing_Time, Duration, Reaching_Time, Star_Rating, Price and
        Seat_Availability. Returns an empty list if the page as a whole
        fails; a bus whose fields cannot be read is reported and skipped.
    """
    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # The button is optional, so carry on with whatever the page shows.
            # Catching Exception (not a bare except) lets Ctrl+C stop the run.
            print(f"No 'View Buses' button found for {url}")

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        # A class containing 'seat-left m-top-30' also contains 'seat-left',
        # so one contains() test selects exactly the same elements.
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        bus_details = []

        # Columns are paired purely by position. Ratings and seat counts fall
        # back to defaults when their list is shorter than the name list.
        for i, bus_name_element in enumerate(bus_name_elements):
            try:
                # Keep only the digits of the fare text.
                price_numeric = ''.join(filter(str.isdigit, price_elements[i].text))

                bus_details.append({
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_element.text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                })
            except Exception as e:
                # A missing mandatory column or a WebDriver error drops only this bus.
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages(max_pages=5):
    """Scrape the buses of every route found on the operator's listing pages.

    A fresh browser is started for each listing page. For pages after the
    first, the pagination tab whose text equals the page number is clicked
    before the route links are read. Each route is then scraped with
    ``scrape_bus_details``.

    Args:
        max_pages: Number of listing pages to visit, starting at page 1.

    Returns:
        A flat list with the bus records of all routes that could be scraped.
        A page that fails is reported on stdout and skipped.
    """
    all_bus_details = []
    for page in range(1, max_pages + 1):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                all_bus_details.extend(scrape_bus_details(driver, link, name))
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Always close the browser, even when the page failed, so a
            # failed page does not leave a Chrome process behind.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error occurred while closing the browser for page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Store scraped bus records in the ``APSRTC_bus_details`` MySQL table.

    Connects to the ``redbus`` database on 127.0.0.1, creates the table when
    it does not exist and inserts one row per record.

    Args:
        data: Iterable of dicts carrying the keys Route_Name, Route_Link,
            Bus_Name, Bus_Type, Departing_Time, Duration, Reaching_Time,
            Star_Rating, Price and Seat_Availability.

    Raises:
        KeyError: If a record lacks one of the expected keys.

    MySQL errors are printed, not raised.
    """
    create_sql = """
    CREATE TABLE IF NOT EXISTS APSRTC_bus_details (
        Route_Name TEXT,
        Route_Link TEXT,
        Bus_Name TEXT,
        Bus_Type TEXT,
        Departing_Time TIME,
        Duration TEXT,
        Reaching_Time TIME,
        Star_Rating FLOAT,
        Price DECIMAL(10,2),
        Seat_Availability TEXT
    )
    """
    insert_sql = """
    INSERT INTO APSRTC_bus_details (
        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time,
        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """
    rows = [
        (
            detail['Route_Name'], detail['Route_Link'], detail['Bus_Name'],
            detail['Bus_Type'], detail['Departing_Time'], detail['Duration'],
            detail['Reaching_Time'], detail['Star_Rating'],
            # A fare with no digits arrives as ''; store NULL, because an
            # empty string is not a valid DECIMAL value in strict SQL mode.
            detail['Price'] or None,
            detail['Seat_Availability'],
        )
        for detail in data
    ]

    try:
        # NOTE(review): root credentials are hard-coded here; consider
        # reading them from the environment or a config file.
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='Root',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute(create_sql)
                if rows:
                    # One batched call in place of one execute() per row.
                    cursor.executemany(insert_sql, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

if __name__ == "__main__":
    # Scrape all listing pages, then persist the rows to MySQL and to a CSV file.
    all_bus_details = scrape_all_pages()
    # NOTE: insert_data_into_mysql prints MySQL errors and does not raise them,
    # so the final message below is printed even when the insert failed.
    insert_data_into_mysql(all_bus_details)
    # df is bound at module level; the next notebook cell writes it out again.
    df = pd.DataFrame(all_bus_details)
    df.to_csv('APSRTC_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'APSRTC_bus_details.csv' and MySQL database.")

Data inserted successfully into MySQL database.
Scraping completed. Data saved to 'APSRTC_bus_details.csv' and MySQL database.


In [5]:
# Write a second copy of the scraped DataFrame to a fixed local folder.
# NOTE(review): unlike the export in the cell above, index=False is not passed here,
# so the row index is written as an extra first column. Confirm that this is intended.
df.to_csv("C:/Users/Lokesh J/Downloads/RedBus_Scrapped_Data/APSRTC_bus_details.csv")

# ASTC

In [7]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

# redBus landing page for ASTC; scrape_all_pages() loads it to collect the route links.
URL = "https://www.redbus.in/online-booking/astc/?utm_source=rtchometile"

def initialize_driver():
    """Start a Chrome WebDriver session, maximize its window and return it."""
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url):
    """Navigate ``driver`` to ``url`` and pause so the page can finish rendering.

    Args:
        driver: Selenium WebDriver that performs the navigation.
        url: Address to open.
    """
    settle_seconds = 5
    driver.get(url)
    time.sleep(settle_seconds)

def scrape_bus_routes(driver):
    """Read the route links and route names shown on the current page.

    Args:
        driver: Selenium WebDriver already positioned on a route listing page.

    Returns:
        A ``(links, names)`` tuple of two equally long lists built from the
        elements with class ``route``: each element's ``href`` attribute and
        its stripped visible text.
    """
    anchors = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href'))

    names = []
    for anchor in anchors:
        names.append(anchor.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Collect one record per bus listed on a single route page.

    Opens ``url``, clicks the first clickable element with class ``button``
    if one shows up within 10 seconds, scrolls to the bottom of the page and
    reads the listing columns by CSS class.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Address of the route page; copied into each record as Route_Link.
        route_name: Route label; copied into each record as Route_Name.

    Returns:
        A list of dicts keyed by Route_Name, Route_Link, Bus_Name, Bus_Type,
        Departing_Time, Duration, Reaching_Time, Star_Rating, Price and
        Seat_Availability. Returns an empty list if the page as a whole
        fails; a bus whose fields cannot be read is reported and skipped.
    """
    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # The button is optional, so carry on with whatever the page shows.
            # Catching Exception (not a bare except) lets Ctrl+C stop the run.
            print(f"No 'View Buses' button found for {url}")

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        # A class containing 'seat-left m-top-30' also contains 'seat-left',
        # so one contains() test selects exactly the same elements.
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        bus_details = []

        # Columns are paired purely by position. Ratings and seat counts fall
        # back to defaults when their list is shorter than the name list.
        for i, bus_name_element in enumerate(bus_name_elements):
            try:
                # Keep only the digits of the fare text.
                price_numeric = ''.join(filter(str.isdigit, price_elements[i].text))

                bus_details.append({
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_element.text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                })
            except Exception as e:
                # A missing mandatory column or a WebDriver error drops only this bus.
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages(max_pages=5):
    """Scrape the buses of every route found on the operator's listing pages.

    A fresh browser is started for each listing page. For pages after the
    first, the pagination tab whose text equals the page number is clicked
    before the route links are read. Each route is then scraped with
    ``scrape_bus_details``.

    Args:
        max_pages: Number of listing pages to visit, starting at page 1.

    Returns:
        A flat list with the bus records of all routes that could be scraped.
        A page that fails is reported on stdout and skipped.
    """
    all_bus_details = []
    for page in range(1, max_pages + 1):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                all_bus_details.extend(scrape_bus_details(driver, link, name))
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Always close the browser, even when the page failed, so a
            # failed page does not leave a Chrome process behind.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error occurred while closing the browser for page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Store scraped bus records in the ``Assam_bus_details`` MySQL table.

    Connects to the ``redbus`` database on 127.0.0.1, creates the table when
    it does not exist and inserts one row per record.

    Args:
        data: Iterable of dicts carrying the keys Route_Name, Route_Link,
            Bus_Name, Bus_Type, Departing_Time, Duration, Reaching_Time,
            Star_Rating, Price and Seat_Availability.

    Raises:
        KeyError: If a record lacks one of the expected keys.

    MySQL errors are printed, not raised.
    """
    create_sql = """
    CREATE TABLE IF NOT EXISTS Assam_bus_details (
        Route_Name TEXT,
        Route_Link TEXT,
        Bus_Name TEXT,
        Bus_Type TEXT,
        Departing_Time TIME,
        Duration TEXT,
        Reaching_Time TIME,
        Star_Rating FLOAT,
        Price DECIMAL(10,2),
        Seat_Availability TEXT
    )
    """
    insert_sql = """
    INSERT INTO Assam_bus_details (
        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time,
        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """
    rows = [
        (
            detail['Route_Name'], detail['Route_Link'], detail['Bus_Name'],
            detail['Bus_Type'], detail['Departing_Time'], detail['Duration'],
            detail['Reaching_Time'], detail['Star_Rating'],
            # A fare with no digits arrives as ''; store NULL, because an
            # empty string is not a valid DECIMAL value in strict SQL mode.
            detail['Price'] or None,
            detail['Seat_Availability'],
        )
        for detail in data
    ]

    try:
        # NOTE(review): root credentials are hard-coded here; consider
        # reading them from the environment or a config file.
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='Root',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute(create_sql)
                if rows:
                    # One batched call in place of one execute() per row.
                    cursor.executemany(insert_sql, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

if __name__ == "__main__":
    # Scrape all listing pages, then persist the rows to MySQL and to a CSV file.
    all_bus_details = scrape_all_pages()
    # NOTE: insert_data_into_mysql prints MySQL errors and does not raise them,
    # so the final message below is printed even when the insert failed.
    insert_data_into_mysql(all_bus_details)
    # df is bound at module level; the next notebook cell writes it out again.
    df = pd.DataFrame(all_bus_details)
    df.to_csv('Assam_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'Assam_bus_details.csv' and MySQL database.")

No 'View Buses' button found for https://www.redbus.in/bus-tickets/nagaon-to-guwahati
No 'View Buses' button found for https://www.redbus.in/bus-tickets/goalpara-to-guwahati
No 'View Buses' button found for https://www.redbus.in/bus-tickets/jorhat-to-north-lakhimpur
No 'View Buses' button found for https://www.redbus.in/bus-tickets/jorhat-to-dibrugarh
No 'View Buses' button found for https://www.redbus.in/bus-tickets/north-lakhimpur-to-jorhat
No 'View Buses' button found for https://www.redbus.in/bus-tickets/north-lakhimpur-to-sibsagar
No 'View Buses' button found for https://www.redbus.in/bus-tickets/dhekiajuli-to-guwahati
No 'View Buses' button found for https://www.redbus.in/bus-tickets/sibsagar-to-north-lakhimpur
No 'View Buses' button found for https://www.redbus.in/bus-tickets/jorhat-to-dhemaji
No 'View Buses' button found for https://www.redbus.in/bus-tickets/dhemaji-to-jorhat
No 'View Buses' button found for https://www.redbus.in/bus-tickets/north-lakhimpur-to-dibrugarh
No 'Vie

In [9]:
# Write a second copy of the scraped DataFrame to a fixed local folder.
# NOTE(review): unlike the export in the cell above, index=False is not passed here,
# so the row index is written as an extra first column. Confirm that this is intended.
df.to_csv("C:/Users/Lokesh J/Downloads/RedBus_Scrapped_Data/Assam_bus_details.csv")

# Chandigarh

In [13]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

# redBus landing page for the Chandigarh Transport Undertaking (CTU);
# scrape_all_pages() loads it to collect the route links.
URL = "https://www.redbus.in/online-booking/chandigarh-transport-undertaking-ctu"

def initialize_driver():
    """Start a Chrome WebDriver session, maximize its window and return it."""
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url):
    """Navigate ``driver`` to ``url`` and pause so the page can finish rendering.

    Args:
        driver: Selenium WebDriver that performs the navigation.
        url: Address to open.
    """
    settle_seconds = 5
    driver.get(url)
    time.sleep(settle_seconds)

def scrape_bus_routes(driver):
    """Read the route links and route names shown on the current page.

    Args:
        driver: Selenium WebDriver already positioned on a route listing page.

    Returns:
        A ``(links, names)`` tuple of two equally long lists built from the
        elements with class ``route``: each element's ``href`` attribute and
        its stripped visible text.
    """
    anchors = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href'))

    names = []
    for anchor in anchors:
        names.append(anchor.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Collect one record per bus listed on a single route page.

    Opens ``url``, clicks the first clickable element with class ``button``
    if one shows up within 10 seconds, scrolls to the bottom of the page and
    reads the listing columns by CSS class.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Address of the route page; copied into each record as Route_Link.
        route_name: Route label; copied into each record as Route_Name.

    Returns:
        A list of dicts keyed by Route_Name, Route_Link, Bus_Name, Bus_Type,
        Departing_Time, Duration, Reaching_Time, Star_Rating, Price and
        Seat_Availability. Returns an empty list if the page as a whole
        fails; a bus whose fields cannot be read is reported and skipped.
    """
    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # The button is optional, so carry on with whatever the page shows.
            # Catching Exception (not a bare except) lets Ctrl+C stop the run.
            print(f"No 'View Buses' button found for {url}")

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        # A class containing 'seat-left m-top-30' also contains 'seat-left',
        # so one contains() test selects exactly the same elements.
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        bus_details = []

        # Columns are paired purely by position. Ratings and seat counts fall
        # back to defaults when their list is shorter than the name list.
        for i, bus_name_element in enumerate(bus_name_elements):
            try:
                # Keep only the digits of the fare text.
                price_numeric = ''.join(filter(str.isdigit, price_elements[i].text))

                bus_details.append({
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_element.text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                })
            except Exception as e:
                # A missing mandatory column or a WebDriver error drops only this bus.
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages(max_pages=5):
    """Scrape the buses of every route found on the operator's listing pages.

    A fresh browser is started for each listing page. For pages after the
    first, the pagination tab whose text equals the page number is clicked
    before the route links are read. Each route is then scraped with
    ``scrape_bus_details``.

    Args:
        max_pages: Number of listing pages to visit, starting at page 1.

    Returns:
        A flat list with the bus records of all routes that could be scraped.
        A page that fails is reported on stdout and skipped.
    """
    all_bus_details = []
    for page in range(1, max_pages + 1):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                all_bus_details.extend(scrape_bus_details(driver, link, name))
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Always close the browser, even when the page failed, so a
            # failed page does not leave a Chrome process behind.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error occurred while closing the browser for page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Store scraped bus records in the ``Chandigarh_bus_details`` MySQL table.

    Connects to the ``redbus`` database on 127.0.0.1, creates the table when
    it does not exist and inserts one row per record.

    Args:
        data: Iterable of dicts carrying the keys Route_Name, Route_Link,
            Bus_Name, Bus_Type, Departing_Time, Duration, Reaching_Time,
            Star_Rating, Price and Seat_Availability.

    Raises:
        KeyError: If a record lacks one of the expected keys.

    MySQL errors are printed, not raised.
    """
    create_sql = """
    CREATE TABLE IF NOT EXISTS Chandigarh_bus_details (
        Route_Name TEXT,
        Route_Link TEXT,
        Bus_Name TEXT,
        Bus_Type TEXT,
        Departing_Time TIME,
        Duration TEXT,
        Reaching_Time TIME,
        Star_Rating FLOAT,
        Price DECIMAL(10,2),
        Seat_Availability TEXT
    )
    """
    insert_sql = """
    INSERT INTO Chandigarh_bus_details (
        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time,
        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """
    rows = [
        (
            detail['Route_Name'], detail['Route_Link'], detail['Bus_Name'],
            detail['Bus_Type'], detail['Departing_Time'], detail['Duration'],
            detail['Reaching_Time'], detail['Star_Rating'],
            # A fare with no digits arrives as ''; store NULL, because an
            # empty string is not a valid DECIMAL value in strict SQL mode.
            detail['Price'] or None,
            detail['Seat_Availability'],
        )
        for detail in data
    ]

    try:
        # NOTE(review): root credentials are hard-coded here; consider
        # reading them from the environment or a config file.
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='Root',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute(create_sql)
                if rows:
                    # One batched call in place of one execute() per row.
                    cursor.executemany(insert_sql, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

if __name__ == "__main__":
    # Scrape all listing pages, then persist the rows to MySQL and to a CSV file.
    all_bus_details = scrape_all_pages()
    # NOTE: insert_data_into_mysql prints MySQL errors and does not raise them,
    # so the final message below is printed even when the insert failed.
    insert_data_into_mysql(all_bus_details)
    # df is bound at module level; the next notebook cell writes it out again.
    df = pd.DataFrame(all_bus_details)
    df.to_csv('Chandigarh_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'Chandigarh_bus_details.csv' and MySQL database.")

No 'View Buses' button found for https://www.redbus.in/bus-tickets/talwara-to-chandigarh
No 'View Buses' button found for https://www.redbus.in/bus-tickets/chandigarh-to-dinanagar-punjab
No 'View Buses' button found for https://www.redbus.in/bus-tickets/dinanagar-punjab-to-chandigarh
No 'View Buses' button found for https://www.redbus.in/bus-tickets/hisar-haryana-to-chandigarh
Data inserted successfully into MySQL database.
Scraping completed. Data saved to 'Chandigarh_bus_details.csv' and MySQL database.


In [15]:
# Write a second copy of the scraped DataFrame to a fixed local folder.
# NOTE(review): unlike the export in the cell above, index=False is not passed here,
# so the row index is written as an extra first column. Confirm that this is intended.
df.to_csv("C:/Users/Lokesh J/Downloads/RedBus_Scrapped_Data/Chandigarh_bus_details.csv")

# HSRTC

In [17]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

# redBus landing page for this operator; scrape_all_pages() loads it to collect the route links.
# NOTE(review): the URL slug is 'hrtc', while this section's heading, table and CSV
# are all named 'HSRTC'. Confirm which operator name is intended.
URL = "https://www.redbus.in/online-booking/hrtc/?utm_source=rtchometile"

def initialize_driver():
    """Start a Chrome WebDriver session, maximize its window and return it."""
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url):
    """Navigate ``driver`` to ``url`` and pause so the page can finish rendering.

    Args:
        driver: Selenium WebDriver that performs the navigation.
        url: Address to open.
    """
    settle_seconds = 5
    driver.get(url)
    time.sleep(settle_seconds)

def scrape_bus_routes(driver):
    """Read the route links and route names shown on the current page.

    Args:
        driver: Selenium WebDriver already positioned on a route listing page.

    Returns:
        A ``(links, names)`` tuple of two equally long lists built from the
        elements with class ``route``: each element's ``href`` attribute and
        its stripped visible text.
    """
    anchors = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href'))

    names = []
    for anchor in anchors:
        names.append(anchor.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Collect one record per bus listed on a single route page.

    Opens ``url``, clicks the first clickable element with class ``button``
    if one shows up within 10 seconds, scrolls to the bottom of the page and
    reads the listing columns by CSS class.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Address of the route page; copied into each record as Route_Link.
        route_name: Route label; copied into each record as Route_Name.

    Returns:
        A list of dicts keyed by Route_Name, Route_Link, Bus_Name, Bus_Type,
        Departing_Time, Duration, Reaching_Time, Star_Rating, Price and
        Seat_Availability. Returns an empty list if the page as a whole
        fails; a bus whose fields cannot be read is reported and skipped.
    """
    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # The button is optional, so carry on with whatever the page shows.
            # Catching Exception (not a bare except) lets Ctrl+C stop the run.
            print(f"No 'View Buses' button found for {url}")

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        # A class containing 'seat-left m-top-30' also contains 'seat-left',
        # so one contains() test selects exactly the same elements.
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        bus_details = []

        # Columns are paired purely by position. Ratings and seat counts fall
        # back to defaults when their list is shorter than the name list.
        for i, bus_name_element in enumerate(bus_name_elements):
            try:
                # Keep only the digits of the fare text.
                price_numeric = ''.join(filter(str.isdigit, price_elements[i].text))

                bus_details.append({
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_element.text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                })
            except Exception as e:
                # A missing mandatory column or a WebDriver error drops only this bus.
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages(max_pages=4):
    """Scrape the buses of every route found on the operator's listing pages.

    A fresh browser is started for each listing page. For pages after the
    first, the pagination tab whose text equals the page number is clicked
    before the route links are read. Each route is then scraped with
    ``scrape_bus_details``.

    Args:
        max_pages: Number of listing pages to visit, starting at page 1.

    Returns:
        A flat list with the bus records of all routes that could be scraped.
        A page that fails is reported on stdout and skipped.
    """
    all_bus_details = []
    for page in range(1, max_pages + 1):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                all_bus_details.extend(scrape_bus_details(driver, link, name))
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Always close the browser, even when the page failed, so a
            # failed page does not leave a Chrome process behind.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error occurred while closing the browser for page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Store scraped bus records in the ``HSRTC_bus_details`` MySQL table.

    Connects to the ``redbus`` database on 127.0.0.1, creates the table when
    it does not exist and inserts one row per record.

    Args:
        data: Iterable of dicts carrying the keys Route_Name, Route_Link,
            Bus_Name, Bus_Type, Departing_Time, Duration, Reaching_Time,
            Star_Rating, Price and Seat_Availability.

    Raises:
        KeyError: If a record lacks one of the expected keys.

    MySQL errors are printed, not raised.
    """
    create_sql = """
    CREATE TABLE IF NOT EXISTS HSRTC_bus_details (
        Route_Name TEXT,
        Route_Link TEXT,
        Bus_Name TEXT,
        Bus_Type TEXT,
        Departing_Time TIME,
        Duration TEXT,
        Reaching_Time TIME,
        Star_Rating FLOAT,
        Price DECIMAL(10,2),
        Seat_Availability TEXT
    )
    """
    insert_sql = """
    INSERT INTO HSRTC_bus_details (
        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time,
        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """
    rows = [
        (
            detail['Route_Name'], detail['Route_Link'], detail['Bus_Name'],
            detail['Bus_Type'], detail['Departing_Time'], detail['Duration'],
            detail['Reaching_Time'], detail['Star_Rating'],
            # A fare with no digits arrives as ''; store NULL, because an
            # empty string is not a valid DECIMAL value in strict SQL mode.
            detail['Price'] or None,
            detail['Seat_Availability'],
        )
        for detail in data
    ]

    try:
        # NOTE(review): root credentials are hard-coded here; consider
        # reading them from the environment or a config file.
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='Root',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute(create_sql)
                if rows:
                    # One batched call in place of one execute() per row.
                    cursor.executemany(insert_sql, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

if __name__ == "__main__":
    # Scrape all listing pages, then persist the rows to MySQL and to a CSV file.
    all_bus_details = scrape_all_pages()
    # NOTE: insert_data_into_mysql prints MySQL errors and does not raise them,
    # so the final message below is printed even when the insert failed.
    insert_data_into_mysql(all_bus_details)
    # df is bound at module level; the next notebook cell writes it out again.
    df = pd.DataFrame(all_bus_details)
    df.to_csv('HSRTC_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'HSRTC_bus_details.csv' and MySQL database.")

No 'View Buses' button found for https://www.redbus.in/bus-tickets/delhi-to-baddi-himachal-pradesh
Data inserted successfully into MySQL database.
Scraping completed. Data saved to 'HSRTC_bus_details.csv' and MySQL database.


In [19]:
# Write a second copy of the scraped DataFrame to a fixed local folder.
# NOTE(review): unlike the export in the cell above, index=False is not passed here,
# so the row index is written as an extra first column. Confirm that this is intended.
df.to_csv("C:/Users/Lokesh J/Downloads/RedBus_Scrapped_Data/HSRTC_bus_details.csv")

# Kadamba

In [25]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

# redBus landing page for this section's operator (URL slug 'ktcl');
# scrape_all_pages() loads it to collect the route links.
URL = "https://www.redbus.in/online-booking/ktcl/?utm_source=rtchometile"

def initialize_driver():
    """Start a Chrome WebDriver session, maximize its window and return it."""
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url):
    """Navigate ``driver`` to ``url`` and pause so the page can finish rendering.

    Args:
        driver: Selenium WebDriver that performs the navigation.
        url: Address to open.
    """
    settle_seconds = 5
    driver.get(url)
    time.sleep(settle_seconds)

def scrape_bus_routes(driver):
    """Read the route links and route names shown on the current page.

    Args:
        driver: Selenium WebDriver already positioned on a route listing page.

    Returns:
        A ``(links, names)`` tuple of two equally long lists built from the
        elements with class ``route``: each element's ``href`` attribute and
        its stripped visible text.
    """
    anchors = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href'))

    names = []
    for anchor in anchors:
        names.append(anchor.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Collect one record per bus listed on a single route page.

    Opens ``url``, clicks the first clickable element with class ``button``
    if one shows up within 10 seconds, scrolls to the bottom of the page and
    reads the listing columns by CSS class.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Address of the route page; copied into each record as Route_Link.
        route_name: Route label; copied into each record as Route_Name.

    Returns:
        A list of dicts keyed by Route_Name, Route_Link, Bus_Name, Bus_Type,
        Departing_Time, Duration, Reaching_Time, Star_Rating, Price and
        Seat_Availability. Returns an empty list if the page as a whole
        fails; a bus whose fields cannot be read is reported and skipped.
    """
    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # The button is optional, so carry on with whatever the page shows.
            # Catching Exception (not a bare except) lets Ctrl+C stop the run.
            print(f"No 'View Buses' button found for {url}")

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        # A class containing 'seat-left m-top-30' also contains 'seat-left',
        # so one contains() test selects exactly the same elements.
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        bus_details = []

        # Columns are paired purely by position. Ratings and seat counts fall
        # back to defaults when their list is shorter than the name list.
        for i, bus_name_element in enumerate(bus_name_elements):
            try:
                # Keep only the digits of the fare text.
                price_numeric = ''.join(filter(str.isdigit, price_elements[i].text))

                bus_details.append({
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_element.text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                })
            except Exception as e:
                # A missing mandatory column or a WebDriver error drops only this bus.
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages(total_pages=4):
    """Scrape every route on each pagination tab of the operator page at ``URL``.

    A new browser is started for each tab. Tabs after the first are reached by
    clicking the matching pagination element before the routes are collected.

    Args:
        total_pages: Number of pagination tabs to visit, starting at tab 1.
            Defaults to 4, the count that was previously hard-coded.

    Returns:
        A flat list of the bus records returned by ``scrape_bus_details`` for
        every route found. A failure on a tab is printed and that tab skipped.
    """
    all_bus_details = []
    for page in range(1, total_pages + 1):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Release the browser even when the tab failed part-way, so an
            # error does not leave a Chrome process running.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Create ``Kadamba_bus_details`` if it is missing and insert all scraped rows.

    Args:
        data: Iterable of dicts, one per bus, each holding every key listed in
            ``columns`` below.

    Raises:
        KeyError: If a record lacks one of the expected keys.

    MySQL errors are caught and printed instead of being raised.
    """
    columns = (
        'Route_Name', 'Route_Link', 'Bus_Name', 'Bus_Type', 'Departing_Time',
        'Duration', 'Reaching_Time', 'Star_Rating', 'Price', 'Seat_Availability',
    )
    # Build the parameter tuples up front so a malformed record fails before a
    # database connection is opened.
    rows = [tuple(detail[column] for column in columns) for detail in data]

    try:
        # NOTE(review): credentials are hard-coded; consider loading them from
        # configuration or the environment.
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='Root',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS Kadamba_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                # One executemany call replaces the per-row execute loop; the
                # values are still passed as parameters, never formatted into
                # the SQL text.
                cursor.executemany("""
                INSERT INTO Kadamba_bus_details (
                    Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time,
                    Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

# Cell entry point: scrape all pages, store the rows in MySQL, then export a CSV.
# `all_bus_details` and `df` remain module-level names; the following cell reuses `df`.
if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    # insert_data_into_mysql prints MySQL errors instead of raising them, so the
    # CSV export below still runs when the database write fails.
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    df.to_csv('Kadamba_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'Kadamba_bus_details.csv' and MySQL database.")

No 'View Buses' button found for https://www.redbus.in/bus-tickets/calangute-goa-to-mopa-airport
No 'View Buses' button found for https://www.redbus.in/bus-tickets/goa-airport-to-goa
No 'View Buses' button found for https://www.redbus.in/bus-tickets/belagavi-to-goa
Data inserted successfully into MySQL database.
Scraping completed. Data saved to 'Kadamba_bus_details.csv' and MySQL database.


In [27]:
# Write a second copy of the scraped table to an absolute path under Downloads.
# NOTE(review): index=False is not passed here, so the DataFrame index is written
# as the first CSV column, unlike the export in the previous cell - confirm intended.
df.to_csv("C:/Users/Lokesh J/Downloads/RedBus_Scrapped_Data/Kadamba_bus_details.csv")

# JKSRTC

In [43]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

URL = "https://www.redbus.in/online-booking/jksrtc"

def initialize_driver():
    """Start a Chrome WebDriver session, maximize its window, and return it."""
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url):
    """Navigate ``driver`` to ``url`` and then pause for five seconds.

    The pause is presumably there to let the page finish loading.
    """
    settle_seconds = 5
    driver.get(url)
    time.sleep(settle_seconds)

def scrape_bus_routes(driver):
    """Collect the link and label of every route element on the current page.

    Returns:
        A ``(links, names)`` pair of parallel lists: the ``href`` attribute and
        the whitespace-stripped text of each element with class ``route``.
    """
    anchors = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href'))

    names = []
    for anchor in anchors:
        names.append(anchor.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Load a route page and return one record per bus listed on it.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Route page to open; also stored as ``Route_Link`` in every record.
        route_name: Label stored as ``Route_Name`` in every record.

    Returns:
        A list of dicts with the keys ``Route_Name``, ``Route_Link``,
        ``Bus_Name``, ``Bus_Type``, ``Departing_Time``, ``Duration``,
        ``Reaching_Time``, ``Star_Rating``, ``Price`` and
        ``Seat_Availability``. A bus whose fields cannot be read is skipped
        with a printed message; an empty list is returned if the page as a
        whole fails.
    """
    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # Best effort: a page without the button is scraped as it is.
            # Catching Exception instead of using a bare except lets
            # KeyboardInterrupt/SystemExit stop the run.
            print(f"No 'View Buses' button found for {url}")

        # Scroll to the bottom and wait, presumably to trigger lazy loading.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        # The first XPath predicate is already covered by the second one.
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")

        # NOTE(review): each field is collected as its own page-wide list and
        # matched to a bus purely by position, so a bus without a rating or
        # seat element would shift the values of the buses after it - confirm
        # against the page markup.
        bus_details = []

        for i, bus_name_element in enumerate(bus_name_elements):
            try:
                # Keep only the digit characters of the fare text; everything
                # else, including any decimal point, is dropped.
                price_numeric = ''.join(filter(str.isdigit, price_elements[i].text))

                bus_detail = {
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_element.text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                }
                bus_details.append(bus_detail)
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages(total_pages=1):
    """Scrape every route on each pagination tab of the operator page at ``URL``.

    A new browser is started for each tab. Tabs after the first are reached by
    clicking the matching pagination element before the routes are collected.

    Args:
        total_pages: Number of pagination tabs to visit, starting at tab 1.
            Defaults to 1, the count that was previously hard-coded.

    Returns:
        A flat list of the bus records returned by ``scrape_bus_details`` for
        every route found. A failure on a tab is printed and that tab skipped.
    """
    all_bus_details = []
    for page in range(1, total_pages + 1):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Release the browser even when the tab failed part-way, so an
            # error does not leave a Chrome process running.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Create ``JKSRTC_bus_details`` if it is missing and insert all scraped rows.

    Args:
        data: Iterable of dicts, one per bus, each holding every key listed in
            ``columns`` below.

    Raises:
        KeyError: If a record lacks one of the expected keys.

    MySQL errors are caught and printed instead of being raised.
    """
    columns = (
        'Route_Name', 'Route_Link', 'Bus_Name', 'Bus_Type', 'Departing_Time',
        'Duration', 'Reaching_Time', 'Star_Rating', 'Price', 'Seat_Availability',
    )
    # Build the parameter tuples up front so a malformed record fails before a
    # database connection is opened.
    rows = [tuple(detail[column] for column in columns) for detail in data]

    try:
        # NOTE(review): credentials are hard-coded; consider loading them from
        # configuration or the environment.
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='Root',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS JKSRTC_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                # One executemany call replaces the per-row execute loop; the
                # values are still passed as parameters, never formatted into
                # the SQL text.
                cursor.executemany("""
                INSERT INTO JKSRTC_bus_details (
                    Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time,
                    Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

# Cell entry point: scrape all pages, store the rows in MySQL, then export a CSV.
# `all_bus_details` and `df` remain module-level names; the following cell reuses `df`.
if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    # insert_data_into_mysql prints MySQL errors instead of raising them, so the
    # CSV export below still runs when the database write fails.
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    df.to_csv('JKSRTC_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'JKSRTC_bus_details.csv' and MySQL database.")

No 'View Buses' button found for https://www.redbus.in/bus-tickets/delhi-to-srinagar
No 'View Buses' button found for https://www.redbus.in/bus-tickets/jammu-to-poonch
No 'View Buses' button found for https://www.redbus.in/bus-tickets/mendhar-j-k-to-jammu
No 'View Buses' button found for https://www.redbus.in/bus-tickets/kishtwar-to-jammu
Data inserted successfully into MySQL database.
Scraping completed. Data saved to 'JKSRTC_bus_details.csv' and MySQL database.


In [45]:
# Write a second copy of the scraped table to an absolute path under Downloads.
# NOTE(review): index=False is not passed here, so the DataFrame index is written
# as the first CSV column, unlike the export in the previous cell - confirm intended.
df.to_csv("C:/Users/Lokesh J/Downloads/RedBus_Scrapped_Data/JKSRTC_bus_details.csv")

# KSRTC

In [35]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

URL = "https://www.redbus.in/online-booking/ksrtc-kerala/?utm_source=rtchometile"

def initialize_driver():
    """Start a Chrome WebDriver session, maximize its window, and return it."""
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url):
    """Navigate ``driver`` to ``url`` and then pause for five seconds.

    The pause is presumably there to let the page finish loading.
    """
    settle_seconds = 5
    driver.get(url)
    time.sleep(settle_seconds)

def scrape_bus_routes(driver):
    """Collect the link and label of every route element on the current page.

    Returns:
        A ``(links, names)`` pair of parallel lists: the ``href`` attribute and
        the whitespace-stripped text of each element with class ``route``.
    """
    anchors = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href'))

    names = []
    for anchor in anchors:
        names.append(anchor.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Load a route page and return one record per bus listed on it.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Route page to open; also stored as ``Route_Link`` in every record.
        route_name: Label stored as ``Route_Name`` in every record.

    Returns:
        A list of dicts with the keys ``Route_Name``, ``Route_Link``,
        ``Bus_Name``, ``Bus_Type``, ``Departing_Time``, ``Duration``,
        ``Reaching_Time``, ``Star_Rating``, ``Price`` and
        ``Seat_Availability``. A bus whose fields cannot be read is skipped
        with a printed message; an empty list is returned if the page as a
        whole fails.
    """
    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # Best effort: a page without the button is scraped as it is.
            # Catching Exception instead of using a bare except lets
            # KeyboardInterrupt/SystemExit stop the run.
            print(f"No 'View Buses' button found for {url}")

        # Scroll to the bottom and wait, presumably to trigger lazy loading.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        # The first XPath predicate is already covered by the second one.
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")

        # NOTE(review): each field is collected as its own page-wide list and
        # matched to a bus purely by position, so a bus without a rating or
        # seat element would shift the values of the buses after it - confirm
        # against the page markup.
        bus_details = []

        for i, bus_name_element in enumerate(bus_name_elements):
            try:
                # Keep only the digit characters of the fare text; everything
                # else, including any decimal point, is dropped.
                price_numeric = ''.join(filter(str.isdigit, price_elements[i].text))

                bus_detail = {
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_element.text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                }
                bus_details.append(bus_detail)
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages(total_pages=2):
    """Scrape every route on each pagination tab of the operator page at ``URL``.

    A new browser is started for each tab. Tabs after the first are reached by
    clicking the matching pagination element before the routes are collected.

    Args:
        total_pages: Number of pagination tabs to visit, starting at tab 1.
            Defaults to 2, the count that was previously hard-coded.

    Returns:
        A flat list of the bus records returned by ``scrape_bus_details`` for
        every route found. A failure on a tab is printed and that tab skipped.
    """
    all_bus_details = []
    for page in range(1, total_pages + 1):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Release the browser even when the tab failed part-way, so an
            # error does not leave a Chrome process running.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Create ``KSRTC_bus_details`` if it is missing and insert all scraped rows.

    Args:
        data: Iterable of dicts, one per bus, each holding every key listed in
            ``columns`` below.

    Raises:
        KeyError: If a record lacks one of the expected keys.

    MySQL errors are caught and printed instead of being raised.
    """
    columns = (
        'Route_Name', 'Route_Link', 'Bus_Name', 'Bus_Type', 'Departing_Time',
        'Duration', 'Reaching_Time', 'Star_Rating', 'Price', 'Seat_Availability',
    )
    # Build the parameter tuples up front so a malformed record fails before a
    # database connection is opened.
    rows = [tuple(detail[column] for column in columns) for detail in data]

    try:
        # NOTE(review): credentials are hard-coded; consider loading them from
        # configuration or the environment.
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='Root',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS KSRTC_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                # One executemany call replaces the per-row execute loop; the
                # values are still passed as parameters, never formatted into
                # the SQL text.
                cursor.executemany("""
                INSERT INTO KSRTC_bus_details (
                    Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time,
                    Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

# Cell entry point: scrape all pages, store the rows in MySQL, then export a CSV.
# `all_bus_details` and `df` remain module-level names; the following cell reuses `df`.
if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    # insert_data_into_mysql prints MySQL errors instead of raising them, so the
    # CSV export below still runs when the database write fails.
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    df.to_csv('KSRTC_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'KSRTC_bus_details.csv' and MySQL database.")

Data inserted successfully into MySQL database.
Scraping completed. Data saved to 'KSRTC_bus_details.csv' and MySQL database.


In [37]:
# Write a second copy of the scraped table to an absolute path under Downloads.
# NOTE(review): index=False is not passed here, so the DataFrame index is written
# as the first CSV column, unlike the export in the previous cell - confirm intended.
df.to_csv("C:/Users/Lokesh J/Downloads/RedBus_Scrapped_Data/KSRTC_bus_details.csv")

# NBSTC

In [39]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

URL = "https://www.redbus.in/online-booking/north-bengal-state-transport-corporation"

def initialize_driver():
    """Start a Chrome WebDriver session, maximize its window, and return it."""
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url):
    """Navigate ``driver`` to ``url`` and then pause for five seconds.

    The pause is presumably there to let the page finish loading.
    """
    settle_seconds = 5
    driver.get(url)
    time.sleep(settle_seconds)

def scrape_bus_routes(driver):
    """Collect the link and label of every route element on the current page.

    Returns:
        A ``(links, names)`` pair of parallel lists: the ``href`` attribute and
        the whitespace-stripped text of each element with class ``route``.
    """
    anchors = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href'))

    names = []
    for anchor in anchors:
        names.append(anchor.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Load a route page and return one record per bus listed on it.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Route page to open; also stored as ``Route_Link`` in every record.
        route_name: Label stored as ``Route_Name`` in every record.

    Returns:
        A list of dicts with the keys ``Route_Name``, ``Route_Link``,
        ``Bus_Name``, ``Bus_Type``, ``Departing_Time``, ``Duration``,
        ``Reaching_Time``, ``Star_Rating``, ``Price`` and
        ``Seat_Availability``. A bus whose fields cannot be read is skipped
        with a printed message; an empty list is returned if the page as a
        whole fails.
    """
    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # Best effort: a page without the button is scraped as it is.
            # Catching Exception instead of using a bare except lets
            # KeyboardInterrupt/SystemExit stop the run.
            print(f"No 'View Buses' button found for {url}")

        # Scroll to the bottom and wait, presumably to trigger lazy loading.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        # The first XPath predicate is already covered by the second one.
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")

        # NOTE(review): each field is collected as its own page-wide list and
        # matched to a bus purely by position, so a bus without a rating or
        # seat element would shift the values of the buses after it - confirm
        # against the page markup.
        bus_details = []

        for i, bus_name_element in enumerate(bus_name_elements):
            try:
                # Keep only the digit characters of the fare text; everything
                # else, including any decimal point, is dropped.
                price_numeric = ''.join(filter(str.isdigit, price_elements[i].text))

                bus_detail = {
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_element.text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                }
                bus_details.append(bus_detail)
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages(total_pages=5):
    """Scrape every route on each pagination tab of the operator page at ``URL``.

    A new browser is started for each tab. Tabs after the first are reached by
    clicking the matching pagination element before the routes are collected.

    Args:
        total_pages: Number of pagination tabs to visit, starting at tab 1.
            Defaults to 5, the count that was previously hard-coded.

    Returns:
        A flat list of the bus records returned by ``scrape_bus_details`` for
        every route found. A failure on a tab is printed and that tab skipped.
    """
    all_bus_details = []
    for page in range(1, total_pages + 1):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Release the browser even when the tab failed part-way, so an
            # error does not leave a Chrome process running.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Create ``NBSTC_bus_details`` if it is missing and insert all scraped rows.

    Args:
        data: Iterable of dicts, one per bus, each holding every key listed in
            ``columns`` below.

    Raises:
        KeyError: If a record lacks one of the expected keys.

    MySQL errors are caught and printed instead of being raised.
    """
    columns = (
        'Route_Name', 'Route_Link', 'Bus_Name', 'Bus_Type', 'Departing_Time',
        'Duration', 'Reaching_Time', 'Star_Rating', 'Price', 'Seat_Availability',
    )
    # Build the parameter tuples up front so a malformed record fails before a
    # database connection is opened.
    rows = [tuple(detail[column] for column in columns) for detail in data]

    try:
        # NOTE(review): credentials are hard-coded; consider loading them from
        # configuration or the environment.
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='Root',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS NBSTC_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                # One executemany call replaces the per-row execute loop; the
                # values are still passed as parameters, never formatted into
                # the SQL text.
                cursor.executemany("""
                INSERT INTO NBSTC_bus_details (
                    Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time,
                    Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

# Cell entry point: scrape all pages, store the rows in MySQL, then export a CSV.
# `all_bus_details` and `df` remain module-level names; the following cell reuses `df`.
if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    # insert_data_into_mysql prints MySQL errors instead of raising them, so the
    # CSV export below still runs when the database write fails.
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    df.to_csv('NBSTC_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'NBSTC_bus_details.csv' and MySQL database.")

No 'View Buses' button found for https://www.redbus.in/bus-tickets/siliguri-to-darjeeling
No 'View Buses' button found for https://www.redbus.in/bus-tickets/cooch-behar-west-bengal-to-kolkata
No 'View Buses' button found for https://www.redbus.in/bus-tickets/siliguri-to-cooch-behar-west-bengal
No 'View Buses' button found for https://www.redbus.in/bus-tickets/raiganj-to-balurghat
Data inserted successfully into MySQL database.
Scraping completed. Data saved to 'NBSTC_bus_details.csv' and MySQL database.


In [41]:
# Write a second copy of the scraped table to an absolute path under Downloads.
# NOTE(review): index=False is not passed here, so the DataFrame index is written
# as the first CSV column, unlike the export in the previous cell - confirm intended.
df.to_csv("C:/Users/Lokesh J/Downloads/RedBus_Scrapped_Data/NBSTC_bus_details.csv")

# Punjab

In [47]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

URL = "https://www.redbus.in/online-booking/pepsu/?utm_source=rtchometile"

def initialize_driver():
    """Start a Chrome WebDriver session, maximize its window, and return it."""
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url):
    """Navigate ``driver`` to ``url`` and then pause for five seconds.

    The pause is presumably there to let the page finish loading.
    """
    settle_seconds = 5
    driver.get(url)
    time.sleep(settle_seconds)

def scrape_bus_routes(driver):
    """Collect the link and label of every route element on the current page.

    Returns:
        A ``(links, names)`` pair of parallel lists: the ``href`` attribute and
        the whitespace-stripped text of each element with class ``route``.
    """
    anchors = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href'))

    names = []
    for anchor in anchors:
        names.append(anchor.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Load a route page and return one record per bus listed on it.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Route page to open; also stored as ``Route_Link`` in every record.
        route_name: Label stored as ``Route_Name`` in every record.

    Returns:
        A list of dicts with the keys ``Route_Name``, ``Route_Link``,
        ``Bus_Name``, ``Bus_Type``, ``Departing_Time``, ``Duration``,
        ``Reaching_Time``, ``Star_Rating``, ``Price`` and
        ``Seat_Availability``. A bus whose fields cannot be read is skipped
        with a printed message; an empty list is returned if the page as a
        whole fails.
    """
    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # Best effort: a page without the button is scraped as it is.
            # Catching Exception instead of using a bare except lets
            # KeyboardInterrupt/SystemExit stop the run.
            print(f"No 'View Buses' button found for {url}")

        # Scroll to the bottom and wait, presumably to trigger lazy loading.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        # The first XPath predicate is already covered by the second one.
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")

        # NOTE(review): each field is collected as its own page-wide list and
        # matched to a bus purely by position, so a bus without a rating or
        # seat element would shift the values of the buses after it - confirm
        # against the page markup.
        bus_details = []

        for i, bus_name_element in enumerate(bus_name_elements):
            try:
                # Keep only the digit characters of the fare text; everything
                # else, including any decimal point, is dropped.
                price_numeric = ''.join(filter(str.isdigit, price_elements[i].text))

                bus_detail = {
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_element.text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                }
                bus_details.append(bus_detail)
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages(total_pages=2):
    """Scrape every route on each pagination tab of the operator page at ``URL``.

    A new browser is started for each tab. Tabs after the first are reached by
    clicking the matching pagination element before the routes are collected.

    Args:
        total_pages: Number of pagination tabs to visit, starting at tab 1.
            Defaults to 2, the count that was previously hard-coded.

    Returns:
        A flat list of the bus records returned by ``scrape_bus_details`` for
        every route found. A failure on a tab is printed and that tab skipped.
    """
    all_bus_details = []
    for page in range(1, total_pages + 1):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                bus_details = scrape_bus_details(driver, link, name)
                if bus_details:
                    all_bus_details.extend(bus_details)
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Release the browser even when the tab failed part-way, so an
            # error does not leave a Chrome process running.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Create ``PEPSU_bus_details`` if it is missing and insert all scraped rows.

    Args:
        data: Iterable of dicts, one per bus, each holding every key listed in
            ``columns`` below.

    Raises:
        KeyError: If a record lacks one of the expected keys.

    MySQL errors are caught and printed instead of being raised.
    """
    columns = (
        'Route_Name', 'Route_Link', 'Bus_Name', 'Bus_Type', 'Departing_Time',
        'Duration', 'Reaching_Time', 'Star_Rating', 'Price', 'Seat_Availability',
    )
    # Build the parameter tuples up front so a malformed record fails before a
    # database connection is opened.
    rows = [tuple(detail[column] for column in columns) for detail in data]

    try:
        # NOTE(review): credentials are hard-coded; consider loading them from
        # configuration or the environment.
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='Root',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS PEPSU_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                # One executemany call replaces the per-row execute loop; the
                # values are still passed as parameters, never formatted into
                # the SQL text.
                cursor.executemany("""
                INSERT INTO PEPSU_bus_details (
                    Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time,
                    Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

# Cell entry point: scrape all pages, store the rows in MySQL, then export a CSV.
# `all_bus_details` and `df` remain module-level names; the following cell reuses `df`.
if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    # insert_data_into_mysql prints MySQL errors instead of raising them, so the
    # CSV export below still runs when the database write fails.
    insert_data_into_mysql(all_bus_details)
    df = pd.DataFrame(all_bus_details)
    df.to_csv('PEPSU_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'PEPSU_bus_details.csv' and MySQL database.")

Data inserted successfully into MySQL database.
Scraping completed. Data saved to 'PEPSU_bus_details.csv' and MySQL database.


In [49]:
# Write a second copy of the scraped table to an absolute path under Downloads.
# NOTE(review): index=False is not passed here, so the DataFrame index is written
# as the first CSV column, unlike the export in the previous cell - confirm intended.
df.to_csv("C:/Users/Lokesh J/Downloads/RedBus_Scrapped_Data/PEPSU_bus_details.csv")

# RSRTC

In [51]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

URL = "https://www.redbus.in/online-booking/rsrtc/?utm_source=rtchometile"

def initialize_driver():
    """Start a Chrome WebDriver session and maximize its window.

    Returns:
        The newly created ``webdriver.Chrome`` instance. It is not closed
        here; the caller has to call ``quit()`` on it.
    """
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url, settle_seconds=5):
    """Navigate *driver* to *url*, then pause for a fixed time.

    Args:
        driver: Object exposing ``get(url)`` (a Selenium WebDriver).
        url: Address to open.
        settle_seconds: Length of the pause after navigation. Defaults to 5,
            the value that used to be hard-coded; 0 or less skips the pause.
    """
    driver.get(url)
    if settle_seconds > 0:
        time.sleep(settle_seconds)

def scrape_bus_routes(driver):
    """Collect the link and label of every ``route`` element on the page.

    Returns:
        A ``(links, names)`` pair of parallel lists: the ``href`` attribute
        and the whitespace-stripped text of each element with class ``route``.
    """
    found = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for element in found:
        links.append(element.get_attribute('href'))

    names = []
    for element in found:
        names.append(element.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Open one route page and build a record for every bus listed on it.

    Args:
        driver: Selenium WebDriver used for navigation and element lookup.
        url: Route page to open; copied into each record as ``Route_Link``.
        route_name: Copied into each record as ``Route_Name``.

    Returns:
        A list of dicts keyed ``Route_Name``, ``Route_Link``, ``Bus_Name``,
        ``Bus_Type``, ``Departing_Time``, ``Duration``, ``Reaching_Time``,
        ``Star_Rating``, ``Price`` and ``Seat_Availability``. ``Price`` is
        the first number found in the fare text (as a string, ``''`` when
        there is none). An empty list is returned when the page as a whole
        fails; a failure on a single bus is printed and that bus is skipped.
    """
    import re  # local import: only the fare parsing below needs it

    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # Was a bare ``except``, which also swallowed KeyboardInterrupt
            # and SystemExit; a missing button is simply reported.
            print(f"No 'View Buses' button found for {url}")

        # Scroll to the bottom once and give the page time to react.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        # NOTE(review): each field is collected as its own page-wide list and
        # matched to a bus by position; presumably a bus without, say, a
        # rating element shifts the later values -- confirm against the markup.
        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")

        bus_details = []

        for i, bus_name_element in enumerate(bus_name_elements):
            try:
                # Take the first number in the fare text, dropping thousands
                # separators but keeping the decimal part; joining all digits
                # would turn "499.50" into "49950".
                fare_match = re.search(r"\d+(?:\.\d+)?", price_elements[i].text.replace(",", ""))
                price_numeric = fare_match.group() if fare_match else ''

                bus_details.append({
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_element.text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                })
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages(total_pages=2):
    """Scrape every route found on each pagination tab of ``URL``.

    A new browser is started for each listing page; visiting the route pages
    navigates the driver away from the listing.

    Args:
        total_pages: Number of pagination tabs to visit, starting at 1.
            Defaults to 2, the count that used to be hard-coded.

    Returns:
        A flat list of the dicts produced by ``scrape_bus_details``.
    """
    all_bus_details = []
    for page in range(1, total_pages + 1):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                all_bus_details.extend(scrape_bus_details(driver, link, name))
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Always release the browser: an exception above used to skip
            # quit() and leave the Chrome session running.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Store scraped bus records in the ``RSRTC_bus_details`` MySQL table.

    The table is created when missing. All rows go through one
    ``executemany`` call and are committed together. MySQL errors are
    printed, not raised.

    Args:
        data: Iterable of dicts holding every key named in ``columns``.
    """
    columns = (
        'Route_Name', 'Route_Link', 'Bus_Name', 'Bus_Type', 'Departing_Time',
        'Duration', 'Reaching_Time', 'Star_Rating', 'Price', 'Seat_Availability',
    )
    rows = [tuple(detail[column] for column in columns) for detail in data]

    try:
        # NOTE(review): credentials are hard-coded; consider loading them
        # from configuration instead of keeping them in the notebook.
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='Root',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS RSRTC_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                # One executemany call instead of one execute per row.
                cursor.executemany("""
                    INSERT INTO RSRTC_bus_details (
                        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time, 
                        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

# Run the RSRTC pipeline: scrape, store the rows in MySQL, then export a CSV.
if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    insert_data_into_mysql(all_bus_details)
    # `df` stays a module-level name; the next notebook cell reuses it.
    df = pd.DataFrame(all_bus_details)
    df.to_csv('RSRTC_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'RSRTC_bus_details.csv' and MySQL database.")

Data inserted successfully into MySQL database.
Scraping completed. Data saved to 'RSRTC_bus_details.csv' and MySQL database.


In [53]:
# Write a second copy of `df` to an absolute local path.
# NOTE(review): no index=False here, so this copy also carries the DataFrame
# index as its first column -- confirm that is intended.
df.to_csv("C:/Users/Lokesh J/Downloads/RedBus_Scrapped_Data/RSRTC_bus_details.csv")

# SBSTC

In [55]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

URL = "https://www.redbus.in/online-booking/south-bengal-state-transport-corporation-sbstc/?utm_source=rtchometile"

def initialize_driver():
    """Start a Chrome WebDriver session and maximize its window.

    Returns:
        The newly created ``webdriver.Chrome`` instance. It is not closed
        here; the caller has to call ``quit()`` on it.
    """
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url, settle_seconds=5):
    """Navigate *driver* to *url*, then pause for a fixed time.

    Args:
        driver: Object exposing ``get(url)`` (a Selenium WebDriver).
        url: Address to open.
        settle_seconds: Length of the pause after navigation. Defaults to 5,
            the value that used to be hard-coded; 0 or less skips the pause.
    """
    driver.get(url)
    if settle_seconds > 0:
        time.sleep(settle_seconds)

def scrape_bus_routes(driver):
    """Collect the link and label of every ``route`` element on the page.

    Returns:
        A ``(links, names)`` pair of parallel lists: the ``href`` attribute
        and the whitespace-stripped text of each element with class ``route``.
    """
    found = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for element in found:
        links.append(element.get_attribute('href'))

    names = []
    for element in found:
        names.append(element.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Open one route page and build a record for every bus listed on it.

    Args:
        driver: Selenium WebDriver used for navigation and element lookup.
        url: Route page to open; copied into each record as ``Route_Link``.
        route_name: Copied into each record as ``Route_Name``.

    Returns:
        A list of dicts keyed ``Route_Name``, ``Route_Link``, ``Bus_Name``,
        ``Bus_Type``, ``Departing_Time``, ``Duration``, ``Reaching_Time``,
        ``Star_Rating``, ``Price`` and ``Seat_Availability``. ``Price`` is
        the first number found in the fare text (as a string, ``''`` when
        there is none). An empty list is returned when the page as a whole
        fails; a failure on a single bus is printed and that bus is skipped.
    """
    import re  # local import: only the fare parsing below needs it

    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # Was a bare ``except``, which also swallowed KeyboardInterrupt
            # and SystemExit; a missing button is simply reported.
            print(f"No 'View Buses' button found for {url}")

        # Scroll to the bottom once and give the page time to react.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        # NOTE(review): each field is collected as its own page-wide list and
        # matched to a bus by position; presumably a bus without, say, a
        # rating element shifts the later values -- confirm against the markup.
        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")

        bus_details = []

        for i, bus_name_element in enumerate(bus_name_elements):
            try:
                # Take the first number in the fare text, dropping thousands
                # separators but keeping the decimal part; joining all digits
                # would turn "499.50" into "49950".
                fare_match = re.search(r"\d+(?:\.\d+)?", price_elements[i].text.replace(",", ""))
                price_numeric = fare_match.group() if fare_match else ''

                bus_details.append({
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_element.text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                })
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages(total_pages=5):
    """Scrape every route found on each pagination tab of ``URL``.

    A new browser is started for each listing page; visiting the route pages
    navigates the driver away from the listing.

    Args:
        total_pages: Number of pagination tabs to visit, starting at 1.
            Defaults to 5, the count that used to be hard-coded.

    Returns:
        A flat list of the dicts produced by ``scrape_bus_details``.
    """
    all_bus_details = []
    for page in range(1, total_pages + 1):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                all_bus_details.extend(scrape_bus_details(driver, link, name))
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Always release the browser: an exception above used to skip
            # quit() and leave the Chrome session running.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Store scraped bus records in the ``SBSTC_bus_details`` MySQL table.

    The table is created when missing. All rows go through one
    ``executemany`` call and are committed together. MySQL errors are
    printed, not raised.

    Args:
        data: Iterable of dicts holding every key named in ``columns``.
    """
    columns = (
        'Route_Name', 'Route_Link', 'Bus_Name', 'Bus_Type', 'Departing_Time',
        'Duration', 'Reaching_Time', 'Star_Rating', 'Price', 'Seat_Availability',
    )
    rows = [tuple(detail[column] for column in columns) for detail in data]

    try:
        # NOTE(review): credentials are hard-coded; consider loading them
        # from configuration instead of keeping them in the notebook.
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='Root',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS SBSTC_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                # One executemany call instead of one execute per row.
                cursor.executemany("""
                    INSERT INTO SBSTC_bus_details (
                        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time, 
                        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

# Run the SBSTC pipeline: scrape, store the rows in MySQL, then export a CSV.
if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    insert_data_into_mysql(all_bus_details)
    # `df` stays a module-level name; the next notebook cell reuses it.
    df = pd.DataFrame(all_bus_details)
    df.to_csv('SBSTC_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'SBSTC_bus_details.csv' and MySQL database.")

No 'View Buses' button found for https://www.redbus.in/bus-tickets/jhargram-to-kolkata
No 'View Buses' button found for https://www.redbus.in/bus-tickets/barasat-west-bengal-to-midnapore
No 'View Buses' button found for https://www.redbus.in/bus-tickets/kirnahar-west-bengal-to-kolkata
No 'View Buses' button found for https://www.redbus.in/bus-tickets/berhampore-to-durgapur
No 'View Buses' button found for https://www.redbus.in/bus-tickets/purulia-to-durgapur
No 'View Buses' button found for https://www.redbus.in/bus-tickets/durgapur-to-berhampore
No 'View Buses' button found for https://www.redbus.in/bus-tickets/durgapur-to-barasat-west-bengal
No 'View Buses' button found for https://www.redbus.in/bus-tickets/barasat-west-bengal-to-digha
Data inserted successfully into MySQL database.
Scraping completed. Data saved to 'SBSTC_bus_details.csv' and MySQL database.


In [57]:
# Write a second copy of `df` to an absolute local path.
# NOTE(review): no index=False here, so this copy also carries the DataFrame
# index as its first column -- confirm that is intended.
df.to_csv("C:/Users/Lokesh J/Downloads/RedBus_Scrapped_Data/SBSTC_bus_details.csv")

# TSRTC

In [59]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

URL = "https://www.redbus.in/online-booking/tsrtc/?utm_source=rtchometile"

def initialize_driver():
    """Start a Chrome WebDriver session and maximize its window.

    Returns:
        The newly created ``webdriver.Chrome`` instance. It is not closed
        here; the caller has to call ``quit()`` on it.
    """
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url, settle_seconds=5):
    """Navigate *driver* to *url*, then pause for a fixed time.

    Args:
        driver: Object exposing ``get(url)`` (a Selenium WebDriver).
        url: Address to open.
        settle_seconds: Length of the pause after navigation. Defaults to 5,
            the value that used to be hard-coded; 0 or less skips the pause.
    """
    driver.get(url)
    if settle_seconds > 0:
        time.sleep(settle_seconds)

def scrape_bus_routes(driver):
    """Collect the link and label of every ``route`` element on the page.

    Returns:
        A ``(links, names)`` pair of parallel lists: the ``href`` attribute
        and the whitespace-stripped text of each element with class ``route``.
    """
    found = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for element in found:
        links.append(element.get_attribute('href'))

    names = []
    for element in found:
        names.append(element.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Open one route page and build a record for every bus listed on it.

    Args:
        driver: Selenium WebDriver used for navigation and element lookup.
        url: Route page to open; copied into each record as ``Route_Link``.
        route_name: Copied into each record as ``Route_Name``.

    Returns:
        A list of dicts keyed ``Route_Name``, ``Route_Link``, ``Bus_Name``,
        ``Bus_Type``, ``Departing_Time``, ``Duration``, ``Reaching_Time``,
        ``Star_Rating``, ``Price`` and ``Seat_Availability``. ``Price`` is
        the first number found in the fare text (as a string, ``''`` when
        there is none). An empty list is returned when the page as a whole
        fails; a failure on a single bus is printed and that bus is skipped.
    """
    import re  # local import: only the fare parsing below needs it

    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # Was a bare ``except``, which also swallowed KeyboardInterrupt
            # and SystemExit; a missing button is simply reported.
            print(f"No 'View Buses' button found for {url}")

        # Scroll to the bottom once and give the page time to react.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        # NOTE(review): each field is collected as its own page-wide list and
        # matched to a bus by position; presumably a bus without, say, a
        # rating element shifts the later values -- confirm against the markup.
        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")

        bus_details = []

        for i, bus_name_element in enumerate(bus_name_elements):
            try:
                # Take the first number in the fare text, dropping thousands
                # separators but keeping the decimal part; joining all digits
                # would turn "499.50" into "49950".
                fare_match = re.search(r"\d+(?:\.\d+)?", price_elements[i].text.replace(",", ""))
                price_numeric = fare_match.group() if fare_match else ''

                bus_details.append({
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_element.text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                })
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages(total_pages=3):
    """Scrape every route found on each pagination tab of ``URL``.

    A new browser is started for each listing page; visiting the route pages
    navigates the driver away from the listing.

    Args:
        total_pages: Number of pagination tabs to visit, starting at 1.
            Defaults to 3, the count that used to be hard-coded.

    Returns:
        A flat list of the dicts produced by ``scrape_bus_details``.
    """
    all_bus_details = []
    for page in range(1, total_pages + 1):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                all_bus_details.extend(scrape_bus_details(driver, link, name))
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Always release the browser: an exception above used to skip
            # quit() and leave the Chrome session running.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Store scraped bus records in the ``TSRTC_bus_details`` MySQL table.

    The table is created when missing. All rows go through one
    ``executemany`` call and are committed together. MySQL errors are
    printed, not raised.

    Args:
        data: Iterable of dicts holding every key named in ``columns``.
    """
    columns = (
        'Route_Name', 'Route_Link', 'Bus_Name', 'Bus_Type', 'Departing_Time',
        'Duration', 'Reaching_Time', 'Star_Rating', 'Price', 'Seat_Availability',
    )
    rows = [tuple(detail[column] for column in columns) for detail in data]

    try:
        # NOTE(review): credentials are hard-coded; consider loading them
        # from configuration instead of keeping them in the notebook.
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='Root',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS TSRTC_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                # One executemany call instead of one execute per row.
                cursor.executemany("""
                    INSERT INTO TSRTC_bus_details (
                        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time, 
                        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

# Run the TSRTC pipeline: scrape, store the rows in MySQL, then export a CSV.
if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    insert_data_into_mysql(all_bus_details)
    # `df` stays a module-level name; the next notebook cell reuses it.
    df = pd.DataFrame(all_bus_details)
    df.to_csv('TSRTC_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'TSRTC_bus_details.csv' and MySQL database.")

Data inserted successfully into MySQL database.
Scraping completed. Data saved to 'TSRTC_bus_details.csv' and MySQL database.


In [61]:
# Write a second copy of `df` to an absolute local path.
# NOTE(review): no index=False here, so this copy also carries the DataFrame
# index as its first column -- confirm that is intended.
df.to_csv("C:/Users/Lokesh J/Downloads/RedBus_Scrapped_Data/TSRTC_bus_details.csv")

# UPSRTC

In [63]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

URL = "https://www.redbus.in/online-booking/uttar-pradesh-state-road-transport-corporation-upsrtc/?utm_source=rtchometile"

def initialize_driver():
    """Start a Chrome WebDriver session and maximize its window.

    Returns:
        The newly created ``webdriver.Chrome`` instance. It is not closed
        here; the caller has to call ``quit()`` on it.
    """
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url, settle_seconds=5):
    """Navigate *driver* to *url*, then pause for a fixed time.

    Args:
        driver: Object exposing ``get(url)`` (a Selenium WebDriver).
        url: Address to open.
        settle_seconds: Length of the pause after navigation. Defaults to 5,
            the value that used to be hard-coded; 0 or less skips the pause.
    """
    driver.get(url)
    if settle_seconds > 0:
        time.sleep(settle_seconds)

def scrape_bus_routes(driver):
    """Collect the link and label of every ``route`` element on the page.

    Returns:
        A ``(links, names)`` pair of parallel lists: the ``href`` attribute
        and the whitespace-stripped text of each element with class ``route``.
    """
    found = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for element in found:
        links.append(element.get_attribute('href'))

    names = []
    for element in found:
        names.append(element.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Open one route page and build a record for every bus listed on it.

    Args:
        driver: Selenium WebDriver used for navigation and element lookup.
        url: Route page to open; copied into each record as ``Route_Link``.
        route_name: Copied into each record as ``Route_Name``.

    Returns:
        A list of dicts keyed ``Route_Name``, ``Route_Link``, ``Bus_Name``,
        ``Bus_Type``, ``Departing_Time``, ``Duration``, ``Reaching_Time``,
        ``Star_Rating``, ``Price`` and ``Seat_Availability``. ``Price`` is
        the first number found in the fare text (as a string, ``''`` when
        there is none). An empty list is returned when the page as a whole
        fails; a failure on a single bus is printed and that bus is skipped.
    """
    import re  # local import: only the fare parsing below needs it

    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # Was a bare ``except``, which also swallowed KeyboardInterrupt
            # and SystemExit; a missing button is simply reported.
            print(f"No 'View Buses' button found for {url}")

        # Scroll to the bottom once and give the page time to react.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        # NOTE(review): each field is collected as its own page-wide list and
        # matched to a bus by position; presumably a bus without, say, a
        # rating element shifts the later values -- confirm against the markup.
        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")

        bus_details = []

        for i, bus_name_element in enumerate(bus_name_elements):
            try:
                # Take the first number in the fare text, dropping thousands
                # separators but keeping the decimal part; joining all digits
                # would turn "499.50" into "49950".
                fare_match = re.search(r"\d+(?:\.\d+)?", price_elements[i].text.replace(",", ""))
                price_numeric = fare_match.group() if fare_match else ''

                bus_details.append({
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_element.text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                })
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages(total_pages=3):
    """Scrape every route found on each pagination tab of ``URL``.

    A new browser is started for each listing page; visiting the route pages
    navigates the driver away from the listing.

    Args:
        total_pages: Number of pagination tabs to visit, starting at 1.
            Defaults to 3, the count that used to be hard-coded.

    Returns:
        A flat list of the dicts produced by ``scrape_bus_details``.
    """
    all_bus_details = []
    for page in range(1, total_pages + 1):
        driver = None
        try:
            driver = initialize_driver()
            load_page(driver, URL)

            if page > 1:
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)

            all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
            for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                all_bus_details.extend(scrape_bus_details(driver, link, name))
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
        finally:
            # Always release the browser: an exception above used to skip
            # quit() and leave the Chrome session running.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Store scraped bus records in the ``UPSRTC_bus_details`` MySQL table.

    The table is created when missing. All rows go through one
    ``executemany`` call and are committed together. MySQL errors are
    printed, not raised.

    Args:
        data: Iterable of dicts holding every key named in ``columns``.
    """
    columns = (
        'Route_Name', 'Route_Link', 'Bus_Name', 'Bus_Type', 'Departing_Time',
        'Duration', 'Reaching_Time', 'Star_Rating', 'Price', 'Seat_Availability',
    )
    rows = [tuple(detail[column] for column in columns) for detail in data]

    try:
        # NOTE(review): credentials are hard-coded; consider loading them
        # from configuration instead of keeping them in the notebook.
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='Root',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS UPSRTC_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                # One executemany call instead of one execute per row.
                cursor.executemany("""
                    INSERT INTO UPSRTC_bus_details (
                        Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time, 
                        Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")

# Run the UPSRTC pipeline: scrape, store the rows in MySQL, then export a CSV.
if __name__ == "__main__":
    all_bus_details = scrape_all_pages()
    insert_data_into_mysql(all_bus_details)
    # `df` stays a module-level name; the next notebook cell reuses it.
    df = pd.DataFrame(all_bus_details)
    df.to_csv('UPSRTC_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'UPSRTC_bus_details.csv' and MySQL database.")

Data inserted successfully into MySQL database.
Scraping completed. Data saved to 'UPSRTC_bus_details.csv' and MySQL database.


In [65]:
# Write a second copy of `df` to an absolute local path.
# NOTE(review): no index=False here, so this copy also carries the DataFrame
# index as its first column -- confirm that is intended.
df.to_csv("C:/Users/Lokesh J/Downloads/RedBus_Scrapped_Data/UPSRTC_bus_details.csv")

# WBTC

In [67]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import pymysql.cursors

URL = "https://www.redbus.in/online-booking/wbtc-ctc/?utm_source=rtchometile"

def initialize_driver():
    """Start a Chrome WebDriver session and maximize its window.

    Returns:
        The newly created ``webdriver.Chrome`` instance. It is not closed
        here; the caller has to call ``quit()`` on it.
    """
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

def load_page(driver, url, settle_seconds=5):
    """Navigate *driver* to *url*, then pause for a fixed time.

    Args:
        driver: Object exposing ``get(url)`` (a Selenium WebDriver).
        url: Address to open.
        settle_seconds: Length of the pause after navigation. Defaults to 5,
            the value that used to be hard-coded; 0 or less skips the pause.
    """
    driver.get(url)
    if settle_seconds > 0:
        time.sleep(settle_seconds)

def scrape_bus_routes(driver):
    """Collect the link and label of every ``route`` element on the page.

    Returns:
        A ``(links, names)`` pair of parallel lists: the ``href`` attribute
        and the whitespace-stripped text of each element with class ``route``.
    """
    found = driver.find_elements(By.CLASS_NAME, 'route')

    links = []
    for element in found:
        links.append(element.get_attribute('href'))

    names = []
    for element in found:
        names.append(element.text.strip())

    return links, names

def scrape_bus_details(driver, url, route_name):
    """Open one route page and build a record for every bus listed on it.

    Args:
        driver: Selenium WebDriver used for navigation and element lookup.
        url: Route page to open; copied into each record as ``Route_Link``.
        route_name: Copied into each record as ``Route_Name``.

    Returns:
        A list of dicts keyed ``Route_Name``, ``Route_Link``, ``Bus_Name``,
        ``Bus_Type``, ``Departing_Time``, ``Duration``, ``Reaching_Time``,
        ``Star_Rating``, ``Price`` and ``Seat_Availability``. ``Price`` is
        the first number found in the fare text (as a string, ``''`` when
        there is none). An empty list is returned when the page as a whole
        fails; a failure on a single bus is printed and that bus is skipped.
    """
    import re  # local import: only the fare parsing below needs it

    try:
        driver.get(url)
        time.sleep(5)

        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)
        except Exception:
            # Was a bare ``except``, which also swallowed KeyboardInterrupt
            # and SystemExit; a missing button is simply reported.
            print(f"No 'View Buses' button found for {url}")

        # Scroll to the bottom once and give the page time to react.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        # NOTE(review): each field is collected as its own page-wide list and
        # matched to a bus by position; presumably a bus without, say, a
        # rating element shifts the later values -- confirm against the markup.
        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left')]")

        bus_details = []

        for i, bus_name_element in enumerate(bus_name_elements):
            try:
                # Take the first number in the fare text, dropping thousands
                # separators but keeping the decimal part; joining all digits
                # would turn "499.50" into "49950".
                fare_match = re.search(r"\d+(?:\.\d+)?", price_elements[i].text.replace(",", ""))
                price_numeric = fare_match.group() if fare_match else ''

                bus_details.append({
                    "Route_Name": route_name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_element.text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_numeric,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else 'N/A'
                })
            except Exception as e:
                print(f"Error occurred while scraping bus details: {str(e)}")
        return bus_details

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

def scrape_all_pages():
    """Scrape bus details for every route on listing pages 1 through 4.

    For each listing page a fresh browser is started via
    ``initialize_driver``, the landing page ``URL`` is loaded, the pagination
    tab for that page is clicked (pages after the first), the route links on
    the page are collected with ``scrape_bus_routes`` and each one is handed
    to ``scrape_bus_details``.

    Returns:
        list: All bus-detail records returned by ``scrape_bus_details``,
        concatenated in page order. A page that raises is reported on stdout
        and skipped; records gathered before the failure are kept.
    """
    all_bus_details = []
    for page in range(1, 5):
        try:
            driver = initialize_driver()
            try:
                load_page(driver, URL)

                if page > 1:
                    pagination_tab = WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                    )
                    # Click through JavaScript after scrolling the tab into
                    # view instead of issuing a native WebDriver click.
                    driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                    driver.execute_script("arguments[0].click();", pagination_tab)
                    time.sleep(5)

                # Links and names are read up front because the same driver
                # navigates away from the listing page for every route below.
                all_bus_routes_link, all_bus_routes_name = scrape_bus_routes(driver)
                for link, name in zip(all_bus_routes_link, all_bus_routes_name):
                    bus_details = scrape_bus_details(driver, link, name)
                    if bus_details:
                        all_bus_details.extend(bus_details)
            finally:
                # Always close the browser, including when the page fails
                # part-way; otherwise each failed page leaks a Chrome process.
                driver.quit()
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")

    return all_bus_details

def insert_data_into_mysql(data):
    """Create the ``WBTC_bus_details`` table if missing and insert ``data``.

    Args:
        data: Iterable of dicts, each providing the keys ``Route_Name``,
            ``Route_Link``, ``Bus_Name``, ``Bus_Type``, ``Departing_Time``,
            ``Duration``, ``Reaching_Time``, ``Star_Rating``, ``Price`` and
            ``Seat_Availability``.

    Returns:
        None. ``pymysql.MySQLError`` is caught and printed rather than
        raised; a missing key still raises ``KeyError``.
    """
    try:
        # NOTE(review): connection settings, including the password, are
        # hard-coded here; consider moving them to configuration.
        connection = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='Root',
            database='redbus',
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection:
            with connection.cursor() as cursor:
                cursor.execute("""
                CREATE TABLE IF NOT EXISTS WBTC_bus_details (
                    Route_Name TEXT,
                    Route_Link TEXT,
                    Bus_Name TEXT,
                    Bus_Type TEXT,
                    Departing_Time TIME,
                    Duration TEXT,
                    Reaching_Time TIME,
                    Star_Rating FLOAT,
                    Price DECIMAL(10,2),
                    Seat_Availability TEXT
                )
                """)

                # One batched call instead of one execute() per record;
                # executemany() is a no-op when there are no rows.
                rows = [_bus_detail_row(detail) for detail in data]
                cursor.executemany("""
                INSERT INTO WBTC_bus_details (
                    Route_Name, Route_Link, Bus_Name, Bus_Type, Departing_Time, 
                    Duration, Reaching_Time, Star_Rating, Price, Seat_Availability
                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """, rows)

            connection.commit()

        print("Data inserted successfully into MySQL database.")

    except pymysql.MySQLError as err:
        print(f"Error: {err}")


def _bus_detail_row(detail):
    """Return the INSERT parameter tuple for one scraped bus-detail dict."""
    price = detail['Price']
    # An empty price string is not a valid DECIMAL value and makes the insert
    # fail under strict SQL mode, so store NULL for "no price" instead.
    if price == '':
        price = None
    return (
        detail['Route_Name'], detail['Route_Link'], detail['Bus_Name'],
        detail['Bus_Type'], detail['Departing_Time'], detail['Duration'],
        detail['Reaching_Time'], detail['Star_Rating'], price,
        detail['Seat_Availability']
    )

if __name__ == "__main__":
    # Entry point: scrape all listing pages, then persist the same records
    # to MySQL and to a CSV file in the current working directory.
    all_bus_details = scrape_all_pages()
    # insert_data_into_mysql() prints MySQL errors instead of raising them, so
    # the CSV export and the final message below run even if the insert failed.
    insert_data_into_mysql(all_bus_details)
    # `df` is kept as a module-level name; a later cell exports it again.
    df = pd.DataFrame(all_bus_details)
    df.to_csv('WBTC_bus_details.csv', index=False)
    print("Scraping completed. Data saved to 'WBTC_bus_details.csv' and MySQL database.")

No 'View Buses' button found for https://www.redbus.in/bus-tickets/barasat-west-bengal-to-digha
No 'View Buses' button found for https://www.redbus.in/bus-tickets/barasat-west-bengal-to-midnapore
No 'View Buses' button found for https://www.redbus.in/bus-tickets/midnapore-to-kolkata
No 'View Buses' button found for https://www.redbus.in/bus-tickets/barasat-west-bengal-to-kolaghat
No 'View Buses' button found for https://www.redbus.in/bus-tickets/barasat-west-bengal-to-contai-kanthi
No 'View Buses' button found for https://www.redbus.in/bus-tickets/habra-to-digha
No 'View Buses' button found for https://www.redbus.in/bus-tickets/barasat-west-bengal-to-nandakumar
No 'View Buses' button found for https://www.redbus.in/bus-tickets/digha-to-habra
No 'View Buses' button found for https://www.redbus.in/bus-tickets/midnapore-to-barasat-west-bengal
No 'View Buses' button found for https://www.redbus.in/bus-tickets/kolkata-to-bolpur-west-bengal
No 'View Buses' button found for https://www.redbus

In [69]:
# Second export of the scraped DataFrame, this time to an absolute,
# machine-specific Windows path.
# NOTE(review): index=False is not passed here (the earlier export passes it),
# so pandas also writes the row index as the first column - confirm intended.
df.to_csv("C:/Users/Lokesh J/Downloads/RedBus_Scrapped_Data/WBTC_bus_details.csv")