In [6]:
pip install webdriver-manager


Collecting webdriver-manager
  Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Collecting python-dotenv (from webdriver-manager)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl (27 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv, webdriver-manager
Successfully installed python-dotenv-1.0.1 webdriver-manager-4.0.2
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
import time
import pandas as pd

# Initialize WebDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

# Open the desired webpage
driver.get("https://www.redbus.in/online-booking/ksrtc-kerala/?utm_source=rtchometile")
time.sleep(3)  # Allow time for the page to load


# Function to retrieve bus route links and route names
def link_route(path):
    LINKS = []
    ROUTE = []
    wait = WebDriverWait(driver, 10)

    while True:
        try:
            paths = driver.find_elements(By.XPATH, path)
            for links in paths:
                d = links.get_attribute("href")
                if d:
                    LINKS.append(d)
            for route in paths:
                ROUTE.append(route.text)
            
            # Handle pagination
            try:
                active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
                active_page_number = active_page_element.text
                next_page_number = str(int(active_page_number) + 1)
                next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
                next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
                driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
                time.sleep(1)
                next_page_button.click()
                print(f"Navigating to page {next_page_number}")
                time.sleep(10)
            except (NoSuchElementException, TimeoutException):
                print("No more pages to paginate or pagination element not found")
                break
        except Exception as e:
            print(f"Error occurred: {str(e)}")
            break

    return LINKS, ROUTE


# Retrieve route links and names
LINKS, ROUTE = link_route("//a[@class='route']")

# Save route data
df_routes = pd.DataFrame({"Route_name": ROUTE, "Route_link": LINKS})
df_routes.to_csv("kerala.csv", index=False)
print("Route details saved successfully.")

# Close the first driver
driver.quit()


# Initialize second WebDriver for bus details
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

Bus_names = []
Bus_types = []
Departure = []
Arrival = []
Ratings = []
Total_Duration = []
Prices = []
Seats_Available = []
Route_names = []
Route_links = []

# Loop through route links to extract bus details
for i, r in df_routes.iterrows():
    link = r["Route_link"]
    routes = r["Route_name"]
    driver.get(link)
    time.sleep(2)

    try:
        view_buses_button = driver.find_element(By.XPATH, "//div[@class='button']")
        view_buses_button.click()
    except:
        continue
    time.sleep(2)

    scrolling = True
    while scrolling:
        old_page_source = driver.page_source
        ActionChains(driver).send_keys(Keys.PAGE_DOWN).perform()
        time.sleep(5)
        new_page_source = driver.page_source
        if new_page_source == old_page_source:
            scrolling = False

    # Extract bus details
    bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
    start_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
    end_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
    total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
    price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
    seats = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")
    try:
        rating = driver.find_elements(By.XPATH, "//div[@class='clearfix row-one']/div[@class='column-six p-right-10 w-10 fl']")
    except:
        rating = []

    # Append data to lists
    for bus in bus_name:
        Bus_names.append(bus.text)
        Route_links.append(link)
        Route_names.append(routes)
    for bus_type_elem in bus_type:
        Bus_types.append(bus_type_elem.text)
    for start_time_elem in start_time:
        Departure.append(start_time_elem.text)
    for end_time_elem in end_time:
        Arrival.append(end_time_elem.text)
    for total_duration_elem in total_duration:
        Total_Duration.append(total_duration_elem.text)
    for ratings_elem in rating:
        Ratings.append(ratings_elem.text if ratings_elem else "N/A")
    for price_elem in price:
        Prices.append(price_elem.text)
    for seats_elem in seats:
        Seats_Available.append(seats_elem.text)

print("Bus details extracted successfully.")

# Save bus data
data = {
    'Route_name': Route_names,
    'Route_link': Route_links,
    'Bus_name': Bus_names,
    'Bus_type': Bus_types,
    'Departing_time': Departure,
    'Total_duration': Total_Duration,
    'Reaching_time': Arrival,
    'Star_Rating': Ratings,
    'Price': Prices,
    'Seats_Available': Seats_Available
    
    
}
df_buses = pd.DataFrame(data)
df_buses.to_csv("redbus1_details.csv", index=False)
print("Bus details saved successfully.")

# Close the WebDriver
driver_.quit()


Navigating to page 2
No more pages to paginate or pagination element not found
Route details saved successfully.
Bus details extracted successfully.
Bus details saved successfully.


NameError: name 'driver_' is not defined

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
import time
import pandas as pd

# Initialize WebDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

# Open the desired webpage
driver.get("https://www.redbus.in/online-booking/apsrtc/?utm_source=rtchometile")
time.sleep(3)  # Allow time for the page to load


# Function to retrieve bus route links and route names
def link_route(path):
    LINKS = []
    ROUTE = []
    wait = WebDriverWait(driver, 10)

    while True:
        try:
            paths = driver.find_elements(By.XPATH, path)
            for links in paths:
                d = links.get_attribute("href")
                if d:
                    LINKS.append(d)
            for route in paths:
                ROUTE.append(route.text)
            
            # Handle pagination
            try:
                active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
                active_page_number = active_page_element.text
                next_page_number = str(int(active_page_number) + 1)
                next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
                next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
                driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
                time.sleep(1)
                next_page_button.click()
                print(f"Navigating to page {next_page_number}")
                time.sleep(10)
            except (NoSuchElementException, TimeoutException):
                print("No more pages to paginate or pagination element not found")
                break
        except Exception as e:
            print(f"Error occurred: {str(e)}")
            break

    return LINKS, ROUTE


# Retrieve route links and names
LINKS, ROUTE = link_route("//a[@class='route']")

# Save route data
df_routes = pd.DataFrame({"Route_name": ROUTE, "Route_link": LINKS})
df_routes.to_csv("APSRTC.csv", index=False)
print("Route details saved successfully.")

# Close the first driver
driver.quit()


# Initialize second WebDriver for bus details
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

Bus_names = []
Bus_types = []
Departure = []
Arrival = []
Ratings = []
Total_Duration = []
Prices = []
Seats_Available = []
Route_names = []
Route_links = []

# Loop through route links to extract bus details
for i, r in df_routes.iterrows():
    link = r["Route_link"]
    routes = r["Route_name"]
    driver.get(link)
    time.sleep(2)

    try:
        view_buses_button = driver.find_element(By.XPATH, "//div[@class='button']")
        view_buses_button.click()
    except:
        continue
    time.sleep(2)

    scrolling = True
    while scrolling:
        old_page_source = driver.page_source
        ActionChains(driver).send_keys(Keys.PAGE_DOWN).perform()
        time.sleep(5)
        new_page_source = driver.page_source
        if new_page_source == old_page_source:
            scrolling = False

    # Extract bus details
    bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
    start_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
    end_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
    total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
    price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
    seats = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")
    try:
        rating = driver.find_elements(By.XPATH, "//div[@class='clearfix row-one']/div[@class='column-six p-right-10 w-10 fl']")
    except:
        rating = []

    # Append data to lists
    for bus in bus_name:
        Bus_names.append(bus.text)
        Route_links.append(link)
        Route_names.append(routes)
    for bus_type_elem in bus_type:
        Bus_types.append(bus_type_elem.text)
    for start_time_elem in start_time:
        Departure.append(start_time_elem.text)
    for end_time_elem in end_time:
        Arrival.append(end_time_elem.text)
    for total_duration_elem in total_duration:
        Total_Duration.append(total_duration_elem.text)
    for ratings_elem in rating:
        Ratings.append(ratings_elem.text if ratings_elem else "N/A")
    for price_elem in price:
        Prices.append(price_elem.text)
    for seats_elem in seats:
        Seats_Available.append(seats_elem.text)

print("Bus details extracted successfully.")

# Save bus data
data = {
    'Route_name': Route_names,
    'Route_link': Route_links,
    'Bus_name': Bus_names,
    'Bus_type': Bus_types,
    'Departing_time': Departure,
    'Total_duration': Total_Duration,
    'Reaching_time': Arrival,
    'Star_Rating': Ratings,
    'Price': Prices,
    'Seats_Available': Seats_Available
    
    
}
df_buses = pd.DataFrame(data)
df_buses.to_csv("redbus_details.csv", index=False)
print("Bus details saved successfully.")

# Close the WebDriver
driver.quit()


Navigating to page 2
Navigating to page 3
Navigating to page 4
Navigating to page 5
No more pages to paginate or pagination element not found
Route details saved successfully.
Bus details extracted successfully.
Bus details saved successfully.


NameError: name 'driver_k' is not defined

In [3]:
#RSRTC
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
import time
import pandas as pd

# Initialize WebDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

# Open the desired webpage
driver.get("https://www.redbus.in/online-booking/rsrtc/?utm_source=rtchometile")
time.sleep(3)  # Allow time for the page to load


# Function to retrieve bus route links and route names
def link_route(path):
    LINKS = []
    ROUTE = []
    wait = WebDriverWait(driver, 10)

    while True:
        try:
            paths = driver.find_elements(By.XPATH, path)
            for links in paths:
                d = links.get_attribute("href")
                if d:
                    LINKS.append(d)
            for route in paths:
                ROUTE.append(route.text)
            
            # Handle pagination
            try:
                active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
                active_page_number = active_page_element.text
                next_page_number = str(int(active_page_number) + 1)
                next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
                next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
                driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
                time.sleep(1)
                next_page_button.click()
                print(f"Navigating to page {next_page_number}")
                time.sleep(10)
            except (NoSuchElementException, TimeoutException):
                print("No more pages to paginate or pagination element not found")
                break
        except Exception as e:
            print(f"Error occurred: {str(e)}")
            break

    return LINKS, ROUTE


# Retrieve route links and names
LINKS, ROUTE = link_route("//a[@class='route']")

# Save route data
df_routes = pd.DataFrame({"Route_name": ROUTE, "Route_link": LINKS})
df_routes.to_csv("RSRTC.csv", index=False)
print("Route details saved successfully.")

# Close the first driver
driver.quit()


# Initialize second WebDriver for bus details
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

Bus_names = []
Bus_types = []
Departure = []
Arrival = []
Ratings = []
Total_Duration = []
Prices = []
Seats_Available = []
Route_names = []
Route_links = []

# Loop through route links to extract bus details
for i, r in df_routes.iterrows():
    link = r["Route_link"]
    routes = r["Route_name"]
    driver.get(link)
    time.sleep(2)

    try:
        view_buses_button = driver.find_element(By.XPATH, "//div[@class='button']")
        view_buses_button.click()
    except:
        continue
    time.sleep(2)

    scrolling = True
    while scrolling:
        old_page_source = driver.page_source
        ActionChains(driver).send_keys(Keys.PAGE_DOWN).perform()
        time.sleep(5)
        new_page_source = driver.page_source
        if new_page_source == old_page_source:
            scrolling = False

    # Extract bus details
    bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
    start_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
    end_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
    total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
    price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
    seats = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")
    try:
        rating = driver.find_elements(By.XPATH, "//div[@class='clearfix row-one']/div[@class='column-six p-right-10 w-10 fl']")
    except:
        rating = []

    # Append data to lists
    for bus in bus_name:
        Bus_names.append(bus.text)
        Route_links.append(link)
        Route_names.append(routes)
    for bus_type_elem in bus_type:
        Bus_types.append(bus_type_elem.text)
    for start_time_elem in start_time:
        Departure.append(start_time_elem.text)
    for end_time_elem in end_time:
        Arrival.append(end_time_elem.text)
    for total_duration_elem in total_duration:
        Total_Duration.append(total_duration_elem.text)
    for ratings_elem in rating:
        Ratings.append(ratings_elem.text if ratings_elem else "N/A")
    for price_elem in price:
        Prices.append(price_elem.text)
    for seats_elem in seats:
        Seats_Available.append(seats_elem.text)

print("Bus details extracted successfully.")

# Save bus data
data = {
    'Route_name': Route_names,
    'Route_link': Route_links,
    'Bus_name': Bus_names,
    'Bus_type': Bus_types,
    'Departing_time': Departure,
    'Total_duration': Total_Duration,
    'Reaching_time': Arrival,
    'Star_Rating': Ratings,
    'Price': Prices,
    'Seats_Available': Seats_Available
    
    
}
df_buses = pd.DataFrame(data)
df_buses.to_csv("redbus2_details.csv", index=False)
print("Bus details saved successfully.")

# Close the WebDriver
driver.quit()


Navigating to page 2
No more pages to paginate or pagination element not found
Route details saved successfully.
Bus details extracted successfully.
Bus details saved successfully.


In [4]:
#CTU
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
import time
import pandas as pd

# Initialize WebDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

# Open the desired webpage
driver.get("https://www.redbus.in/online-booking/chandigarh-transport-undertaking-ctu")
time.sleep(3)  # Allow time for the page to load


# Function to retrieve bus route links and route names
def link_route(path):
    LINKS = []
    ROUTE = []
    wait = WebDriverWait(driver, 10)

    while True:
        try:
            paths = driver.find_elements(By.XPATH, path)
            for links in paths:
                d = links.get_attribute("href")
                if d:
                    LINKS.append(d)
            for route in paths:
                ROUTE.append(route.text)
            
            # Handle pagination
            try:
                active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
                active_page_number = active_page_element.text
                next_page_number = str(int(active_page_number) + 1)
                next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
                next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
                driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
                time.sleep(1)
                next_page_button.click()
                print(f"Navigating to page {next_page_number}")
                time.sleep(10)
            except (NoSuchElementException, TimeoutException):
                print("No more pages to paginate or pagination element not found")
                break
        except Exception as e:
            print(f"Error occurred: {str(e)}")
            break

    return LINKS, ROUTE


# Retrieve route links and names
LINKS, ROUTE = link_route("//a[@class='route']")

# Save route data
df_routes = pd.DataFrame({"Route_name": ROUTE, "Route_link": LINKS})
df_routes.to_csv("CTU.csv", index=False)
print("Route details saved successfully.")

# Close the first driver
driver.quit()


# Initialize second WebDriver for bus details
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

Bus_names = []
Bus_types = []
Departure = []
Arrival = []
Ratings = []
Total_Duration = []
Prices = []
Seats_Available = []
Route_names = []
Route_links = []

# Loop through route links to extract bus details
for i, r in df_routes.iterrows():
    link = r["Route_link"]
    routes = r["Route_name"]
    driver.get(link)
    time.sleep(2)

    try:
        view_buses_button = driver.find_element(By.XPATH, "//div[@class='button']")
        view_buses_button.click()
    except:
        continue
    time.sleep(2)

    scrolling = True
    while scrolling:
        old_page_source = driver.page_source
        ActionChains(driver).send_keys(Keys.PAGE_DOWN).perform()
        time.sleep(5)
        new_page_source = driver.page_source
        if new_page_source == old_page_source:
            scrolling = False

    # Extract bus details
    bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
    start_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
    end_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
    total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
    price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
    seats = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")
    try:
        rating = driver.find_elements(By.XPATH, "//div[@class='clearfix row-one']/div[@class='column-six p-right-10 w-10 fl']")
    except:
        rating = []

    # Append data to lists
    for bus in bus_name:
        Bus_names.append(bus.text)
        Route_links.append(link)
        Route_names.append(routes)
    for bus_type_elem in bus_type:
        Bus_types.append(bus_type_elem.text)
    for start_time_elem in start_time:
        Departure.append(start_time_elem.text)
    for end_time_elem in end_time:
        Arrival.append(end_time_elem.text)
    for total_duration_elem in total_duration:
        Total_Duration.append(total_duration_elem.text)
    for ratings_elem in rating:
        Ratings.append(ratings_elem.text if ratings_elem else "N/A")
    for price_elem in price:
        Prices.append(price_elem.text)
    for seats_elem in seats:
        Seats_Available.append(seats_elem.text)

print("Bus details extracted successfully.")

# Save bus data
data = {
    'Route_name': Route_names,
    'Route_link': Route_links,
    'Bus_name': Bus_names,
    'Bus_type': Bus_types,
    'Departing_time': Departure,
    'Total_duration': Total_Duration,
    'Reaching_time': Arrival,
    'Star_Rating': Ratings,
    'Price': Prices,
    'Seats_Available': Seats_Available
    
    
}
df_buses = pd.DataFrame(data)
df_buses.to_csv("redbus3_details.csv", index=False)
print("Bus details saved successfully.")

# Close the WebDriver
driver.quit()


Navigating to page 2
Navigating to page 3
No more pages to paginate or pagination element not found
Route details saved successfully.
Bus details extracted successfully.
Bus details saved successfully.


In [5]:
#HRTC
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
import time
import pandas as pd

# Initialize WebDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

# Open the desired webpage
driver.get("https://www.redbus.in/online-booking/hrtc/?utm_source=rtchometile")
time.sleep(3)  # Allow time for the page to load


# Function to retrieve bus route links and route names
def link_route(path):
    LINKS = []
    ROUTE = []
    wait = WebDriverWait(driver, 10)

    while True:
        try:
            paths = driver.find_elements(By.XPATH, path)
            for links in paths:
                d = links.get_attribute("href")
                if d:
                    LINKS.append(d)
            for route in paths:
                ROUTE.append(route.text)
            
            # Handle pagination
            try:
                active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
                active_page_number = active_page_element.text
                next_page_number = str(int(active_page_number) + 1)
                next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
                next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
                driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
                time.sleep(1)
                next_page_button.click()
                print(f"Navigating to page {next_page_number}")
                time.sleep(10)
            except (NoSuchElementException, TimeoutException):
                print("No more pages to paginate or pagination element not found")
                break
        except Exception as e:
            print(f"Error occurred: {str(e)}")
            break

    return LINKS, ROUTE


# Retrieve route links and names
LINKS, ROUTE = link_route("//a[@class='route']")

# Save route data
df_routes = pd.DataFrame({"Route_name": ROUTE, "Route_link": LINKS})
df_routes.to_csv("HRTC.csv", index=False)
print("Route details saved successfully.")

# Close the first driver
driver.quit()


# Initialize second WebDriver for bus details
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

Bus_names = []
Bus_types = []
Departure = []
Arrival = []
Ratings = []
Total_Duration = []
Prices = []
Seats_Available = []
Route_names = []
Route_links = []

# Loop through route links to extract bus details
for i, r in df_routes.iterrows():
    link = r["Route_link"]
    routes = r["Route_name"]
    driver.get(link)
    time.sleep(2)

    try:
        view_buses_button = driver.find_element(By.XPATH, "//div[@class='button']")
        view_buses_button.click()
    except:
        continue
    time.sleep(2)

    scrolling = True
    while scrolling:
        old_page_source = driver.page_source
        ActionChains(driver).send_keys(Keys.PAGE_DOWN).perform()
        time.sleep(5)
        new_page_source = driver.page_source
        if new_page_source == old_page_source:
            scrolling = False

    # Extract bus details
    bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
    start_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
    end_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
    total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
    price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
    seats = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")
    try:
        rating = driver.find_elements(By.XPATH, "//div[@class='clearfix row-one']/div[@class='column-six p-right-10 w-10 fl']")
    except:
        rating = []

    # Append data to lists
    for bus in bus_name:
        Bus_names.append(bus.text)
        Route_links.append(link)
        Route_names.append(routes)
    for bus_type_elem in bus_type:
        Bus_types.append(bus_type_elem.text)
    for start_time_elem in start_time:
        Departure.append(start_time_elem.text)
    for end_time_elem in end_time:
        Arrival.append(end_time_elem.text)
    for total_duration_elem in total_duration:
        Total_Duration.append(total_duration_elem.text)
    for ratings_elem in rating:
        Ratings.append(ratings_elem.text if ratings_elem else "N/A")
    for price_elem in price:
        Prices.append(price_elem.text)
    for seats_elem in seats:
        Seats_Available.append(seats_elem.text)

print("Bus details extracted successfully.")

# Save bus data
data = {
    'Route_name': Route_names,
    'Route_link': Route_links,
    'Bus_name': Bus_names,
    'Bus_type': Bus_types,
    'Departing_time': Departure,
    'Total_duration': Total_Duration,
    'Reaching_time': Arrival,
    'Star_Rating': Ratings,
    'Price': Prices,
    'Seats_Available': Seats_Available
    
    
}
df_buses = pd.DataFrame(data)
df_buses.to_csv("redbus4_details.csv", index=False)
print("Bus details saved successfully.")

# Close the WebDriver
driver.quit()

Navigating to page 2
Navigating to page 3
Navigating to page 4
No more pages to paginate or pagination element not found
Route details saved successfully.
Bus details extracted successfully.
Bus details saved successfully.


In [7]:
#JKSRTC
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
import time
import pandas as pd

# Initialize WebDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

# Open the desired webpage
driver.get("https://www.redbus.in/online-booking/jksrtc")
time.sleep(3)  # Allow time for the page to load


# Function to retrieve bus route links and route names
def link_route(path):
    LINKS = []
    ROUTE = []
    wait = WebDriverWait(driver, 10)

    while True:
        try:
            paths = driver.find_elements(By.XPATH, path)
            for links in paths:
                d = links.get_attribute("href")
                if d:
                    LINKS.append(d)
            for route in paths:
                ROUTE.append(route.text)
            
            # Handle pagination
            try:
                active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
                active_page_number = active_page_element.text
                next_page_number = str(int(active_page_number) + 1)
                next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
                next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
                driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
                time.sleep(1)
                next_page_button.click()
                print(f"Navigating to page {next_page_number}")
                time.sleep(10)
            except (NoSuchElementException, TimeoutException):
                print("No more pages to paginate or pagination element not found")
                break
        except Exception as e:
            print(f"Error occurred: {str(e)}")
            break

    return LINKS, ROUTE


# Retrieve route links and names
LINKS, ROUTE = link_route("//a[@class='route']")

# Save route data
df_routes = pd.DataFrame({"Route_name": ROUTE, "Route_link": LINKS})
df_routes.to_csv("JKSRTC.csv", index=False)
print("Route details saved successfully.")

# Close the first driver
driver.quit()


# Initialize second WebDriver for bus details
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

Bus_names = []
Bus_types = []
Departure = []
Arrival = []
Ratings = []
Total_Duration = []
Prices = []
Seats_Available = []
Route_names = []
Route_links = []

# Loop through route links to extract bus details
for i, r in df_routes.iterrows():
    link = r["Route_link"]
    routes = r["Route_name"]
    driver.get(link)
    time.sleep(2)

    try:
        view_buses_button = driver.find_element(By.XPATH, "//div[@class='button']")
        view_buses_button.click()
    except:
        continue
    time.sleep(2)

    scrolling = True
    while scrolling:
        old_page_source = driver.page_source
        ActionChains(driver).send_keys(Keys.PAGE_DOWN).perform()
        time.sleep(5)
        new_page_source = driver.page_source
        if new_page_source == old_page_source:
            scrolling = False

    # Extract bus details
    bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
    start_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
    end_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
    total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
    price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
    seats = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")
    try:
        rating = driver.find_elements(By.XPATH, "//div[@class='clearfix row-one']/div[@class='column-six p-right-10 w-10 fl']")
    except:
        rating = []

    # Append data to lists
    for bus in bus_name:
        Bus_names.append(bus.text)
        Route_links.append(link)
        Route_names.append(routes)
    for bus_type_elem in bus_type:
        Bus_types.append(bus_type_elem.text)
    for start_time_elem in start_time:
        Departure.append(start_time_elem.text)
    for end_time_elem in end_time:
        Arrival.append(end_time_elem.text)
    for total_duration_elem in total_duration:
        Total_Duration.append(total_duration_elem.text)
    for ratings_elem in rating:
        Ratings.append(ratings_elem.text if ratings_elem else "N/A")
    for price_elem in price:
        Prices.append(price_elem.text)
    for seats_elem in seats:
        Seats_Available.append(seats_elem.text)

print("Bus details extracted successfully.")

# Save bus data
data = {
    'Route_name': Route_names,
    'Route_link': Route_links,
    'Bus_name': Bus_names,
    'Bus_type': Bus_types,
    'Departing_time': Departure,
    'Total_duration': Total_Duration,
    'Reaching_time': Arrival,
    'Star_Rating': Ratings,
    'Price': Prices,
    'Seats_Available': Seats_Available
    
    
}
df_buses = pd.DataFrame(data)
df_buses.to_csv("redbus5_details.csv", index=False)
print("Bus details saved successfully.")

# Close the WebDriver
driver.quit()

No more pages to paginate or pagination element not found
Route details saved successfully.
Bus details extracted successfully.
Bus details saved successfully.


In [8]:
#SBSTC
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
import time
import pandas as pd

# Initialize WebDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

# Open the desired webpage
driver.get("https://www.redbus.in/online-booking/south-bengal-state-transport-corporation-sbstc/?utm_source=rtchometile")
time.sleep(3)  # Allow time for the page to load


# Function to retrieve bus route links and route names
def link_route(path):
    LINKS = []
    ROUTE = []
    wait = WebDriverWait(driver, 10)

    while True:
        try:
            paths = driver.find_elements(By.XPATH, path)
            for links in paths:
                d = links.get_attribute("href")
                if d:
                    LINKS.append(d)
            for route in paths:
                ROUTE.append(route.text)
            
            # Handle pagination
            try:
                active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
                active_page_number = active_page_element.text
                next_page_number = str(int(active_page_number) + 1)
                next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
                next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
                driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
                time.sleep(1)
                next_page_button.click()
                print(f"Navigating to page {next_page_number}")
                time.sleep(10)
            except (NoSuchElementException, TimeoutException):
                print("No more pages to paginate or pagination element not found")
                break
        except Exception as e:
            print(f"Error occurred: {str(e)}")
            break

    return LINKS, ROUTE


# Retrieve route links and names
LINKS, ROUTE = link_route("//a[@class='route']")

# Save route data
df_routes = pd.DataFrame({"Route_name": ROUTE, "Route_link": LINKS})
df_routes.to_csv("SBSTC.csv", index=False)
print("Route details saved successfully.")

# Close the first driver
driver.quit()


# Initialize second WebDriver for bus details
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

Bus_names = []
Bus_types = []
Departure = []
Arrival = []
Ratings = []
Total_Duration = []
Prices = []
Seats_Available = []
Route_names = []
Route_links = []

# Loop through route links to extract bus details
for i, r in df_routes.iterrows():
    link = r["Route_link"]
    routes = r["Route_name"]
    driver.get(link)
    time.sleep(2)

    try:
        view_buses_button = driver.find_element(By.XPATH, "//div[@class='button']")
        view_buses_button.click()
    except:
        continue
    time.sleep(2)

    scrolling = True
    while scrolling:
        old_page_source = driver.page_source
        ActionChains(driver).send_keys(Keys.PAGE_DOWN).perform()
        time.sleep(5)
        new_page_source = driver.page_source
        if new_page_source == old_page_source:
            scrolling = False

    # Extract bus details
    bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
    start_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
    end_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
    total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
    price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
    seats = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")
    try:
        rating = driver.find_elements(By.XPATH, "//div[@class='clearfix row-one']/div[@class='column-six p-right-10 w-10 fl']")
    except:
        rating = []

    # Append data to lists
    for bus in bus_name:
        Bus_names.append(bus.text)
        Route_links.append(link)
        Route_names.append(routes)
    for bus_type_elem in bus_type:
        Bus_types.append(bus_type_elem.text)
    for start_time_elem in start_time:
        Departure.append(start_time_elem.text)
    for end_time_elem in end_time:
        Arrival.append(end_time_elem.text)
    for total_duration_elem in total_duration:
        Total_Duration.append(total_duration_elem.text)
    for ratings_elem in rating:
        Ratings.append(ratings_elem.text if ratings_elem else "N/A")
    for price_elem in price:
        Prices.append(price_elem.text)
    for seats_elem in seats:
        Seats_Available.append(seats_elem.text)

print("Bus details extracted successfully.")

# Save bus data
data = {
    'Route_name': Route_names,
    'Route_link': Route_links,
    'Bus_name': Bus_names,
    'Bus_type': Bus_types,
    'Departing_time': Departure,
    'Total_duration': Total_Duration,
    'Reaching_time': Arrival,
    'Star_Rating': Ratings,
    'Price': Prices,
    'Seats_Available': Seats_Available
    
    
}
df_buses = pd.DataFrame(data)
df_buses.to_csv("redbus6_details.csv", index=False)
print("Bus details saved successfully.")

# Close the WebDriver
driver.quit()

Navigating to page 2
Navigating to page 3
Navigating to page 4
Navigating to page 5
No more pages to paginate or pagination element not found
Route details saved successfully.
Bus details extracted successfully.
Bus details saved successfully.


In [9]:
#WBTC(CTC)
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
import time
import pandas as pd

# Initialize WebDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

# Open the desired webpage
driver.get("https://www.redbus.in/online-booking/wbtc-ctc/?utm_source=rtchometile")
time.sleep(3)  # Allow time for the page to load


# Function to retrieve bus route links and route names
def link_route(path):
    LINKS = []
    ROUTE = []
    wait = WebDriverWait(driver, 10)

    while True:
        try:
            paths = driver.find_elements(By.XPATH, path)
            for links in paths:
                d = links.get_attribute("href")
                if d:
                    LINKS.append(d)
            for route in paths:
                ROUTE.append(route.text)
            
            # Handle pagination
            try:
                active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
                active_page_number = active_page_element.text
                next_page_number = str(int(active_page_number) + 1)
                next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
                next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
                driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
                time.sleep(1)
                next_page_button.click()
                print(f"Navigating to page {next_page_number}")
                time.sleep(10)
            except (NoSuchElementException, TimeoutException):
                print("No more pages to paginate or pagination element not found")
                break
        except Exception as e:
            print(f"Error occurred: {str(e)}")
            break

    return LINKS, ROUTE


# Retrieve route links and names
LINKS, ROUTE = link_route("//a[@class='route']")

# Save route data
df_routes = pd.DataFrame({"Route_name": ROUTE, "Route_link": LINKS})
df_routes.to_csv("WBTC.csv", index=False)
print("Route details saved successfully.")

# Close the first driver
driver.quit()


# Initialize second WebDriver for bus details
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

Bus_names = []
Bus_types = []
Departure = []
Arrival = []
Ratings = []
Total_Duration = []
Prices = []
Seats_Available = []
Route_names = []
Route_links = []

# Loop through route links to extract bus details
for i, r in df_routes.iterrows():
    link = r["Route_link"]
    routes = r["Route_name"]
    driver.get(link)
    time.sleep(2)

    try:
        view_buses_button = driver.find_element(By.XPATH, "//div[@class='button']")
        view_buses_button.click()
    except:
        continue
    time.sleep(2)

    scrolling = True
    while scrolling:
        old_page_source = driver.page_source
        ActionChains(driver).send_keys(Keys.PAGE_DOWN).perform()
        time.sleep(5)
        new_page_source = driver.page_source
        if new_page_source == old_page_source:
            scrolling = False

    # Extract bus details
    bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
    start_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
    end_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
    total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
    price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
    seats = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")
    try:
        rating = driver.find_elements(By.XPATH, "//div[@class='clearfix row-one']/div[@class='column-six p-right-10 w-10 fl']")
    except:
        rating = []

    # Append data to lists
    for bus in bus_name:
        Bus_names.append(bus.text)
        Route_links.append(link)
        Route_names.append(routes)
    for bus_type_elem in bus_type:
        Bus_types.append(bus_type_elem.text)
    for start_time_elem in start_time:
        Departure.append(start_time_elem.text)
    for end_time_elem in end_time:
        Arrival.append(end_time_elem.text)
    for total_duration_elem in total_duration:
        Total_Duration.append(total_duration_elem.text)
    for ratings_elem in rating:
        Ratings.append(ratings_elem.text if ratings_elem else "N/A")
    for price_elem in price:
        Prices.append(price_elem.text)
    for seats_elem in seats:
        Seats_Available.append(seats_elem.text)

print("Bus details extracted successfully.")

# Save bus data
data = {
    'Route_name': Route_names,
    'Route_link': Route_links,
    'Bus_name': Bus_names,
    'Bus_type': Bus_types,
    'Departing_time': Departure,
    'Total_duration': Total_Duration,
    'Reaching_time': Arrival,
    'Star_Rating': Ratings,
    'Price': Prices,
    'Seats_Available': Seats_Available
    
    
}
df_buses = pd.DataFrame(data)
df_buses.to_csv("redbus7_details.csv", index=False)
print("Bus details saved successfully.")

# Close the WebDriver
driver.quit()

Navigating to page 2
Navigating to page 3
Navigating to page 4
No more pages to paginate or pagination element not found
Route details saved successfully.
Bus details extracted successfully.
Bus details saved successfully.


In [10]:
#PEPSU
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
import time
import pandas as pd

# Initialize WebDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

# Open the desired webpage
driver.get("https://www.redbus.in/online-booking/pepsu/?utm_source=rtchometile")
time.sleep(3)  # Allow time for the page to load


# Function to retrieve bus route links and route names
def link_route(path):
    LINKS = []
    ROUTE = []
    wait = WebDriverWait(driver, 10)

    while True:
        try:
            paths = driver.find_elements(By.XPATH, path)
            for links in paths:
                d = links.get_attribute("href")
                if d:
                    LINKS.append(d)
            for route in paths:
                ROUTE.append(route.text)
            
            # Handle pagination
            try:
                active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
                active_page_number = active_page_element.text
                next_page_number = str(int(active_page_number) + 1)
                next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
                next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
                driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
                time.sleep(1)
                next_page_button.click()
                print(f"Navigating to page {next_page_number}")
                time.sleep(10)
            except (NoSuchElementException, TimeoutException):
                print("No more pages to paginate or pagination element not found")
                break
        except Exception as e:
            print(f"Error occurred: {str(e)}")
            break

    return LINKS, ROUTE


# Retrieve route links and names
LINKS, ROUTE = link_route("//a[@class='route']")

# Save route data
df_routes = pd.DataFrame({"Route_name": ROUTE, "Route_link": LINKS})
df_routes.to_csv("PEPSU.csv", index=False)
print("Route details saved successfully.")

# Close the first driver
driver.quit()


# Initialize second WebDriver for bus details
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

Bus_names = []
Bus_types = []
Departure = []
Arrival = []
Ratings = []
Total_Duration = []
Prices = []
Seats_Available = []
Route_names = []
Route_links = []

# Loop through route links to extract bus details
for i, r in df_routes.iterrows():
    link = r["Route_link"]
    routes = r["Route_name"]
    driver.get(link)
    time.sleep(2)

    try:
        view_buses_button = driver.find_element(By.XPATH, "//div[@class='button']")
        view_buses_button.click()
    except:
        continue
    time.sleep(2)

    scrolling = True
    while scrolling:
        old_page_source = driver.page_source
        ActionChains(driver).send_keys(Keys.PAGE_DOWN).perform()
        time.sleep(5)
        new_page_source = driver.page_source
        if new_page_source == old_page_source:
            scrolling = False

    # Extract bus details
    bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
    start_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
    end_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
    total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
    price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
    seats = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")
    try:
        rating = driver.find_elements(By.XPATH, "//div[@class='clearfix row-one']/div[@class='column-six p-right-10 w-10 fl']")
    except:
        rating = []

    # Append data to lists
    for bus in bus_name:
        Bus_names.append(bus.text)
        Route_links.append(link)
        Route_names.append(routes)
    for bus_type_elem in bus_type:
        Bus_types.append(bus_type_elem.text)
    for start_time_elem in start_time:
        Departure.append(start_time_elem.text)
    for end_time_elem in end_time:
        Arrival.append(end_time_elem.text)
    for total_duration_elem in total_duration:
        Total_Duration.append(total_duration_elem.text)
    for ratings_elem in rating:
        Ratings.append(ratings_elem.text if ratings_elem else "N/A")
    for price_elem in price:
        Prices.append(price_elem.text)
    for seats_elem in seats:
        Seats_Available.append(seats_elem.text)

print("Bus details extracted successfully.")

# Save bus data
data = {
    'Route_name': Route_names,
    'Route_link': Route_links,
    'Bus_name': Bus_names,
    'Bus_type': Bus_types,
    'Departing_time': Departure,
    'Total_duration': Total_Duration,
    'Reaching_time': Arrival,
    'Star_Rating': Ratings,
    'Price': Prices,
    'Seats_Available': Seats_Available
    
    
}
df_buses = pd.DataFrame(data)
df_buses.to_csv("redbus8_details.csv", index=False)
print("Bus details saved successfully.")

# Close the WebDriver
driver.quit()

Navigating to page 2
Navigating to page 3
No more pages to paginate or pagination element not found
Route details saved successfully.
Bus details extracted successfully.
Bus details saved successfully.


In [11]:
#TSRTC
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
import time
import pandas as pd

# Initialize WebDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

# Open the desired webpage
driver.get("https://www.redbus.in/online-booking/tsrtc/?utm_source=rtchometile")
time.sleep(3)  # Allow time for the page to load


# Function to retrieve bus route links and route names
def link_route(path):
    LINKS = []
    ROUTE = []
    wait = WebDriverWait(driver, 10)

    while True:
        try:
            paths = driver.find_elements(By.XPATH, path)
            for links in paths:
                d = links.get_attribute("href")
                if d:
                    LINKS.append(d)
            for route in paths:
                ROUTE.append(route.text)
            
            # Handle pagination
            try:
                active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
                active_page_number = active_page_element.text
                next_page_number = str(int(active_page_number) + 1)
                next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
                next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
                driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
                time.sleep(1)
                next_page_button.click()
                print(f"Navigating to page {next_page_number}")
                time.sleep(10)
            except (NoSuchElementException, TimeoutException):
                print("No more pages to paginate or pagination element not found")
                break
        except Exception as e:
            print(f"Error occurred: {str(e)}")
            break

    return LINKS, ROUTE


# Retrieve route links and names
LINKS, ROUTE = link_route("//a[@class='route']")

# Save route data
df_routes = pd.DataFrame({"Route_name": ROUTE, "Route_link": LINKS})
df_routes.to_csv("TSRTC.csv", index=False)
print("Route details saved successfully.")

# Close the first driver
driver.quit()


# Initialize second WebDriver for bus details
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

Bus_names = []
Bus_types = []
Departure = []
Arrival = []
Ratings = []
Total_Duration = []
Prices = []
Seats_Available = []
Route_names = []
Route_links = []

# Loop through route links to extract bus details
for i, r in df_routes.iterrows():
    link = r["Route_link"]
    routes = r["Route_name"]
    driver.get(link)
    time.sleep(2)

    try:
        view_buses_button = driver.find_element(By.XPATH, "//div[@class='button']")
        view_buses_button.click()
    except:
        continue
    time.sleep(2)

    scrolling = True
    while scrolling:
        old_page_source = driver.page_source
        ActionChains(driver).send_keys(Keys.PAGE_DOWN).perform()
        time.sleep(5)
        new_page_source = driver.page_source
        if new_page_source == old_page_source:
            scrolling = False

    # Extract bus details
    bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
    start_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
    end_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
    total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
    price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
    seats = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")
    try:
        rating = driver.find_elements(By.XPATH, "//div[@class='clearfix row-one']/div[@class='column-six p-right-10 w-10 fl']")
    except:
        rating = []

    # Append data to lists
    for bus in bus_name:
        Bus_names.append(bus.text)
        Route_links.append(link)
        Route_names.append(routes)
    for bus_type_elem in bus_type:
        Bus_types.append(bus_type_elem.text)
    for start_time_elem in start_time:
        Departure.append(start_time_elem.text)
    for end_time_elem in end_time:
        Arrival.append(end_time_elem.text)
    for total_duration_elem in total_duration:
        Total_Duration.append(total_duration_elem.text)
    for ratings_elem in rating:
        Ratings.append(ratings_elem.text if ratings_elem else "N/A")
    for price_elem in price:
        Prices.append(price_elem.text)
    for seats_elem in seats:
        Seats_Available.append(seats_elem.text)

print("Bus details extracted successfully.")

# Save bus data
data = {
    'Route_name': Route_names,
    'Route_link': Route_links,
    'Bus_name': Bus_names,
    'Bus_type': Bus_types,
    'Departing_time': Departure,
    'Total_duration': Total_Duration,
    'Reaching_time': Arrival,
    'Star_Rating': Ratings,
    'Price': Prices,
    'Seats_Available': Seats_Available
    
    
}
df_buses = pd.DataFrame(data)
df_buses.to_csv("redbus9_details.csv", index=False)
print("Bus details saved successfully.")

# Close the WebDriver
driver.quit()

Navigating to page 2
Navigating to page 3
No more pages to paginate or pagination element not found
Route details saved successfully.
Bus details extracted successfully.
Bus details saved successfully.


In [12]:
from mysql import connector
connection =connector.connect(
    host="localhost",
    user="root",
    password="Mona@999"
)
mycursor = connection.cursor()

In [13]:
query = "Create database if not exists REDBUS_DETAILS"
mycursor.execute(query)

In [15]:
query ="use RED_BUS"
mycursor.execute(query)

In [9]:
import pandas as pd
import mysql.connector

# Reading the CSV files
dfbus1 = pd.read_csv("redbus0_details.csv")
dfbus2 = pd.read_csv("redbus1_details.csv")
dfbus3 = pd.read_csv("redbus2_details.csv")
dfbus4 = pd.read_csv("redbus3_details.csv")
dfbus5 = pd.read_csv("redbus4_details.csv")
dfbus6 = pd.read_csv("redbus5_details.csv")
dfbus7 = pd.read_csv("redbus6_details.csv")
dfbus8 = pd.read_csv("redbus7_details.csv")
dfbus9 = pd.read_csv("redbus8_details.csv")
dfbus10 = pd.read_csv("redbus9_details.csv")

# Concatenating all dataframes into one
dfbus = pd.concat([dfbus1, dfbus2, dfbus3, dfbus4, dfbus5, dfbus6, dfbus7, dfbus8, dfbus9, dfbus10], ignore_index=True)




  dfbus = pd.concat([dfbus1, dfbus2, dfbus3, dfbus4, dfbus5, dfbus6, dfbus7, dfbus8, dfbus9, dfbus10], ignore_index=True)


In [10]:
dfbus.head(10)

Unnamed: 0,Route_name,Route_link,Bus_name,Bus_type,Departing_time,Total_duration,Reaching_time,Star_Rating,Price,Seats_Available
0,Vijayawada to Hyderabad,https://www.redbus.in/bus-tickets/vijayawada-t...,APSRTC - 35085,DOLPHIN CRUISE (VOLVO / SCANIA A.C Multi Axle),18:30,06h 55m,01:25,4.0\n71,720.0,43 Seats available
1,Vijayawada to Hyderabad,https://www.redbus.in/bus-tickets/vijayawada-t...,APSRTC - 3613,"SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)",19:00,07h 42m,02:42,2.5\n22,469.0,21 Seats available
2,Vijayawada to Hyderabad,https://www.redbus.in/bus-tickets/vijayawada-t...,APSRTC - 9316,"SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)",20:20,07h 50m,04:10,4.4\n67,469.0,28 Seats available
3,Vijayawada to Hyderabad,https://www.redbus.in/bus-tickets/vijayawada-t...,APSRTC - 35189,DOLPHIN CRUISE (VOLVO / SCANIA A.C Multi Axle),20:30,04h 50m,01:20,4.0\n43,670.0,35 Seats available
4,Vijayawada to Hyderabad,https://www.redbus.in/bus-tickets/vijayawada-t...,APSRTC - 4046,"SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)",21:00,06h 40m,03:40,4.3\n37,469.0,17 Seats available
5,Vijayawada to Hyderabad,https://www.redbus.in/bus-tickets/vijayawada-t...,APSRTC - 9651,"SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)",21:00,07h 41m,04:41,4.3\n29,469.0,30 Seats available
6,Vijayawada to Hyderabad,https://www.redbus.in/bus-tickets/vijayawada-t...,APSRTC - 9654,"SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)",21:40,07h 40m,05:20,3.6\n34,469.0,12 Seats available
7,Vijayawada to Hyderabad,https://www.redbus.in/bus-tickets/vijayawada-t...,APSRTC - 40568,"SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)",21:42,08h 18m,06:00,3.9\n54,469.0,31 Seats available
8,Vijayawada to Hyderabad,https://www.redbus.in/bus-tickets/vijayawada-t...,APSRTC - 3916,"SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)",21:45,05h 05m,02:50,4.4\n77,469.0,23 Seats available
9,Vijayawada to Hyderabad,https://www.redbus.in/bus-tickets/vijayawada-t...,APSRTC - 35179,VENNELA (A.C. SLEEPER),21:45,04h 50m,02:35,4.7\n23,781.0,20 Seats available


In [11]:
dfbus.isna().sum()

Route_name         0
Route_link         0
Bus_name           0
Bus_type           0
Departing_time     0
Total_duration     0
Reaching_time      0
Star_Rating        0
Price              0
Seats_Available    0
dtype: int64

In [12]:
dfbus.shape

(1825, 10)

In [13]:
dfbus['Route_link'].unique

<bound method Series.unique of 0       https://www.redbus.in/bus-tickets/vijayawada-t...
1       https://www.redbus.in/bus-tickets/vijayawada-t...
2       https://www.redbus.in/bus-tickets/vijayawada-t...
3       https://www.redbus.in/bus-tickets/vijayawada-t...
4       https://www.redbus.in/bus-tickets/vijayawada-t...
                              ...                        
1820    https://www.redbus.in/bus-tickets/hyderabad-to...
1821    https://www.redbus.in/bus-tickets/hyderabad-to...
1822    https://www.redbus.in/bus-tickets/hyderabad-to...
1823    https://www.redbus.in/bus-tickets/hyderabad-to...
1824    https://www.redbus.in/bus-tickets/hyderabad-to...
Name: Route_link, Length: 1825, dtype: object>

In [14]:
dfbus.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1825 entries, 0 to 1824
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Route_name       1825 non-null   object 
 1   Route_link       1825 non-null   object 
 2   Bus_name         1825 non-null   object 
 3   Bus_type         1825 non-null   object 
 4   Departing_time   1825 non-null   object 
 5   Total_duration   1825 non-null   object 
 6   Reaching_time    1825 non-null   object 
 7   Star_Rating      1825 non-null   object 
 8   Price            1825 non-null   float64
 9   Seats_Available  1825 non-null   object 
dtypes: float64(1), object(9)
memory usage: 142.7+ KB


In [15]:
dfbus['Departing_time']=pd.to_datetime(dfbus['Departing_time'],format="%H:%M").dt.strftime("%H:%M")

In [16]:
dfbus['Reaching_time']=pd.to_datetime(dfbus['Reaching_time'],format="%H:%M").dt.strftime('%H:%M')

In [17]:
print(type(dfbus['Departing_time']))

<class 'pandas.core.series.Series'>


In [18]:
print(type(dfbus['Reaching_time']))

<class 'pandas.core.series.Series'>


In [19]:
dfbus['Departing_time']

0       18:30
1       19:00
2       20:20
3       20:30
4       21:00
        ...  
1820    19:00
1821    19:00
1822    19:05
1823    19:45
1824    20:40
Name: Departing_time, Length: 1825, dtype: object

In [20]:
dfbus.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1825 entries, 0 to 1824
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Route_name       1825 non-null   object 
 1   Route_link       1825 non-null   object 
 2   Bus_name         1825 non-null   object 
 3   Bus_type         1825 non-null   object 
 4   Departing_time   1825 non-null   object 
 5   Total_duration   1825 non-null   object 
 6   Reaching_time    1825 non-null   object 
 7   Star_Rating      1825 non-null   object 
 8   Price            1825 non-null   float64
 9   Seats_Available  1825 non-null   object 
dtypes: float64(1), object(9)
memory usage: 142.7+ KB


In [21]:
#convert rating to float
dfbus['Star_Rating']

0       4.0\n71
1       2.5\n22
2       4.4\n67
3       4.0\n43
4       4.3\n37
         ...   
1820        5.0
1821        4.5
1822        3.4
1823        3.9
1824        3.6
Name: Star_Rating, Length: 1825, dtype: object

In [22]:
dfbus['Star_Rating'] = dfbus['Star_Rating'].str.split('\n').str[0]
dfbus['Star_Rating'] = pd.to_numeric(dfbus['Star_Rating'], errors='coerce')

In [23]:
dfbus['Star_Rating']

0       4.0
1       2.5
2       4.4
3       4.0
4       4.3
       ... 
1820    5.0
1821    4.5
1822    3.4
1823    3.9
1824    3.6
Name: Star_Rating, Length: 1825, dtype: float64

In [24]:
dfbus.shape

(1825, 10)

In [25]:
dfbus['Star_Rating'].dtype

dtype('float64')

In [26]:
#convert column of seats availability in integer
dfbus['Seats_Available'].dtype

dtype('O')

In [27]:
dfbus['Seats_Available']

0       43 Seats available
1       21 Seats available
2       28 Seats available
3       35 Seats available
4       17 Seats available
               ...        
1820    27 Seats available
1821    25 Seats available
1822    28 Seats available
1823    23 Seats available
1824    18 Seats available
Name: Seats_Available, Length: 1825, dtype: object

In [28]:
dfbus['Seats_Available']=dfbus['Seats_Available'].str.replace("Seats available","").str[0]

In [29]:
dfbus[dfbus['Seats_Available']==" "]

Unnamed: 0,Route_name,Route_link,Bus_name,Bus_type,Departing_time,Total_duration,Reaching_time,Star_Rating,Price,Seats_Available


In [30]:
len(dfbus[dfbus['Seats_Available']==0])

0

In [31]:
dfbus['Seats_Available']

0       4
1       2
2       2
3       3
4       1
       ..
1820    2
1821    2
1822    2
1823    2
1824    1
Name: Seats_Available, Length: 1825, dtype: object

In [32]:
dfbus['Seats_Available']=dfbus['Seats_Available'].astype(int)

In [33]:
#price to decimal
dfbus['Price'].dtype

dtype('float64')

In [34]:
dfbus[dfbus['Price']==" "]

Unnamed: 0,Route_name,Route_link,Bus_name,Bus_type,Departing_time,Total_duration,Reaching_time,Star_Rating,Price,Seats_Available


In [35]:
dfbus['Price'].unique()

array([ 720.  ,  469.  ,  670.  ,  781.  ,  567.  ,  607.  ,  379.  ,
        419.  ,  399.  ,  559.  ,  369.  ,  281.  ,  383.  ,  269.  ,
        239.  , 5000.  ,  186.  ,  220.  ,  304.  ,  332.  ,  300.  ,
        294.  ,  314.  ,  280.  ,  398.  ,  284.  ,  425.  ,  506.  ,
        362.  ,  585.  ,  589.  ,  489.  ,  599.  ,  307.  ,  340.  ,
        700.  ,  900.  ,  750.  ,  800.  ,  849.  ,  600.  ,  550.  ,
        850.  , 3999.  ,  439.  ,  412.  ,  526.  ,  656.  ,  500.  ,
        624.  ,  602.  ,  590.  ,  555.  ,  605.  ,  719.  ,  671.  ,
        556.  ,  749.  ,  499.  ,  470.  ,  217.  ,  277.  ,  246.  ,
        200.  ,  215.  ,  404.  ,  608.  ,  604.  ,  509.  ,  205.  ,
        225.  ,  301.  ,  400.  , 3500.  ,  467.  ,  344.  ,  528.  ,
        342.  ,  328.  ,  312.  ,  380.  ,  450.  ,  209.  ,  149.  ,
        253.  ,  222.  ,  204.  ,  238.  ,  483.  ,  743.  ,  817.  ,
        626.  ,  531.  ,  353.  ,  330.  ,  422.  ,  350.  ,  675.  ,
        809.  ,  162

In [36]:
dfbus['Price']=dfbus['Price'].astype(float).round(2)

In [37]:
dfbus['Price']

0       720.0
1       469.0
2       469.0
3       670.0
4       469.0
        ...  
1820    680.0
1821    580.0
1822    580.0
1823    499.0
1824    699.0
Name: Price, Length: 1825, dtype: float64

In [38]:
dfbus[dfbus['Total_duration']==" "]

Unnamed: 0,Route_name,Route_link,Bus_name,Bus_type,Departing_time,Total_duration,Reaching_time,Star_Rating,Price,Seats_Available


In [39]:
dfbus['Total_duration'].unique()

array(['06h 55m', '07h 42m', '07h 50m', '04h 50m', '06h 40m', '07h 41m',
       '07h 40m', '08h 18m', '05h 05m', '06h 20m', '06h 08m', '06h 25m',
       '05h 42m', '07h 05m', '05h 44m', '07h 15m', '06h 50m', '06h 15m',
       '05h 40m', '07h 10m', '07h 00m', '04h 00m', '02h 50m', '05h 15m',
       '03h 00m', '03h 30m', '03h 05m', '03h 50m', '03h 55m', '04h 15m',
       '04h 10m', '03h 10m', '04h 20m', '03h 40m', '04h 25m', '03h 25m',
       '05h 16m', '28h 25m', '05h 25m', '04h 37m', '04h 30m', '06h 10m',
       '06h 00m', '05h 30m', '03h 45m', '29h 45m', '05h 00m', '04h 45m',
       '03h 52m', '04h 21m', '02h 55m', '01h 59m', '03h 35m', '03h 20m',
       '06h 05m', '03h 15m', '08h 10m', '07h 25m', '06h 30m', '08h 00m',
       '07h 45m', '08h 05m', '07h 51m', '07h 29m', '10h 06m', '10h 01m',
       '08h 36m', '09h 06m', '07h 20m', '07h 30m', '06h 34m', '07h 27m',
       '07h 09m', '07h 03m', '07h 04m', '07h 35m', '04h 40m', '05h 45m',
       '03h 39m', '01h 45m', '04h 59m', '04h 35m', 

In [37]:
dfbus['Bus_type'].unique()

array(['DOLPHIN CRUISE (VOLVO / SCANIA A.C Multi Axle)',
       'SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)', 'VENNELA (A.C. SLEEPER)',
       'INDRA(A.C. Seater)', 'AMARAVATHI (VOLVO / SCANIA A.C Multi Axle)',
       'STAR LINER(NON-AC SLEEPER 2+1)', 'A/C Seater / Sleeper (2+1)',
       'Bharat Benz A/C Seater /Sleeper (2+1)',
       'Electric A/C Seater (2+2)', 'AC Sleeper (2+1)',
       'METRO LUXURY A/C', 'ULTRA DELUXE (NON-AC, 2+2 PUSH BACK)',
       'Non A/C Seater / Sleeper (2+1)', 'Express(Non AC Seater)',
       'A/C Seater (2+2)', 'SAPTAGIRI EXPRESS',
       'Bharat Benz A/C Sleeper (2+1)', 'A/C Sleeper (2+1)',
       'Volvo A/C B11R Multi Axle Semi Sleeper (2+2)',
       'Volvo Multi Axle A/C Sleeper I-Shift B11R (2+1)',
       'Volvo 9600 Multi-Axle A/C Sleeper (2+1)', 'NON A/C Sleeper (2+1)',
       'Volvo Multi-Axle I-Shift A/C Semi Sleeper (2+2)',
       'Volvo Multi-Axle Sleeper A/C (2+1)', 'A/C Seater/Sleeper (2+1)',
       'Volvo Multi-Axle A/C Sleeper (2+1)',
       'Vol

In [40]:
dfbus[dfbus['Route_name']==" "]

Unnamed: 0,Route_name,Route_link,Bus_name,Bus_type,Departing_time,Total_duration,Reaching_time,Star_Rating,Price,Seats_Available


In [47]:
import numpy as np
dfbus=dfbus.replace({np.nan:None})

In [48]:
dfbus.to_csv("10states_details.csv",index=False)

In [49]:
dfbus.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1825 entries, 0 to 1824
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Route_name       1825 non-null   object 
 1   Route_link       1825 non-null   object 
 2   Bus_name         1825 non-null   object 
 3   Bus_type         1825 non-null   object 
 4   Departing_time   1825 non-null   object 
 5   Total_duration   1825 non-null   object 
 6   Reaching_time    1825 non-null   object 
 7   Star_Rating      1817 non-null   object 
 8   Price            1825 non-null   float64
 9   Seats_Available  1825 non-null   int64  
dtypes: float64(1), int64(1), object(8)
memory usage: 142.7+ KB


SQL

In [2]:
from mysql import connector
connection =connector.connect(
    host="localhost",
    user="root",
    password="Mona@999"
)
mycursor = connection.cursor()

In [3]:
query="create database if not exists redbusproject"
mycursor.execute(query)

In [4]:
query ="use redbusproject"
mycursor.execute(query)

In [None]:
query ="drop table bus_details"
mycursor.execute(query)

In [5]:
mycursor.execute("""
CREATE TABLE IF NOT EXISTS bus_details (
    ID INT AUTO_INCREMENT PRIMARY KEY,
    Route_name VARCHAR(255) NULL,
    Route_link VARCHAR(255) NULL,
    Bus_name VARCHAR(255) NOT NULL,
    Bus_type VARCHAR(255) NOT NULL,
    Departing_time VARCHAR(255) NOT NULL,
    Total_duration VARCHAR(255) NOT NULL,
    Reaching_time VARCHAR(255) NOT NULL,
    Star_Rating FLOAT NULL,
    Price DECIMAL(10,2),
    Seats_available VARCHAR(255) NOT NULL
    
    
    
)
""")

print("Table created successfully")


Table created successfully


In [6]:
query="describe bus_details "
mycursor.execute(query)
for db in mycursor:
    print(db)

('ID', 'int', 'NO', 'PRI', None, 'auto_increment')
('Route_name', 'varchar(255)', 'YES', '', None, '')
('Route_link', 'varchar(255)', 'YES', '', None, '')
('Bus_name', 'varchar(255)', 'NO', '', None, '')
('Bus_type', 'varchar(255)', 'NO', '', None, '')
('Departing_time', 'varchar(255)', 'NO', '', None, '')
('Total_duration', 'varchar(255)', 'NO', '', None, '')
('Reaching_time', 'varchar(255)', 'NO', '', None, '')
('Star_Rating', 'float', 'YES', '', None, '')
('Price', 'decimal(10,2)', 'YES', '', None, '')
('Seats_available', 'varchar(255)', 'NO', '', None, '')


In [50]:
insert_query = """
INSERT INTO bus_details (
    Route_name,
    Route_link,
    Bus_name,
    Bus_type,
    Departing_time,
    Total_duration,
    Reaching_time,
    Star_Rating,
    Price,
    Seats_available
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""

for index, row in dfbus.iterrows():
    mycursor.execute(insert_query, (
        row['Route_name'],
        row['Route_link'],
        row['Bus_name'],
        row['Bus_type'],
        row['Departing_time'],
        row['Total_duration'],
        row['Reaching_time'],
        row['Star_Rating'],
        row['Price'],
        row['Seats_Available']
    ))

connection.commit()
print("Data inserted successfully!")


Data inserted successfully!
