In [1]:
#KSRTC
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
import pandas as pd
import time
def setup_browser(url="https://www.redbus.in/online-booking/ksrtc-kerala/?utm_source=rtchometile", timeout=10):
    """Launch Chrome, open an operator landing page and wait for its route list.

    Args:
        url: Landing page to open. Defaults to the KSRTC page used by this cell.
        timeout: Seconds to wait for the first element with class ``route``.

    Returns:
        The Chrome WebDriver, positioned on ``url``. The caller must ``quit()`` it.

    Raises:
        Whatever navigation or the wait raises (for example Selenium's
        TimeoutException); the browser is closed before the error propagates.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        driver.maximize_window()
        wait(driver, timeout).until(EC.presence_of_element_located((By.CLASS_NAME, 'route')))
    except BaseException:
        # The caller never receives the driver on failure (or on a notebook
        # interrupt), so close the browser here instead of leaking Chrome.
        driver.quit()
        raise
    return driver

def get_route_links_and_names(driver):
    """Collect the link and name of every route on an operator landing page.

    Reads the routes currently listed, then keeps clicking the pagination
    control's "Next" tab and reading again until the tab is missing, cannot be
    clicked, or a click no longer changes the listing.

    Args:
        driver: WebDriver that already shows the operator landing page.

    Returns:
        ``(routes_link, routes_name)``: parallel lists with each route's
        ``href`` and stripped link text. Anchors without an ``href`` and
        links already collected are skipped.
    """
    # Imported here because the notebook's import cell does not provide them.
    from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

    pagination_xpath = '//*[@class="DC_117_paginationTable"]'
    routes_link = []
    routes_name = []
    seen_links = set()

    try:
        wait(driver, 10).until(EC.presence_of_element_located((By.XPATH, pagination_xpath)))
    except TimeoutException:
        # No pagination control: treat the listing as a single page, which the
        # loop below reads once before stopping at the missing "Next" tab.
        pass

    def first_route_href(drv):
        """Return the first listed route's href, or None while the list re-renders."""
        try:
            elements = drv.find_elements(By.CLASS_NAME, 'route')
            return elements[0].get_attribute('href') if elements else None
        except StaleElementReferenceException:
            return None

    while True:
        route_elements = driver.find_elements(By.CLASS_NAME, 'route')
        page_first_href = route_elements[0].get_attribute('href') if route_elements else None
        for route in route_elements:
            href = route.get_attribute('href')
            # driver.get() cannot open a missing link, and a repeated link
            # would only make the caller scrape the same route twice.
            if not href or href in seen_links:
                continue
            seen_links.add(href)
            routes_link.append(href)
            routes_name.append(route.text.strip())

        try:
            # Re-locate the container on every pass: a reference captured
            # before the first click goes stale if the page re-renders it.
            pagination = driver.find_element(By.XPATH, pagination_xpath)
            next_button = pagination.find_element(By.XPATH, './/div[@class="DC_117_pageTabs " and text()="Next"]')
            driver.execute_script("arguments[0].scrollIntoView();", next_button)
            wait(driver, 10).until(EC.element_to_be_clickable(next_button)).click()
            # Block until the listing actually changes; otherwise the next
            # pass could re-read the page that was just collected.
            wait(driver, 10).until(lambda drv: first_route_href(drv) not in (None, page_first_href))
        except (NoSuchElementException, ElementClickInterceptedException,
                StaleElementReferenceException, TimeoutException):
            break

    return routes_link, routes_name

def scrape_bus_details(driver, url, route_name, scroll_pause=3):
    """Open a route page and return one record per listed bus.

    The page is scrolled to the bottom until the number of bus cards stops
    growing, and only then are the cards read, so each bus is recorded once.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Route page to open; stored as ``Route_Link`` in every record.
        route_name: Route label stored as ``Route_Name`` in every record.
        scroll_pause: Seconds to sleep after each scroll so that lazily
            loaded cards can appear.

    Returns:
        List of dicts with the keys Route_Name, Route_Link, Bus_Name,
        Bus_Type, Departure, Duration, Arrival, Star_Rating, Price and
        Seat_Availability.
    """
    name_locator = (By.CLASS_NAME, "travels.lh-24.f-bold.d-color")

    def text_at(elements, index, default=''):
        """Text of elements[index], or default when that list is too short."""
        return elements[index].text if index < len(elements) else default

    driver.get(url)
    wait(driver, 10).until(EC.presence_of_all_elements_located(name_locator))

    # Scroll until the number of bus cards stops growing. Cards are read only
    # after this loop: reading inside it would append the cards that were
    # already present again on every pass and duplicate them in the result.
    loaded = len(driver.find_elements(*name_locator))
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(scroll_pause)  # give lazily loaded cards time to render
        now_loaded = len(driver.find_elements(*name_locator))
        if now_loaded <= loaded:
            break
        loaded = now_loaded

    bus_name_lists = driver.find_elements(*name_locator)
    bus_type_lists = driver.find_elements(By.CLASS_NAME, "bus-type.f-12.m-top-16.l-color.evBus")
    departure_lists = driver.find_elements(By.CLASS_NAME, "dp-time.f-19.d-color.f-bold")
    duration_lists = driver.find_elements(By.CLASS_NAME, "dur.l-color.lh-24")
    arrival_lists = driver.find_elements(By.CLASS_NAME, "bp-time.f-19.d-color.disp-Inline")
    rating_lists = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
    price_lists = driver.find_elements(By.CLASS_NAME, "fare.d-block")
    seat_availability_lists = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left m-top-16')]")

    # NOTE(review): every field is paired with its bus purely by list
    # position. If a card lacks one of these elements, the later values in
    # that list shift up by one bus -- confirm against the page markup.
    bus_details = []
    for i in range(len(bus_name_lists)):
        bus_details.append({
            "Route_Name": route_name,
            "Route_Link": url,
            "Bus_Name": bus_name_lists[i].text,
            "Bus_Type": text_at(bus_type_lists, i),
            "Departure": text_at(departure_lists, i),
            "Duration": text_at(duration_lists, i),
            "Arrival": text_at(arrival_lists, i),
            "Star_Rating": text_at(rating_lists, i, '0'),
            "Price": text_at(price_lists, i),
            "Seat_Availability": text_at(seat_availability_lists, i, '0'),
        })

    return bus_details


def scrape_all_pages():
    """Scrape every route of this cell's operator and return all bus records.

    Returns:
        List of bus-detail dicts accumulated over all routes that could be
        scraped. A route whose page raises a WebDriver error (for example a
        timeout because no bus card appeared) is reported and skipped.
    """
    # Imported here because the notebook's import cell does not provide it.
    from selenium.common.exceptions import WebDriverException

    driver = setup_browser()
    all_bus_details = []

    try:
        all_bus_routes_link, all_bus_routes_name = get_route_links_and_names(driver)

        for link, name in zip(all_bus_routes_link, all_bus_routes_name):
            try:
                bus_details = scrape_bus_details(driver, link, name)
            except WebDriverException as exc:
                # One failing route must not discard the records already
                # gathered from the routes scraped before it.
                print(f"Skipping route {name!r} ({link}): {type(exc).__name__}")
                continue
            all_bus_details.extend(bus_details)
    finally:
        # Always release the browser, even when route discovery fails.
        driver.quit()

    return all_bus_details

# Run the scraper
# This drives a real Chrome session through every KSRTC route, so the cell is
# slow and needs network access. Records are kept in memory and saved to disk.
all_bus_details = scrape_all_pages()
df10 = pd.DataFrame(all_bus_details)
# index=False keeps the positional row index out of the CSV file.
df10.to_csv('Kerala.csv', index=False)


In [2]:
# Show the scraped KSRTC records; the recorded output abbreviates the middle rows.
df10

Unnamed: 0,Route_Name,Route_Link,Bus_Name,Bus_Type,Departure,Duration,Arrival,Star_Rating,Price,Seat_Availability
0,Bangalore to Kozhikode,https://www.redbus.in/bus-tickets/bangalore-to...,P K Travels,A/C Seater / Sleeper (2+1),21:30,08h 30m,06:00,4.7,INR 800,21 Seats available
1,Bangalore to Kozhikode,https://www.redbus.in/bus-tickets/bangalore-to...,Sam Tourist,NON AC Seater / Sleeper 2+1,22:05,09h 00m,07:05,4.0,INR 799,26 Seats available
2,Bangalore to Kozhikode,https://www.redbus.in/bus-tickets/bangalore-to...,SAAM BUS,VE A/C Sleeper (2+1),21:45,09h 00m,06:45,4.4,INR 900,11 Seats available
3,Bangalore to Kozhikode,https://www.redbus.in/bus-tickets/bangalore-to...,AdSuraj Holidays,NON AC Seater / Sleeper 2+1,20:30,09h 30m,06:00,3.6,INR 750,8 Seats available
4,Bangalore to Kozhikode,https://www.redbus.in/bus-tickets/bangalore-to...,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
793,Bangalore to Kalpetta (kerala),https://www.redbus.in/bus-tickets/bangalore-to...,SKS Tourist Corporation (REGD),Non A/C Seater / Sleeper (2+1),21:30,06h 25m,03:55,3.5,INR 799,40 Seats available
794,Bangalore to Kalpetta (kerala),https://www.redbus.in/bus-tickets/bangalore-to...,SKS Tourist Corporation (REGD),Non A/C Seater / Sleeper (2+1),21:30,06h 30m,04:00,3.2,INR 799,27 Seats available
795,Bangalore to Kalpetta (kerala),https://www.redbus.in/bus-tickets/bangalore-to...,Vijay Lines,A/C Seater / Sleeper (2+1),22:20,06h 25m,04:45,2.1,949,40 Seats available
796,Bangalore to Kalpetta (kerala),https://www.redbus.in/bus-tickets/bangalore-to...,P K Travels,A/C Seater / Sleeper (2+1),21:30,06h 30m,04:00,5.0,INR 800,26 Seats available


In [3]:
#RSRTC
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
import pandas as pd
import time

def setup_browser(url="https://www.redbus.in/online-booking/rsrtc/?utm_source=rtchometile", timeout=10):
    """Launch Chrome, open an operator landing page and wait for its route list.

    Args:
        url: Landing page to open. Defaults to the RSRTC page used by this cell.
        timeout: Seconds to wait for the first element with class ``route``.

    Returns:
        The Chrome WebDriver, positioned on ``url``. The caller must ``quit()`` it.

    Raises:
        Whatever navigation or the wait raises (for example Selenium's
        TimeoutException); the browser is closed before the error propagates.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        driver.maximize_window()
        wait(driver, timeout).until(EC.presence_of_element_located((By.CLASS_NAME, 'route')))
    except BaseException:
        # The caller never receives the driver on failure (or on a notebook
        # interrupt), so close the browser here instead of leaking Chrome.
        driver.quit()
        raise
    return driver

def get_route_links_and_names(driver):
    """Collect the link and name of every route on an operator landing page.

    Reads the routes currently listed, then keeps clicking the pagination
    control's "Next" tab and reading again until the tab is missing, cannot be
    clicked, or a click no longer changes the listing.

    Args:
        driver: WebDriver that already shows the operator landing page.

    Returns:
        ``(routes_link, routes_name)``: parallel lists with each route's
        ``href`` and stripped link text. Anchors without an ``href`` and
        links already collected are skipped.
    """
    # Imported here because the notebook's import cell does not provide them.
    from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

    pagination_xpath = '//*[@class="DC_117_paginationTable"]'
    routes_link = []
    routes_name = []
    seen_links = set()

    try:
        wait(driver, 10).until(EC.presence_of_element_located((By.XPATH, pagination_xpath)))
    except TimeoutException:
        # No pagination control: treat the listing as a single page, which the
        # loop below reads once before stopping at the missing "Next" tab.
        pass

    def first_route_href(drv):
        """Return the first listed route's href, or None while the list re-renders."""
        try:
            elements = drv.find_elements(By.CLASS_NAME, 'route')
            return elements[0].get_attribute('href') if elements else None
        except StaleElementReferenceException:
            return None

    while True:
        route_elements = driver.find_elements(By.CLASS_NAME, 'route')
        page_first_href = route_elements[0].get_attribute('href') if route_elements else None
        for route in route_elements:
            href = route.get_attribute('href')
            # driver.get() cannot open a missing link, and a repeated link
            # would only make the caller scrape the same route twice.
            if not href or href in seen_links:
                continue
            seen_links.add(href)
            routes_link.append(href)
            routes_name.append(route.text.strip())

        try:
            # Re-locate the container on every pass: a reference captured
            # before the first click goes stale if the page re-renders it.
            pagination = driver.find_element(By.XPATH, pagination_xpath)
            next_button = pagination.find_element(By.XPATH, './/div[@class="DC_117_pageTabs " and text()="Next"]')
            driver.execute_script("arguments[0].scrollIntoView();", next_button)
            wait(driver, 10).until(EC.element_to_be_clickable(next_button)).click()
            # Block until the listing actually changes; otherwise the next
            # pass could re-read the page that was just collected.
            wait(driver, 10).until(lambda drv: first_route_href(drv) not in (None, page_first_href))
        except (NoSuchElementException, ElementClickInterceptedException,
                StaleElementReferenceException, TimeoutException):
            break

    return routes_link, routes_name

def scrape_bus_details(driver, url, route_name, scroll_pause=3):
    """Open a route page and return one record per listed bus.

    The page is scrolled to the bottom until the number of bus cards stops
    growing, and only then are the cards read, so each bus is recorded once.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Route page to open; stored as ``Route_Link`` in every record.
        route_name: Route label stored as ``Route_Name`` in every record.
        scroll_pause: Seconds to sleep after each scroll so that lazily
            loaded cards can appear.

    Returns:
        List of dicts with the keys Route_Name, Route_Link, Bus_Name,
        Bus_Type, Departure, Duration, Arrival, Star_Rating, Price and
        Seat_Availability.
    """
    name_locator = (By.CLASS_NAME, "travels.lh-24.f-bold.d-color")

    def text_at(elements, index, default=''):
        """Text of elements[index], or default when that list is too short."""
        return elements[index].text if index < len(elements) else default

    driver.get(url)
    wait(driver, 10).until(EC.presence_of_all_elements_located(name_locator))

    # Scroll until the number of bus cards stops growing. Cards are read only
    # after this loop: reading inside it would append the cards that were
    # already present again on every pass and duplicate them in the result.
    loaded = len(driver.find_elements(*name_locator))
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(scroll_pause)  # give lazily loaded cards time to render
        now_loaded = len(driver.find_elements(*name_locator))
        if now_loaded <= loaded:
            break
        loaded = now_loaded

    bus_name_lists = driver.find_elements(*name_locator)
    bus_type_lists = driver.find_elements(By.CLASS_NAME, "bus-type.f-12.m-top-16.l-color.evBus")
    departure_lists = driver.find_elements(By.CLASS_NAME, "dp-time.f-19.d-color.f-bold")
    duration_lists = driver.find_elements(By.CLASS_NAME, "dur.l-color.lh-24")
    arrival_lists = driver.find_elements(By.CLASS_NAME, "bp-time.f-19.d-color.disp-Inline")
    rating_lists = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
    price_lists = driver.find_elements(By.CLASS_NAME, "fare.d-block")
    seat_availability_lists = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left m-top-16')]")

    # NOTE(review): every field is paired with its bus purely by list
    # position. If a card lacks one of these elements, the later values in
    # that list shift up by one bus -- confirm against the page markup.
    bus_details = []
    for i in range(len(bus_name_lists)):
        bus_details.append({
            "Route_Name": route_name,
            "Route_Link": url,
            "Bus_Name": bus_name_lists[i].text,
            "Bus_Type": text_at(bus_type_lists, i),
            "Departure": text_at(departure_lists, i),
            "Duration": text_at(duration_lists, i),
            "Arrival": text_at(arrival_lists, i),
            "Star_Rating": text_at(rating_lists, i, '0'),
            "Price": text_at(price_lists, i),
            "Seat_Availability": text_at(seat_availability_lists, i, '0'),
        })

    return bus_details


def scrape_all_pages():
    """Scrape every route of this cell's operator and return all bus records.

    Returns:
        List of bus-detail dicts accumulated over all routes that could be
        scraped. A route whose page raises a WebDriver error (for example a
        timeout because no bus card appeared) is reported and skipped.
    """
    # Imported here because the notebook's import cell does not provide it.
    from selenium.common.exceptions import WebDriverException

    driver = setup_browser()
    all_bus_details = []

    try:
        all_bus_routes_link, all_bus_routes_name = get_route_links_and_names(driver)

        for link, name in zip(all_bus_routes_link, all_bus_routes_name):
            try:
                bus_details = scrape_bus_details(driver, link, name)
            except WebDriverException as exc:
                # One failing route must not discard the records already
                # gathered from the routes scraped before it.
                print(f"Skipping route {name!r} ({link}): {type(exc).__name__}")
                continue
            all_bus_details.extend(bus_details)
    finally:
        # Always release the browser, even when route discovery fails.
        driver.quit()

    return all_bus_details

# Run the scraper
# This drives a real Chrome session through every RSRTC route, so the cell is
# slow and needs network access. Records are kept in memory and saved to disk.
all_bus_details = scrape_all_pages()
df1 = pd.DataFrame(all_bus_details)
# index=False keeps the positional row index out of the CSV file.
df1.to_csv('Rajasthan.csv', index=False)


In [15]:
# Display the RSRTC records. NOTE(review): the saved output is a NameError, so
# this cell was last run in a kernel where the RSRTC scrape cell had not defined df1.
df1

NameError: name 'df1' is not defined

In [5]:
#TGSRTC
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
import pandas as pd
import time

def setup_browser(url="https://www.redbus.in/online-booking/tsrtc/?utm_source=rtchometile", timeout=10):
    """Launch Chrome, open an operator landing page and wait for its route list.

    Args:
        url: Landing page to open. Defaults to the TGSRTC page used by this cell.
        timeout: Seconds to wait for the first element with class ``route``.

    Returns:
        The Chrome WebDriver, positioned on ``url``. The caller must ``quit()`` it.

    Raises:
        Whatever navigation or the wait raises (for example Selenium's
        TimeoutException); the browser is closed before the error propagates.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        driver.maximize_window()
        wait(driver, timeout).until(EC.presence_of_element_located((By.CLASS_NAME, 'route')))
    except BaseException:
        # The caller never receives the driver on failure (or on a notebook
        # interrupt), so close the browser here instead of leaking Chrome.
        driver.quit()
        raise
    return driver

def get_route_links_and_names(driver):
    """Collect the link and name of every route on an operator landing page.

    Reads the routes currently listed, then keeps clicking the pagination
    control's "Next" tab and reading again until the tab is missing, cannot be
    clicked, or a click no longer changes the listing.

    Args:
        driver: WebDriver that already shows the operator landing page.

    Returns:
        ``(routes_link, routes_name)``: parallel lists with each route's
        ``href`` and stripped link text. Anchors without an ``href`` and
        links already collected are skipped.
    """
    # Imported here because the notebook's import cell does not provide them.
    from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

    pagination_xpath = '//*[@class="DC_117_paginationTable"]'
    routes_link = []
    routes_name = []
    seen_links = set()

    try:
        wait(driver, 10).until(EC.presence_of_element_located((By.XPATH, pagination_xpath)))
    except TimeoutException:
        # No pagination control: treat the listing as a single page, which the
        # loop below reads once before stopping at the missing "Next" tab.
        pass

    def first_route_href(drv):
        """Return the first listed route's href, or None while the list re-renders."""
        try:
            elements = drv.find_elements(By.CLASS_NAME, 'route')
            return elements[0].get_attribute('href') if elements else None
        except StaleElementReferenceException:
            return None

    while True:
        route_elements = driver.find_elements(By.CLASS_NAME, 'route')
        page_first_href = route_elements[0].get_attribute('href') if route_elements else None
        for route in route_elements:
            href = route.get_attribute('href')
            # driver.get() cannot open a missing link, and a repeated link
            # would only make the caller scrape the same route twice.
            if not href or href in seen_links:
                continue
            seen_links.add(href)
            routes_link.append(href)
            routes_name.append(route.text.strip())

        try:
            # Re-locate the container on every pass: a reference captured
            # before the first click goes stale if the page re-renders it.
            pagination = driver.find_element(By.XPATH, pagination_xpath)
            next_button = pagination.find_element(By.XPATH, './/div[@class="DC_117_pageTabs " and text()="Next"]')
            driver.execute_script("arguments[0].scrollIntoView();", next_button)
            wait(driver, 10).until(EC.element_to_be_clickable(next_button)).click()
            # Block until the listing actually changes; otherwise the next
            # pass could re-read the page that was just collected.
            wait(driver, 10).until(lambda drv: first_route_href(drv) not in (None, page_first_href))
        except (NoSuchElementException, ElementClickInterceptedException,
                StaleElementReferenceException, TimeoutException):
            break

    return routes_link, routes_name

def scrape_bus_details(driver, url, route_name, scroll_pause=3):
    """Open a route page and return one record per listed bus.

    The page is scrolled to the bottom until the number of bus cards stops
    growing, and only then are the cards read, so each bus is recorded once.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Route page to open; stored as ``Route_Link`` in every record.
        route_name: Route label stored as ``Route_Name`` in every record.
        scroll_pause: Seconds to sleep after each scroll so that lazily
            loaded cards can appear.

    Returns:
        List of dicts with the keys Route_Name, Route_Link, Bus_Name,
        Bus_Type, Departure, Duration, Arrival, Star_Rating, Price and
        Seat_Availability.
    """
    name_locator = (By.CLASS_NAME, "travels.lh-24.f-bold.d-color")

    def text_at(elements, index, default=''):
        """Text of elements[index], or default when that list is too short."""
        return elements[index].text if index < len(elements) else default

    driver.get(url)
    wait(driver, 10).until(EC.presence_of_all_elements_located(name_locator))

    # Scroll until the number of bus cards stops growing. Cards are read only
    # after this loop: reading inside it would append the cards that were
    # already present again on every pass and duplicate them in the result.
    loaded = len(driver.find_elements(*name_locator))
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(scroll_pause)  # give lazily loaded cards time to render
        now_loaded = len(driver.find_elements(*name_locator))
        if now_loaded <= loaded:
            break
        loaded = now_loaded

    bus_name_lists = driver.find_elements(*name_locator)
    bus_type_lists = driver.find_elements(By.CLASS_NAME, "bus-type.f-12.m-top-16.l-color.evBus")
    departure_lists = driver.find_elements(By.CLASS_NAME, "dp-time.f-19.d-color.f-bold")
    duration_lists = driver.find_elements(By.CLASS_NAME, "dur.l-color.lh-24")
    arrival_lists = driver.find_elements(By.CLASS_NAME, "bp-time.f-19.d-color.disp-Inline")
    rating_lists = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
    price_lists = driver.find_elements(By.CLASS_NAME, "fare.d-block")
    seat_availability_lists = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left m-top-16')]")

    # NOTE(review): every field is paired with its bus purely by list
    # position. If a card lacks one of these elements, the later values in
    # that list shift up by one bus -- confirm against the page markup.
    bus_details = []
    for i in range(len(bus_name_lists)):
        bus_details.append({
            "Route_Name": route_name,
            "Route_Link": url,
            "Bus_Name": bus_name_lists[i].text,
            "Bus_Type": text_at(bus_type_lists, i),
            "Departure": text_at(departure_lists, i),
            "Duration": text_at(duration_lists, i),
            "Arrival": text_at(arrival_lists, i),
            "Star_Rating": text_at(rating_lists, i, '0'),
            "Price": text_at(price_lists, i),
            "Seat_Availability": text_at(seat_availability_lists, i, '0'),
        })

    return bus_details


def scrape_all_pages():
    """Scrape every route of this cell's operator and return all bus records.

    Returns:
        List of bus-detail dicts accumulated over all routes that could be
        scraped. A route whose page raises a WebDriver error (for example a
        timeout because no bus card appeared) is reported and skipped.
    """
    # Imported here because the notebook's import cell does not provide it.
    from selenium.common.exceptions import WebDriverException

    driver = setup_browser()
    all_bus_details = []

    try:
        all_bus_routes_link, all_bus_routes_name = get_route_links_and_names(driver)

        for link, name in zip(all_bus_routes_link, all_bus_routes_name):
            try:
                bus_details = scrape_bus_details(driver, link, name)
            except WebDriverException as exc:
                # One failing route must not discard the records already
                # gathered from the routes scraped before it.
                print(f"Skipping route {name!r} ({link}): {type(exc).__name__}")
                continue
            all_bus_details.extend(bus_details)
    finally:
        # Always release the browser, even when route discovery fails.
        driver.quit()

    return all_bus_details

# Run the scraper
# This drives a real Chrome session through every TGSRTC route, so the cell is
# slow and needs network access. Records are kept in memory and saved to disk.
all_bus_details = scrape_all_pages()
df2 = pd.DataFrame(all_bus_details)
# index=False keeps the positional row index out of the CSV file.
# NOTE(review): the file name is spelled 'Telungana'; anything reading this
# file must use the same spelling -- confirm before renaming.
df2.to_csv('Telungana.csv', index=False)


In [16]:
# Display the TGSRTC records. NOTE(review): the saved output is a NameError, so
# this cell was last run in a kernel where the TGSRTC scrape cell had not defined df2.
df2

NameError: name 'df2' is not defined

In [7]:
#KTCL
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait as wait

def setup_browser(url="https://www.redbus.in/online-booking/ktcl/?utm_source=rtchometile", timeout=10):
    """Launch Chrome, open an operator landing page and wait for its route list.

    Args:
        url: Landing page to open. Defaults to the KTCL page used by this cell.
        timeout: Seconds to wait for the first element with class ``route``.

    Returns:
        The Chrome WebDriver, positioned on ``url``. The caller must ``quit()`` it.

    Raises:
        Whatever navigation or the wait raises (for example Selenium's
        TimeoutException); the browser is closed before the error propagates.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        driver.maximize_window()
        wait(driver, timeout).until(EC.presence_of_element_located((By.CLASS_NAME, 'route')))
    except BaseException:
        # The caller never receives the driver on failure (or on a notebook
        # interrupt), so close the browser here instead of leaking Chrome.
        driver.quit()
        raise
    return driver

def get_route_links_and_names(driver):
    """Collect the link and name of every route on an operator landing page.

    Reads the routes currently listed, then keeps clicking the pagination
    control's "Next" tab and reading again until the tab is missing, cannot be
    clicked, or a click no longer changes the listing.

    Args:
        driver: WebDriver that already shows the operator landing page.

    Returns:
        ``(routes_link, routes_name)``: parallel lists with each route's
        ``href`` and stripped link text. Anchors without an ``href`` and
        links already collected are skipped.
    """
    # Imported here because the notebook's import cell does not provide them.
    from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

    pagination_xpath = '//*[@class="DC_117_paginationTable"]'
    routes_link = []
    routes_name = []
    seen_links = set()

    try:
        wait(driver, 10).until(EC.presence_of_element_located((By.XPATH, pagination_xpath)))
    except TimeoutException:
        # No pagination control: treat the listing as a single page, which the
        # loop below reads once before stopping at the missing "Next" tab.
        pass

    def first_route_href(drv):
        """Return the first listed route's href, or None while the list re-renders."""
        try:
            elements = drv.find_elements(By.CLASS_NAME, 'route')
            return elements[0].get_attribute('href') if elements else None
        except StaleElementReferenceException:
            return None

    while True:
        route_elements = driver.find_elements(By.CLASS_NAME, 'route')
        page_first_href = route_elements[0].get_attribute('href') if route_elements else None
        for route in route_elements:
            href = route.get_attribute('href')
            # driver.get() cannot open a missing link, and a repeated link
            # would only make the caller scrape the same route twice.
            if not href or href in seen_links:
                continue
            seen_links.add(href)
            routes_link.append(href)
            routes_name.append(route.text.strip())

        try:
            # Re-locate the container on every pass: a reference captured
            # before the first click goes stale if the page re-renders it.
            pagination = driver.find_element(By.XPATH, pagination_xpath)
            next_button = pagination.find_element(By.XPATH, './/div[@class="DC_117_pageTabs " and text()="Next"]')
            driver.execute_script("arguments[0].scrollIntoView();", next_button)
            wait(driver, 10).until(EC.element_to_be_clickable(next_button)).click()
            # Block until the listing actually changes; otherwise the next
            # pass could re-read the page that was just collected.
            wait(driver, 10).until(lambda drv: first_route_href(drv) not in (None, page_first_href))
        except (NoSuchElementException, ElementClickInterceptedException,
                StaleElementReferenceException, TimeoutException):
            break

    return routes_link, routes_name

def scrape_bus_details(driver, url, route_name, scroll_pause=3):
    """Open a route page and return one record per listed bus.

    The page is scrolled to the bottom until the number of bus cards stops
    growing, and only then are the cards read, so each bus is recorded once.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Route page to open; stored as ``Route_Link`` in every record.
        route_name: Route label stored as ``Route_Name`` in every record.
        scroll_pause: Seconds to sleep after each scroll so that lazily
            loaded cards can appear.

    Returns:
        List of dicts with the keys Route_Name, Route_Link, Bus_Name,
        Bus_Type, Departure, Duration, Arrival, Star_Rating, Price and
        Seat_Availability.
    """
    name_locator = (By.CLASS_NAME, "travels.lh-24.f-bold.d-color")

    def text_at(elements, index, default=''):
        """Text of elements[index], or default when that list is too short."""
        return elements[index].text if index < len(elements) else default

    driver.get(url)
    wait(driver, 10).until(EC.presence_of_all_elements_located(name_locator))

    # Scroll until the number of bus cards stops growing. Cards are read only
    # after this loop: reading inside it would append the cards that were
    # already present again on every pass and duplicate them in the result.
    loaded = len(driver.find_elements(*name_locator))
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(scroll_pause)  # give lazily loaded cards time to render
        now_loaded = len(driver.find_elements(*name_locator))
        if now_loaded <= loaded:
            break
        loaded = now_loaded

    bus_name_lists = driver.find_elements(*name_locator)
    bus_type_lists = driver.find_elements(By.CLASS_NAME, "bus-type.f-12.m-top-16.l-color.evBus")
    departure_lists = driver.find_elements(By.CLASS_NAME, "dp-time.f-19.d-color.f-bold")
    duration_lists = driver.find_elements(By.CLASS_NAME, "dur.l-color.lh-24")
    arrival_lists = driver.find_elements(By.CLASS_NAME, "bp-time.f-19.d-color.disp-Inline")
    rating_lists = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
    price_lists = driver.find_elements(By.CLASS_NAME, "fare.d-block")
    seat_availability_lists = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left m-top-16')]")

    # NOTE(review): every field is paired with its bus purely by list
    # position. If a card lacks one of these elements, the later values in
    # that list shift up by one bus -- confirm against the page markup.
    bus_details = []
    for i in range(len(bus_name_lists)):
        bus_details.append({
            "Route_Name": route_name,
            "Route_Link": url,
            "Bus_Name": bus_name_lists[i].text,
            "Bus_Type": text_at(bus_type_lists, i),
            "Departure": text_at(departure_lists, i),
            "Duration": text_at(duration_lists, i),
            "Arrival": text_at(arrival_lists, i),
            "Star_Rating": text_at(rating_lists, i, '0'),
            "Price": text_at(price_lists, i),
            "Seat_Availability": text_at(seat_availability_lists, i, '0'),
        })

    return bus_details


def scrape_all_pages():
    """Scrape every route of this cell's operator and return all bus records.

    Returns:
        List of bus-detail dicts accumulated over all routes that could be
        scraped. A route whose page raises a WebDriver error (for example a
        timeout because no bus card appeared) is reported and skipped.
    """
    # Imported here because the notebook's import cell does not provide it.
    from selenium.common.exceptions import WebDriverException

    driver = setup_browser()
    all_bus_details = []

    try:
        all_bus_routes_link, all_bus_routes_name = get_route_links_and_names(driver)

        for link, name in zip(all_bus_routes_link, all_bus_routes_name):
            try:
                bus_details = scrape_bus_details(driver, link, name)
            except WebDriverException as exc:
                # One failing route must not discard the records already
                # gathered from the routes scraped before it.
                print(f"Skipping route {name!r} ({link}): {type(exc).__name__}")
                continue
            all_bus_details.extend(bus_details)
    finally:
        # Always release the browser, even when route discovery fails.
        driver.quit()

    return all_bus_details

# Run the scraper
# This drives a real Chrome session through every KTCL route, so the cell is
# slow and needs network access. Records are kept in memory and saved to disk.
all_bus_details = scrape_all_pages()
df3 = pd.DataFrame(all_bus_details)
# index=False keeps the positional row index out of the CSV file.
df3.to_csv('Kadamba.csv', index=False)


Error sending stats to Plausible: error sending request for url (https://plausible.io/api/event)


In [17]:
# Display the KTCL records. NOTE(review): the saved output is a NameError, so
# this cell was last run in a kernel where the KTCL scrape cell had not defined df3.
df3

NameError: name 'df3' is not defined

In [11]:
#WBTC
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
import pandas as pd
import time

def setup_browser(url="https://www.redbus.in/online-booking/west-bengal-transport-corporation?utm_source=rtchometile", timeout=10):
    """Launch Chrome, open an operator landing page and wait for its route list.

    Args:
        url: Landing page to open. Defaults to the WBTC page used by this cell.
        timeout: Seconds to wait for the first element with class ``route``.

    Returns:
        The Chrome WebDriver, positioned on ``url``. The caller must ``quit()`` it.

    Raises:
        Whatever navigation or the wait raises (for example Selenium's
        TimeoutException); the browser is closed before the error propagates.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        driver.maximize_window()
        wait(driver, timeout).until(EC.presence_of_element_located((By.CLASS_NAME, 'route')))
    except BaseException:
        # The caller never receives the driver on failure (or on a notebook
        # interrupt), so close the browser here instead of leaking Chrome.
        driver.quit()
        raise
    return driver

def get_route_links_and_names(driver):
    """Collect the link and name of every route on an operator landing page.

    Reads the routes currently listed, then keeps clicking the pagination
    control's "Next" tab and reading again until the tab is missing, cannot be
    clicked, or a click no longer changes the listing.

    Args:
        driver: WebDriver that already shows the operator landing page.

    Returns:
        ``(routes_link, routes_name)``: parallel lists with each route's
        ``href`` and stripped link text. Anchors without an ``href`` and
        links already collected are skipped.
    """
    # Imported here because the notebook's import cell does not provide them.
    from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

    pagination_xpath = '//*[@class="DC_117_paginationTable"]'
    routes_link = []
    routes_name = []
    seen_links = set()

    try:
        wait(driver, 10).until(EC.presence_of_element_located((By.XPATH, pagination_xpath)))
    except TimeoutException:
        # No pagination control: treat the listing as a single page, which the
        # loop below reads once before stopping at the missing "Next" tab.
        pass

    def first_route_href(drv):
        """Return the first listed route's href, or None while the list re-renders."""
        try:
            elements = drv.find_elements(By.CLASS_NAME, 'route')
            return elements[0].get_attribute('href') if elements else None
        except StaleElementReferenceException:
            return None

    while True:
        route_elements = driver.find_elements(By.CLASS_NAME, 'route')
        page_first_href = route_elements[0].get_attribute('href') if route_elements else None
        for route in route_elements:
            href = route.get_attribute('href')
            # driver.get() cannot open a missing link, and a repeated link
            # would only make the caller scrape the same route twice.
            if not href or href in seen_links:
                continue
            seen_links.add(href)
            routes_link.append(href)
            routes_name.append(route.text.strip())

        try:
            # Re-locate the container on every pass: a reference captured
            # before the first click goes stale if the page re-renders it.
            pagination = driver.find_element(By.XPATH, pagination_xpath)
            next_button = pagination.find_element(By.XPATH, './/div[@class="DC_117_pageTabs " and text()="Next"]')
            driver.execute_script("arguments[0].scrollIntoView();", next_button)
            wait(driver, 10).until(EC.element_to_be_clickable(next_button)).click()
            # Block until the listing actually changes; otherwise the next
            # pass could re-read the page that was just collected.
            wait(driver, 10).until(lambda drv: first_route_href(drv) not in (None, page_first_href))
        except (NoSuchElementException, ElementClickInterceptedException,
                StaleElementReferenceException, TimeoutException):
            break

    return routes_link, routes_name

def scrape_bus_details(driver, url, route_name, scroll_pause=3):
    """Open a route page and return one record per listed bus.

    The page is scrolled to the bottom until the number of bus cards stops
    growing, and only then are the cards read, so each bus is recorded once.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Route page to open; stored as ``Route_Link`` in every record.
        route_name: Route label stored as ``Route_Name`` in every record.
        scroll_pause: Seconds to sleep after each scroll so that lazily
            loaded cards can appear.

    Returns:
        List of dicts with the keys Route_Name, Route_Link, Bus_Name,
        Bus_Type, Departure, Duration, Arrival, Star_Rating, Price and
        Seat_Availability.
    """
    name_locator = (By.CLASS_NAME, "travels.lh-24.f-bold.d-color")

    def text_at(elements, index, default=''):
        """Text of elements[index], or default when that list is too short."""
        return elements[index].text if index < len(elements) else default

    driver.get(url)
    wait(driver, 10).until(EC.presence_of_all_elements_located(name_locator))

    # Scroll until the number of bus cards stops growing. Cards are read only
    # after this loop: reading inside it would append the cards that were
    # already present again on every pass and duplicate them in the result.
    loaded = len(driver.find_elements(*name_locator))
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(scroll_pause)  # give lazily loaded cards time to render
        now_loaded = len(driver.find_elements(*name_locator))
        if now_loaded <= loaded:
            break
        loaded = now_loaded

    bus_name_lists = driver.find_elements(*name_locator)
    bus_type_lists = driver.find_elements(By.CLASS_NAME, "bus-type.f-12.m-top-16.l-color.evBus")
    departure_lists = driver.find_elements(By.CLASS_NAME, "dp-time.f-19.d-color.f-bold")
    duration_lists = driver.find_elements(By.CLASS_NAME, "dur.l-color.lh-24")
    arrival_lists = driver.find_elements(By.CLASS_NAME, "bp-time.f-19.d-color.disp-Inline")
    rating_lists = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
    price_lists = driver.find_elements(By.CLASS_NAME, "fare.d-block")
    seat_availability_lists = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left m-top-16')]")

    # NOTE(review): every field is paired with its bus purely by list
    # position. If a card lacks one of these elements, the later values in
    # that list shift up by one bus -- confirm against the page markup.
    bus_details = []
    for i in range(len(bus_name_lists)):
        bus_details.append({
            "Route_Name": route_name,
            "Route_Link": url,
            "Bus_Name": bus_name_lists[i].text,
            "Bus_Type": text_at(bus_type_lists, i),
            "Departure": text_at(departure_lists, i),
            "Duration": text_at(duration_lists, i),
            "Arrival": text_at(arrival_lists, i),
            "Star_Rating": text_at(rating_lists, i, '0'),
            "Price": text_at(price_lists, i),
            "Seat_Availability": text_at(seat_availability_lists, i, '0'),
        })

    return bus_details


def scrape_all_pages():
    """Scrape every route of this cell's operator and return all bus records.

    Returns:
        List of bus-detail dicts accumulated over all routes that could be
        scraped. A route whose page raises a WebDriver error (for example a
        timeout because no bus card appeared) is reported and skipped.
    """
    # Imported here because the notebook's import cell does not provide it.
    from selenium.common.exceptions import WebDriverException

    driver = setup_browser()
    all_bus_details = []

    try:
        all_bus_routes_link, all_bus_routes_name = get_route_links_and_names(driver)

        for link, name in zip(all_bus_routes_link, all_bus_routes_name):
            try:
                bus_details = scrape_bus_details(driver, link, name)
            except WebDriverException as exc:
                # One failing route must not discard the records already
                # gathered from the routes scraped before it.
                print(f"Skipping route {name!r} ({link}): {type(exc).__name__}")
                continue
            all_bus_details.extend(bus_details)
    finally:
        # Always release the browser, even when route discovery fails.
        driver.quit()

    return all_bus_details

# Run the scraper
# This drives a real Chrome session through every WBTC route, so the cell is
# slow and needs network access. Records are kept in memory and saved to disk.
all_bus_details = scrape_all_pages()
df4 = pd.DataFrame(all_bus_details)
# index=False keeps the positional row index out of the CSV file.
df4.to_csv('West_Bengal.csv', index=False)


In [18]:
# Show the scraped WBTC records; the recorded output abbreviates the middle rows.
df4

Unnamed: 0,Route_Name,Route_Link,Bus_Name,Bus_Type,Departure,Duration,Arrival,Star_Rating,Price,Seat_Availability
0,Digha to Kolkata,https://www.redbus.in/bus-tickets/digha-to-kol...,Shyamoli Paribahan Pvt Ltd,VE A/C Seater (2+2),13:45,05h 10m,18:55,4.3,INR 350,24 Seats available
1,Digha to Kolkata,https://www.redbus.in/bus-tickets/digha-to-kol...,Shyamoli Paribahan Pvt Ltd,Volvo A/C Seater (2+2),13:30,04h 55m,18:25,4.3,INR 350,20 Seats available
2,Digha to Kolkata,https://www.redbus.in/bus-tickets/digha-to-kol...,Express Line,Scania Multi-Axle AC Semi Sleeper (2+2),13:45,04h 30m,18:15,4.5,INR 299,12 Seats available
3,Digha to Kolkata,https://www.redbus.in/bus-tickets/digha-to-kol...,,,,,,,,
4,Digha to Kolkata,https://www.redbus.in/bus-tickets/digha-to-kol...,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
5499,Mandarmani to Kolkata,https://www.redbus.in/bus-tickets/mandarmani-t...,Aradhana Travels,Non AC Seater (2+3),22:15,05h 10m,03:25,0,450,13 Seats available
5500,Mandarmani to Kolkata,https://www.redbus.in/bus-tickets/mandarmani-t...,Snemita Paribahan,AC Seater (2+3),11:45,03h 50m,15:35,0,INR 370,12 Seats available
5501,Mandarmani to Kolkata,https://www.redbus.in/bus-tickets/mandarmani-t...,City Express (Snemita),AC Seater (2+3),11:45,03h 45m,15:30,0,INR 370,28 Seats available
5502,Mandarmani to Kolkata,https://www.redbus.in/bus-tickets/mandarmani-t...,Aradhana Bus Service,A/C Seater / Sleeper (2+2),23:20,04h 40m,04:00,0,INR 550,71 Seats available


In [2]:
#UPSRTC
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
import pandas as pd
import time

def setup_browser(url="https://www.redbus.in/online-booking/uttar-pradesh-state-road-transport-corporation-upsrtc/?utm_source=rtchometile", timeout=10):
    """Launch Chrome, open an operator landing page and wait for its route list.

    Args:
        url: Landing page to open. Defaults to the UPSRTC page used by this cell.
        timeout: Seconds to wait for the first element with class ``route``.

    Returns:
        The Chrome WebDriver, positioned on ``url``. The caller must ``quit()`` it.

    Raises:
        Whatever navigation or the wait raises (for example Selenium's
        TimeoutException); the browser is closed before the error propagates.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        driver.maximize_window()
        wait(driver, timeout).until(EC.presence_of_element_located((By.CLASS_NAME, 'route')))
    except BaseException:
        # The caller never receives the driver on failure (or on a notebook
        # interrupt), so close the browser here instead of leaking Chrome.
        driver.quit()
        raise
    return driver

def get_route_links_and_names(driver):
    """Collect the link and name of every route on an operator landing page.

    Reads the routes currently listed, then keeps clicking the pagination
    control's "Next" tab and reading again until the tab is missing, cannot be
    clicked, or a click no longer changes the listing.

    Args:
        driver: WebDriver that already shows the operator landing page.

    Returns:
        ``(routes_link, routes_name)``: parallel lists with each route's
        ``href`` and stripped link text. Anchors without an ``href`` and
        links already collected are skipped.
    """
    # Imported here because the notebook's import cell does not provide them.
    from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

    pagination_xpath = '//*[@class="DC_117_paginationTable"]'
    routes_link = []
    routes_name = []
    seen_links = set()

    try:
        wait(driver, 10).until(EC.presence_of_element_located((By.XPATH, pagination_xpath)))
    except TimeoutException:
        # No pagination control: treat the listing as a single page, which the
        # loop below reads once before stopping at the missing "Next" tab.
        pass

    def first_route_href(drv):
        """Return the first listed route's href, or None while the list re-renders."""
        try:
            elements = drv.find_elements(By.CLASS_NAME, 'route')
            return elements[0].get_attribute('href') if elements else None
        except StaleElementReferenceException:
            return None

    while True:
        route_elements = driver.find_elements(By.CLASS_NAME, 'route')
        page_first_href = route_elements[0].get_attribute('href') if route_elements else None
        for route in route_elements:
            href = route.get_attribute('href')
            # driver.get() cannot open a missing link, and a repeated link
            # would only make the caller scrape the same route twice.
            if not href or href in seen_links:
                continue
            seen_links.add(href)
            routes_link.append(href)
            routes_name.append(route.text.strip())

        try:
            # Re-locate the container on every pass: a reference captured
            # before the first click goes stale if the page re-renders it.
            pagination = driver.find_element(By.XPATH, pagination_xpath)
            next_button = pagination.find_element(By.XPATH, './/div[@class="DC_117_pageTabs " and text()="Next"]')
            driver.execute_script("arguments[0].scrollIntoView();", next_button)
            wait(driver, 10).until(EC.element_to_be_clickable(next_button)).click()
            # Block until the listing actually changes; otherwise the next
            # pass could re-read the page that was just collected.
            wait(driver, 10).until(lambda drv: first_route_href(drv) not in (None, page_first_href))
        except (NoSuchElementException, ElementClickInterceptedException,
                StaleElementReferenceException, TimeoutException):
            break

    return routes_link, routes_name

def scrape_bus_details(driver, url, route_name, scroll_pause=3):
    """Open a route page and return one record per listed bus.

    The page is scrolled to the bottom until the number of bus cards stops
    growing, and only then are the cards read, so each bus is recorded once.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Route page to open; stored as ``Route_Link`` in every record.
        route_name: Route label stored as ``Route_Name`` in every record.
        scroll_pause: Seconds to sleep after each scroll so that lazily
            loaded cards can appear.

    Returns:
        List of dicts with the keys Route_Name, Route_Link, Bus_Name,
        Bus_Type, Departure, Duration, Arrival, Star_Rating, Price and
        Seat_Availability.
    """
    name_locator = (By.CLASS_NAME, "travels.lh-24.f-bold.d-color")

    def text_at(elements, index, default=''):
        """Text of elements[index], or default when that list is too short."""
        return elements[index].text if index < len(elements) else default

    driver.get(url)
    wait(driver, 10).until(EC.presence_of_all_elements_located(name_locator))

    # Scroll until the number of bus cards stops growing. Cards are read only
    # after this loop: reading inside it would append the cards that were
    # already present again on every pass and duplicate them in the result.
    loaded = len(driver.find_elements(*name_locator))
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(scroll_pause)  # give lazily loaded cards time to render
        now_loaded = len(driver.find_elements(*name_locator))
        if now_loaded <= loaded:
            break
        loaded = now_loaded

    bus_name_lists = driver.find_elements(*name_locator)
    bus_type_lists = driver.find_elements(By.CLASS_NAME, "bus-type.f-12.m-top-16.l-color.evBus")
    departure_lists = driver.find_elements(By.CLASS_NAME, "dp-time.f-19.d-color.f-bold")
    duration_lists = driver.find_elements(By.CLASS_NAME, "dur.l-color.lh-24")
    arrival_lists = driver.find_elements(By.CLASS_NAME, "bp-time.f-19.d-color.disp-Inline")
    rating_lists = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
    price_lists = driver.find_elements(By.CLASS_NAME, "fare.d-block")
    seat_availability_lists = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left m-top-16')]")

    # NOTE(review): every field is paired with its bus purely by list
    # position. If a card lacks one of these elements, the later values in
    # that list shift up by one bus -- confirm against the page markup.
    bus_details = []
    for i in range(len(bus_name_lists)):
        bus_details.append({
            "Route_Name": route_name,
            "Route_Link": url,
            "Bus_Name": bus_name_lists[i].text,
            "Bus_Type": text_at(bus_type_lists, i),
            "Departure": text_at(departure_lists, i),
            "Duration": text_at(duration_lists, i),
            "Arrival": text_at(arrival_lists, i),
            "Star_Rating": text_at(rating_lists, i, '0'),
            "Price": text_at(price_lists, i),
            "Seat_Availability": text_at(seat_availability_lists, i, '0'),
        })

    return bus_details


def scrape_all_pages():
    """Scrape every route of this cell's operator and return all bus records.

    Returns:
        List of bus-detail dicts accumulated over all routes that could be
        scraped. A route whose page raises a WebDriver error (for example a
        timeout because no bus card appeared) is reported and skipped.
    """
    # Imported here because the notebook's import cell does not provide it.
    from selenium.common.exceptions import WebDriverException

    driver = setup_browser()
    all_bus_details = []

    try:
        all_bus_routes_link, all_bus_routes_name = get_route_links_and_names(driver)

        for link, name in zip(all_bus_routes_link, all_bus_routes_name):
            try:
                bus_details = scrape_bus_details(driver, link, name)
            except WebDriverException as exc:
                # One failing route must not discard the records already
                # gathered from the routes scraped before it.
                print(f"Skipping route {name!r} ({link}): {type(exc).__name__}")
                continue
            all_bus_details.extend(bus_details)
    finally:
        # Always release the browser, even when route discovery fails.
        driver.quit()

    return all_bus_details

# Run the scraper
# This drives a real Chrome session through every UPSRTC route, so the cell is
# slow and needs network access. Records are kept in memory and saved to disk.
all_bus_details = scrape_all_pages()
df5 = pd.DataFrame(all_bus_details)
# index=False keeps the positional row index out of the CSV file.
df5.to_csv('Uttar_Pradesh.csv', index=False)


In [3]:
# Show the scraped UPSRTC records; the recorded output abbreviates the middle rows.
df5

Unnamed: 0,Route_Name,Route_Link,Bus_Name,Bus_Type,Departure,Duration,Arrival,Star_Rating,Price,Seat_Availability
0,Delhi to Bareilly,https://www.redbus.in/bus-tickets/delhi-to-bar...,R S YADAV SMART BUS PRIVATE LIMITED,Bharat Benz A/C Seater /Sleeper (2+1),21:15,07h 20m,04:35,4.4,INR 799,25 Seats available
1,Delhi to Bareilly,https://www.redbus.in/bus-tickets/delhi-to-bar...,FlixBus,AC Seater (2+2),23:35,05h 25m,05:00,4.8,INR 193.75,23 Seats available
2,Delhi to Bareilly,https://www.redbus.in/bus-tickets/delhi-to-bar...,New Khaira Transport,A/C Seater / Sleeper (2+2),21:00,06h 30m,03:30,1.6,INR 500,21 Seats available
3,Delhi to Bareilly,https://www.redbus.in/bus-tickets/delhi-to-bar...,SHRI RAM TRAVELS,A/C Seater (2+2),23:59,05h 16m,05:15,4.0,INR 740,16 Seats available
4,Delhi to Bareilly,https://www.redbus.in/bus-tickets/delhi-to-bar...,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
2654,Allahabad to Lucknow,https://www.redbus.in/bus-tickets/allahabad-to...,Mahalaxmi Travels,Bharat Benz A/C Seater /Sleeper (2+1),19:30,04h 15m,23:45,3.0,INR 499,21 Seats available
2655,Allahabad to Lucknow,https://www.redbus.in/bus-tickets/allahabad-to...,ARORA TRAVELS,Volvo AC Seater (2+2),21:01,04h 40m,01:41,4.7,INR 703,25 Seats available
2656,Allahabad to Lucknow,https://www.redbus.in/bus-tickets/allahabad-to...,Mahalaxmi Travels,Bharat Benz A/C Seater /Sleeper (2+1),21:15,04h 15m,01:30,3.4,INR 499,20 Seats available
2657,Allahabad to Lucknow,https://www.redbus.in/bus-tickets/allahabad-to...,Sethi Yatra Company,A/C Seater / Sleeper (2+1),18:00,05h 40m,23:40,1.0,509,35 Seats available


In [4]:
#CTU
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
import pandas as pd
import time

def setup_browser(url="https://www.redbus.in/online-booking/chandigarh-transport-undertaking-ctu", timeout=10):
    """Launch Chrome, open an operator landing page and wait for its route list.

    Args:
        url: Landing page to open. Defaults to the CTU page used by this cell.
        timeout: Seconds to wait for the first element with class ``route``.

    Returns:
        The Chrome WebDriver, positioned on ``url``. The caller must ``quit()`` it.

    Raises:
        Whatever navigation or the wait raises (for example Selenium's
        TimeoutException); the browser is closed before the error propagates.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        driver.maximize_window()
        wait(driver, timeout).until(EC.presence_of_element_located((By.CLASS_NAME, 'route')))
    except BaseException:
        # The caller never receives the driver on failure (or on a notebook
        # interrupt), so close the browser here instead of leaking Chrome.
        driver.quit()
        raise
    return driver

def get_route_links_and_names(driver):
    """Collect the link and name of every route on an operator landing page.

    Reads the routes currently listed, then keeps clicking the pagination
    control's "Next" tab and reading again until the tab is missing, cannot be
    clicked, or a click no longer changes the listing.

    Args:
        driver: WebDriver that already shows the operator landing page.

    Returns:
        ``(routes_link, routes_name)``: parallel lists with each route's
        ``href`` and stripped link text. Anchors without an ``href`` and
        links already collected are skipped.
    """
    # Imported here because the notebook's import cell does not provide them.
    from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

    pagination_xpath = '//*[@class="DC_117_paginationTable"]'
    routes_link = []
    routes_name = []
    seen_links = set()

    try:
        wait(driver, 10).until(EC.presence_of_element_located((By.XPATH, pagination_xpath)))
    except TimeoutException:
        # No pagination control: treat the listing as a single page, which the
        # loop below reads once before stopping at the missing "Next" tab.
        pass

    def first_route_href(drv):
        """Return the first listed route's href, or None while the list re-renders."""
        try:
            elements = drv.find_elements(By.CLASS_NAME, 'route')
            return elements[0].get_attribute('href') if elements else None
        except StaleElementReferenceException:
            return None

    while True:
        route_elements = driver.find_elements(By.CLASS_NAME, 'route')
        page_first_href = route_elements[0].get_attribute('href') if route_elements else None
        for route in route_elements:
            href = route.get_attribute('href')
            # driver.get() cannot open a missing link, and a repeated link
            # would only make the caller scrape the same route twice.
            if not href or href in seen_links:
                continue
            seen_links.add(href)
            routes_link.append(href)
            routes_name.append(route.text.strip())

        try:
            # Re-locate the container on every pass: a reference captured
            # before the first click goes stale if the page re-renders it.
            pagination = driver.find_element(By.XPATH, pagination_xpath)
            next_button = pagination.find_element(By.XPATH, './/div[@class="DC_117_pageTabs " and text()="Next"]')
            driver.execute_script("arguments[0].scrollIntoView();", next_button)
            wait(driver, 10).until(EC.element_to_be_clickable(next_button)).click()
            # Block until the listing actually changes; otherwise the next
            # pass could re-read the page that was just collected.
            wait(driver, 10).until(lambda drv: first_route_href(drv) not in (None, page_first_href))
        except (NoSuchElementException, ElementClickInterceptedException,
                StaleElementReferenceException, TimeoutException):
            break

    return routes_link, routes_name

def scrape_bus_details(driver, url, route_name):
    """Scrape every bus listed on one route page.

    Loads ``url``, scrolls until no further bus entries are loaded, and then
    reads the fields of each entry exactly once.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Route page URL; also stored as ``Route_Link`` in every record.
        route_name: Route label stored as ``Route_Name`` in every record.

    Returns:
        list[dict]: One record per bus-name element found. Empty when no bus
        name element appears within 10 seconds.
    """
    # Local import: TimeoutException is not part of the cell-level imports.
    from selenium.common.exceptions import TimeoutException

    name_locator = (By.CLASS_NAME, "travels.lh-24.f-bold.d-color")

    driver.get(url)
    try:
        wait(driver, 10).until(EC.presence_of_all_elements_located(name_locator))
    except TimeoutException:
        # Nothing listed for this route; the caller already skips empty results.
        return []

    # Scroll first and extract afterwards. Extracting inside the scroll loop
    # re-read the entries from index 0 on every pass and duplicated rows.
    loaded = len(driver.find_elements(*name_locator))
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)  # Wait for new content to load
        now_loaded = len(driver.find_elements(*name_locator))
        if now_loaded <= loaded:
            break  # No new content loaded
        loaded = now_loaded

    bus_name_lists = driver.find_elements(*name_locator)
    bus_type_lists = driver.find_elements(By.CLASS_NAME, "bus-type.f-12.m-top-16.l-color.evBus")
    departure_lists = driver.find_elements(By.CLASS_NAME, "dp-time.f-19.d-color.f-bold")
    duration_lists = driver.find_elements(By.CLASS_NAME, "dur.l-color.lh-24")
    arrival_lists = driver.find_elements(By.CLASS_NAME, "bp-time.f-19.d-color.disp-Inline")
    rating_lists = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
    price_lists = driver.find_elements(By.CLASS_NAME, "fare.d-block")
    seat_availability_lists = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left m-top-16')]")

    def text_at(elements, index, default=''):
        """Return the text of ``elements[index]``, or ``default`` if the list is too short."""
        return elements[index].text if index < len(elements) else default

    # NOTE(review): fields are paired with names purely by position in
    # page-wide lists; if an entry lacks a field, later rows may shift.
    return [
        {
            "Route_Name": route_name,
            "Route_Link": url,
            "Bus_Name": bus_name_lists[i].text,
            "Bus_Type": text_at(bus_type_lists, i),
            "Departure": text_at(departure_lists, i),
            "Duration": text_at(duration_lists, i),
            "Arrival": text_at(arrival_lists, i),
            "Star_Rating": text_at(rating_lists, i, '0'),
            "Price": text_at(price_lists, i),
            "Seat_Availability": text_at(seat_availability_lists, i, '0'),
        }
        for i in range(len(bus_name_lists))
    ]


def scrape_all_pages():
    """Scrape the bus listings of every route of the operator.

    Opens the browser with ``setup_browser``, gathers the route links with
    ``get_route_links_and_names`` and runs ``scrape_bus_details`` on each.
    A route that fails is reported and skipped so the records gathered from
    the other routes are kept. The browser is always closed at the end.

    Returns:
        list[dict]: The bus records of all routes that could be scraped.
    """
    # Local import: WebDriverException is not part of the cell-level imports.
    from selenium.common.exceptions import WebDriverException

    driver = setup_browser()
    all_bus_details = []

    try:
        all_bus_routes_link, all_bus_routes_name = get_route_links_and_names(driver)

        for link, name in zip(all_bus_routes_link, all_bus_routes_name):
            try:
                bus_details = scrape_bus_details(driver, link, name)
            except (WebDriverException, IndexError) as exc:
                # One bad route must not throw away everything collected so far.
                print(f"Skipping route {name!r} ({link}): {exc!r}")
                continue
            if bus_details:
                all_bus_details.extend(bus_details)
    finally:
        driver.quit()

    return all_bus_details

# Scrape all CTU routes, then persist the combined listings as a CSV file.
all_bus_details = scrape_all_pages()
df6 = pd.DataFrame(data=all_bus_details)
df6.to_csv(path_or_buf='Chandigarh.csv', index=False)


In [5]:
# Show the scraped CTU listings as this cell's output.
df6

Unnamed: 0,Route_Name,Route_Link,Bus_Name,Bus_Type,Departure,Duration,Arrival,Star_Rating,Price,Seat_Availability
0,Yamuna Nagar to Chandigarh,https://www.redbus.in/bus-tickets/yamuna-nagar...,Zimindara Travels,AC Sleeper (2+1),20:30,04h 00m,00:30,4.1,INR 799,25 Seats available
1,Yamuna Nagar to Chandigarh,https://www.redbus.in/bus-tickets/yamuna-nagar...,SHRI KRISHNA TRAVELS (JAI SHREE GANESH YATRA CO.),VE A/C Seater / Sleeper (2+1),20:40,02h 50m,23:30,3.7,INR 1299,24 Seats available
2,Yamuna Nagar to Chandigarh,https://www.redbus.in/bus-tickets/yamuna-nagar...,City Land Travels,A/C Seater/Sleeper (2+1),22:20,02h 00m,00:20,3.5,INR 1088,32 Seats available
3,Yamuna Nagar to Chandigarh,https://www.redbus.in/bus-tickets/yamuna-nagar...,KN Nehra Travels,A/C Seater/Sleeper (2+1),22:50,02h 25m,01:15,2.9,INR 799,32 Seats available
4,Yamuna Nagar to Chandigarh,https://www.redbus.in/bus-tickets/yamuna-nagar...,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
7406,Chandigarh to Pathankot,https://www.redbus.in/bus-tickets/chandigarh-t...,Khurana Bus Service,Volvo A/C (2+2),17:00,04h 30m,21:30,5.0,INR 3500,35 Seats available
7407,Chandigarh to Pathankot,https://www.redbus.in/bus-tickets/chandigarh-t...,Laxmi holidays,Volvo 9600 Multi-Axle A/C Sleeper (2+1),23:50,04h 10m,04:00,4.8,1282,21 Seats available
7408,Chandigarh to Pathankot,https://www.redbus.in/bus-tickets/chandigarh-t...,Big Bus,A/C Seater / Sleeper (2+2),20:20,04h 20m,00:40,2.5,INR 1395,39 Seats available
7409,Chandigarh to Pathankot,https://www.redbus.in/bus-tickets/chandigarh-t...,Northern Travels,Bharat Benz A/C Sleeper (2+1),22:30,05h 00m,03:30,2.8,1899,24 Seats available


In [6]:
#BSRTC
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
import pandas as pd
import time
def setup_browser(url="https://www.redbus.in/online-booking/bihar-state-road-transport-corporation-bsrtc/?utm_source=rtchometile"):
    """Start Chrome, open an operator page and wait for its route list.

    Args:
        url: Operator page to open. Defaults to the BSRTC page this cell scrapes.

    Returns:
        The Chrome WebDriver, positioned on ``url`` with at least one
        ``route`` element present.

    Raises:
        Whatever the page load or the 10 second wait raises; the browser is
        closed before the exception propagates.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        driver.maximize_window()
        wait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'route')))
    except BaseException:
        # The caller only receives the driver on success, so it cannot clean
        # up after a failed start; quit here to avoid leaking the browser.
        driver.quit()
        raise
    return driver

def get_route_links_and_names(driver):
    """Collect the link and name of every route listed on the operator page.

    Reads the ``route`` elements of the current page, then clicks the "Next"
    tab and repeats until no further page can be reached.

    Args:
        driver: Selenium WebDriver already showing the operator's route page.

    Returns:
        tuple: ``(routes_link, routes_name)`` as parallel lists in page order.
        Each link appears once; elements without an ``href`` are skipped.
    """
    # Local import: these two exceptions are not part of the cell-level imports.
    from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

    pagination_xpath = '//*[@class="DC_117_paginationTable"]'
    # NOTE(review): locator kept from the original; confirm the site renders a "Next" tab.
    next_xpath = './/div[@class="DC_117_pageTabs " and text()="Next"]'

    def visible_links(drv):
        """Return the hrefs of the route elements currently on the page."""
        return [el.get_attribute('href') for el in drv.find_elements(By.CLASS_NAME, 'route')]

    routes_link = []
    routes_name = []
    seen_links = set()

    try:
        wait(driver, 10).until(EC.presence_of_element_located((By.XPATH, pagination_xpath)))
    except TimeoutException:
        # No pagination bar appeared; the routes already on the page are
        # still collected by the loop below.
        pass

    while True:
        page_links = []
        for route in driver.find_elements(By.CLASS_NAME, 'route'):
            href = route.get_attribute('href')
            page_links.append(href)
            if href and href not in seen_links:
                seen_links.add(href)
                routes_link.append(href)
                routes_name.append(route.text.strip())

        try:
            # Re-locate the bar on every pass so a re-rendered page does not
            # leave us holding a stale element.
            pagination = driver.find_element(By.XPATH, pagination_xpath)
            next_button = pagination.find_element(By.XPATH, next_xpath)
            driver.execute_script("arguments[0].scrollIntoView();", next_button)
            wait(driver, 10).until(EC.element_to_be_clickable(next_button)).click()
            # Only continue once the list really changed; otherwise the same
            # page would be read again (and the loop might never end).
            wait(driver, 10, ignored_exceptions=(StaleElementReferenceException,)).until(
                lambda drv: visible_links(drv) != page_links
            )
        except (NoSuchElementException, ElementClickInterceptedException,
                StaleElementReferenceException, TimeoutException):
            break

    return routes_link, routes_name

def scrape_bus_details(driver, url, route_name):
    """Scrape every bus listed on one route page.

    Loads ``url``, scrolls until no further bus entries are loaded, and then
    reads the fields of each entry exactly once.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Route page URL; also stored as ``Route_Link`` in every record.
        route_name: Route label stored as ``Route_Name`` in every record.

    Returns:
        list[dict]: One record per bus-name element found. Empty when no bus
        name element appears within 10 seconds.
    """
    # Local import: TimeoutException is not part of the cell-level imports.
    from selenium.common.exceptions import TimeoutException

    name_locator = (By.CLASS_NAME, "travels.lh-24.f-bold.d-color")

    driver.get(url)
    try:
        wait(driver, 10).until(EC.presence_of_all_elements_located(name_locator))
    except TimeoutException:
        # Nothing listed for this route; the caller already skips empty results.
        return []

    # Scroll first and extract afterwards. Extracting inside the scroll loop
    # re-read the entries from index 0 on every pass and duplicated rows.
    loaded = len(driver.find_elements(*name_locator))
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)  # Wait for new content to load
        now_loaded = len(driver.find_elements(*name_locator))
        if now_loaded <= loaded:
            break  # No new content loaded
        loaded = now_loaded

    bus_name_lists = driver.find_elements(*name_locator)
    bus_type_lists = driver.find_elements(By.CLASS_NAME, "bus-type.f-12.m-top-16.l-color.evBus")
    departure_lists = driver.find_elements(By.CLASS_NAME, "dp-time.f-19.d-color.f-bold")
    duration_lists = driver.find_elements(By.CLASS_NAME, "dur.l-color.lh-24")
    arrival_lists = driver.find_elements(By.CLASS_NAME, "bp-time.f-19.d-color.disp-Inline")
    rating_lists = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
    price_lists = driver.find_elements(By.CLASS_NAME, "fare.d-block")
    seat_availability_lists = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left m-top-16')]")

    def text_at(elements, index, default=''):
        """Return the text of ``elements[index]``, or ``default`` if the list is too short."""
        return elements[index].text if index < len(elements) else default

    # NOTE(review): fields are paired with names purely by position in
    # page-wide lists; if an entry lacks a field, later rows may shift.
    return [
        {
            "Route_Name": route_name,
            "Route_Link": url,
            "Bus_Name": bus_name_lists[i].text,
            "Bus_Type": text_at(bus_type_lists, i),
            "Departure": text_at(departure_lists, i),
            "Duration": text_at(duration_lists, i),
            "Arrival": text_at(arrival_lists, i),
            "Star_Rating": text_at(rating_lists, i, '0'),
            "Price": text_at(price_lists, i),
            "Seat_Availability": text_at(seat_availability_lists, i, '0'),
        }
        for i in range(len(bus_name_lists))
    ]


def scrape_all_pages():
    """Scrape the bus listings of every route of the operator.

    Opens the browser with ``setup_browser``, gathers the route links with
    ``get_route_links_and_names`` and runs ``scrape_bus_details`` on each.
    A route that fails is reported and skipped so the records gathered from
    the other routes are kept. The browser is always closed at the end.

    Returns:
        list[dict]: The bus records of all routes that could be scraped.
    """
    # Local import: WebDriverException is not part of the cell-level imports.
    from selenium.common.exceptions import WebDriverException

    driver = setup_browser()
    all_bus_details = []

    try:
        all_bus_routes_link, all_bus_routes_name = get_route_links_and_names(driver)

        for link, name in zip(all_bus_routes_link, all_bus_routes_name):
            try:
                bus_details = scrape_bus_details(driver, link, name)
            except (WebDriverException, IndexError) as exc:
                # One bad route must not throw away everything collected so far.
                print(f"Skipping route {name!r} ({link}): {exc!r}")
                continue
            if bus_details:
                all_bus_details.extend(bus_details)
    finally:
        driver.quit()

    return all_bus_details

# Scrape all BSRTC routes, then persist the combined listings as a CSV file.
all_bus_details = scrape_all_pages()
df7 = pd.DataFrame(data=all_bus_details)
df7.to_csv(path_or_buf='Bihar.csv', index=False)


In [7]:
# Show the scraped BSRTC listings as this cell's output.
df7

Unnamed: 0,Route_Name,Route_Link,Bus_Name,Bus_Type,Departure,Duration,Arrival,Star_Rating,Price,Seat_Availability
0,Gopalganj (Bihar) to Delhi,https://www.redbus.in/bus-tickets/gopalganj-to...,Bihar state road transport corporation (BSRTC)...,Volvo AC Seater 2+2,16:30,16h 00m,08:30,3.2,1020,39 Seats available
1,Gopalganj (Bihar) to Delhi,https://www.redbus.in/bus-tickets/gopalganj-to...,Panwar tour and Travels,A/C Seater / Sleeper (2+1),15:40,17h 50m,09:30,3.2,INR 1050,31 Seats available
2,Gopalganj (Bihar) to Delhi,https://www.redbus.in/bus-tickets/gopalganj-to...,ROYAL TRAVELS AND CARGO,A/C Seater / Sleeper (2+2),17:30,15h 00m,08:30,1.8,INR 796,32 Seats available
3,Gopalganj (Bihar) to Delhi,https://www.redbus.in/bus-tickets/gopalganj-to...,Rajdhani Express,A/C Seater / Sleeper (2+2),15:00,16h 00m,07:00,1.0,1757,30 Seats available
4,Gopalganj (Bihar) to Delhi,https://www.redbus.in/bus-tickets/gopalganj-to...,Travel Point World LLP,A/C Seater / Sleeper (2+2),20:20,14h 30m,10:50,1.9,INR 1999,38 Seats available
...,...,...,...,...,...,...,...,...,...,...
246,Patna (Bihar) to Purnea,https://www.redbus.in/bus-tickets/patna-to-purnea,Jay Jagdambay Tour And Travels,A/C Seater / Sleeper (2+1),17:00,09h 30m,02:30,2.0,613,14 Seats available
247,Patna (Bihar) to Purnea,https://www.redbus.in/bus-tickets/patna-to-purnea,Shree Sai Krishna Luxury,A/C Seater / Sleeper (2+1),21:15,10h 00m,07:15,2.6,629,18 Seats available
248,Patna (Bihar) to Katihar,https://www.redbus.in/bus-tickets/patna-to-kat...,Bihar state road transport corporation (BSRTC)...,AC Seater (2+2),21:25,09h 05m,06:30,3.3,INR 468,28 Seats available
249,Patna (Bihar) to Katihar,https://www.redbus.in/bus-tickets/patna-to-kat...,SOBHA TRAVELS (A.T),A/C Seater / Sleeper (2+1),20:40,08h 20m,05:00,3.3,540,28 Seats available


In [9]:
#APSRTC
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
import pandas as pd
import time

def setup_browser(url="https://www.redbus.in/online-booking/apsrtc/?utm_source=rtchometile"):
    """Start Chrome, open an operator page and wait for its route list.

    Args:
        url: Operator page to open. Defaults to the APSRTC page this cell scrapes.

    Returns:
        The Chrome WebDriver, positioned on ``url`` with at least one
        ``route`` element present.

    Raises:
        Whatever the page load or the 10 second wait raises; the browser is
        closed before the exception propagates.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        driver.maximize_window()
        wait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'route')))
    except BaseException:
        # The caller only receives the driver on success, so it cannot clean
        # up after a failed start; quit here to avoid leaking the browser.
        driver.quit()
        raise
    return driver

def get_route_links_and_names(driver):
    """Collect the link and name of every route listed on the operator page.

    Reads the ``route`` elements of the current page, then clicks the "Next"
    tab and repeats until no further page can be reached.

    Args:
        driver: Selenium WebDriver already showing the operator's route page.

    Returns:
        tuple: ``(routes_link, routes_name)`` as parallel lists in page order.
        Each link appears once; elements without an ``href`` are skipped.
    """
    # Local import: these two exceptions are not part of the cell-level imports.
    from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

    pagination_xpath = '//*[@class="DC_117_paginationTable"]'
    # NOTE(review): locator kept from the original; confirm the site renders a "Next" tab.
    next_xpath = './/div[@class="DC_117_pageTabs " and text()="Next"]'

    def visible_links(drv):
        """Return the hrefs of the route elements currently on the page."""
        return [el.get_attribute('href') for el in drv.find_elements(By.CLASS_NAME, 'route')]

    routes_link = []
    routes_name = []
    seen_links = set()

    try:
        wait(driver, 10).until(EC.presence_of_element_located((By.XPATH, pagination_xpath)))
    except TimeoutException:
        # No pagination bar appeared; the routes already on the page are
        # still collected by the loop below.
        pass

    while True:
        page_links = []
        for route in driver.find_elements(By.CLASS_NAME, 'route'):
            href = route.get_attribute('href')
            page_links.append(href)
            if href and href not in seen_links:
                seen_links.add(href)
                routes_link.append(href)
                routes_name.append(route.text.strip())

        try:
            # Re-locate the bar on every pass so a re-rendered page does not
            # leave us holding a stale element.
            pagination = driver.find_element(By.XPATH, pagination_xpath)
            next_button = pagination.find_element(By.XPATH, next_xpath)
            driver.execute_script("arguments[0].scrollIntoView();", next_button)
            wait(driver, 10).until(EC.element_to_be_clickable(next_button)).click()
            # Only continue once the list really changed; otherwise the same
            # page would be read again (and the loop might never end).
            wait(driver, 10, ignored_exceptions=(StaleElementReferenceException,)).until(
                lambda drv: visible_links(drv) != page_links
            )
        except (NoSuchElementException, ElementClickInterceptedException,
                StaleElementReferenceException, TimeoutException):
            break

    return routes_link, routes_name

def scrape_bus_details(driver, url, route_name):
    """Scrape every bus listed on one route page.

    Loads ``url``, scrolls until no further bus entries are loaded, and then
    reads the fields of each entry exactly once.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Route page URL; also stored as ``Route_Link`` in every record.
        route_name: Route label stored as ``Route_Name`` in every record.

    Returns:
        list[dict]: One record per bus-name element found. Empty when no bus
        name element appears within 10 seconds.
    """
    # Local import: TimeoutException is not part of the cell-level imports.
    from selenium.common.exceptions import TimeoutException

    name_locator = (By.CLASS_NAME, "travels.lh-24.f-bold.d-color")

    driver.get(url)
    try:
        wait(driver, 10).until(EC.presence_of_all_elements_located(name_locator))
    except TimeoutException:
        # Nothing listed for this route; the caller already skips empty results.
        return []

    # Scroll first and extract afterwards. Extracting inside the scroll loop
    # re-read the entries from index 0 on every pass and duplicated rows.
    loaded = len(driver.find_elements(*name_locator))
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)  # Wait for new content to load
        now_loaded = len(driver.find_elements(*name_locator))
        if now_loaded <= loaded:
            break  # No new content loaded
        loaded = now_loaded

    bus_name_lists = driver.find_elements(*name_locator)
    bus_type_lists = driver.find_elements(By.CLASS_NAME, "bus-type.f-12.m-top-16.l-color.evBus")
    departure_lists = driver.find_elements(By.CLASS_NAME, "dp-time.f-19.d-color.f-bold")
    duration_lists = driver.find_elements(By.CLASS_NAME, "dur.l-color.lh-24")
    arrival_lists = driver.find_elements(By.CLASS_NAME, "bp-time.f-19.d-color.disp-Inline")
    rating_lists = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
    price_lists = driver.find_elements(By.CLASS_NAME, "fare.d-block")
    seat_availability_lists = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left m-top-16')]")

    def text_at(elements, index, default=''):
        """Return the text of ``elements[index]``, or ``default`` if the list is too short."""
        return elements[index].text if index < len(elements) else default

    # NOTE(review): fields are paired with names purely by position in
    # page-wide lists; if an entry lacks a field, later rows may shift.
    return [
        {
            "Route_Name": route_name,
            "Route_Link": url,
            "Bus_Name": bus_name_lists[i].text,
            "Bus_Type": text_at(bus_type_lists, i),
            "Departure": text_at(departure_lists, i),
            "Duration": text_at(duration_lists, i),
            "Arrival": text_at(arrival_lists, i),
            "Star_Rating": text_at(rating_lists, i, '0'),
            "Price": text_at(price_lists, i),
            "Seat_Availability": text_at(seat_availability_lists, i, '0'),
        }
        for i in range(len(bus_name_lists))
    ]


def scrape_all_pages():
    """Scrape the bus listings of every route of the operator.

    Opens the browser with ``setup_browser``, gathers the route links with
    ``get_route_links_and_names`` and runs ``scrape_bus_details`` on each.
    A route that fails is reported and skipped so the records gathered from
    the other routes are kept. The browser is always closed at the end.

    Returns:
        list[dict]: The bus records of all routes that could be scraped.
    """
    # Local import: WebDriverException is not part of the cell-level imports.
    from selenium.common.exceptions import WebDriverException

    driver = setup_browser()
    all_bus_details = []

    try:
        all_bus_routes_link, all_bus_routes_name = get_route_links_and_names(driver)

        for link, name in zip(all_bus_routes_link, all_bus_routes_name):
            try:
                bus_details = scrape_bus_details(driver, link, name)
            except (WebDriverException, IndexError) as exc:
                # One bad route must not throw away everything collected so far.
                print(f"Skipping route {name!r} ({link}): {exc!r}")
                continue
            if bus_details:
                all_bus_details.extend(bus_details)
    finally:
        driver.quit()

    return all_bus_details

# Scrape all APSRTC routes, then persist the combined listings as a CSV file.
all_bus_details = scrape_all_pages()
df8 = pd.DataFrame(data=all_bus_details)
df8.to_csv(path_or_buf='Andra.csv', index=False)


In [10]:
# Show the scraped APSRTC listings as this cell's output.
df8

Unnamed: 0,Route_Name,Route_Link,Bus_Name,Bus_Type,Departure,Duration,Arrival,Star_Rating,Price,Seat_Availability
0,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...,FRESHBUS,Electric A/C Seater (2+2),23:10,06h 25m,05:35,4.6,649,24 Seats available
1,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...,IntrCity SmartBus,Bharat Benz A/C Seater /Sleeper (2+1),23:50,05h 45m,05:35,4.5,INR 612,33 Seats available
2,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...,FRESHBUS,Electric A/C Seater (2+2),13:10,06h 15m,19:25,4.6,549,20 Seats available
3,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...,AdIntrCity SmartBus,A/C Seater / Sleeper (2+1),23:05,06h 55m,06:00,4.4,INR 661,35 Seats available
4,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
23004,Visakhapatnam to Kakinada,https://www.redbus.in/bus-tickets/visakhapatna...,Navayuga Travels,A/C Sleeper (2+1),22:00,04h 10m,02:10,3.6,INR 1099,21 Seats available
23005,Visakhapatnam to Kakinada,https://www.redbus.in/bus-tickets/visakhapatna...,Sai RK Travels,Non A/C Seater / Sleeper (2+1),22:40,03h 50m,02:30,4.4,INR 750,15 Seats available
23006,Visakhapatnam to Kakinada,https://www.redbus.in/bus-tickets/visakhapatna...,Morning Star Travels,A/C Sleeper (2+1),21:10,05h 15m,02:25,2.7,INR 990,8 Seats available
23007,Visakhapatnam to Kakinada,https://www.redbus.in/bus-tickets/visakhapatna...,Sai Sree Travels,A/C Sleeper (2+1),22:00,04h 30m,02:30,2.7,INR 999,24 Seats available


In [1]:
#ASTC
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
import pandas as pd
import time

def setup_browser(url="https://www.redbus.in/online-booking/astc/?utm_source=rtchometile"):
    """Start Chrome, open an operator page and wait for its route list.

    Args:
        url: Operator page to open. Defaults to the ASTC page this cell scrapes.

    Returns:
        The Chrome WebDriver, positioned on ``url`` with at least one
        ``route`` element present.

    Raises:
        Whatever the page load or the 10 second wait raises; the browser is
        closed before the exception propagates.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        driver.maximize_window()
        wait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'route')))
    except BaseException:
        # The caller only receives the driver on success, so it cannot clean
        # up after a failed start; quit here to avoid leaking the browser.
        driver.quit()
        raise
    return driver

def get_route_links_and_names(driver):
    """Collect the link and name of every route listed on the operator page.

    Reads the ``route`` elements of the current page, then clicks the "Next"
    tab and repeats until no further page can be reached.

    Args:
        driver: Selenium WebDriver already showing the operator's route page.

    Returns:
        tuple: ``(routes_link, routes_name)`` as parallel lists in page order.
        Each link appears once; elements without an ``href`` are skipped.
    """
    # Local import: these two exceptions are not part of the cell-level imports.
    from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

    pagination_xpath = '//*[@class="DC_117_paginationTable"]'
    # NOTE(review): locator kept from the original; confirm the site renders a "Next" tab.
    next_xpath = './/div[@class="DC_117_pageTabs " and text()="Next"]'

    def visible_links(drv):
        """Return the hrefs of the route elements currently on the page."""
        return [el.get_attribute('href') for el in drv.find_elements(By.CLASS_NAME, 'route')]

    routes_link = []
    routes_name = []
    seen_links = set()

    try:
        wait(driver, 10).until(EC.presence_of_element_located((By.XPATH, pagination_xpath)))
    except TimeoutException:
        # No pagination bar appeared; the routes already on the page are
        # still collected by the loop below.
        pass

    while True:
        page_links = []
        for route in driver.find_elements(By.CLASS_NAME, 'route'):
            href = route.get_attribute('href')
            page_links.append(href)
            if href and href not in seen_links:
                seen_links.add(href)
                routes_link.append(href)
                routes_name.append(route.text.strip())

        try:
            # Re-locate the bar on every pass so a re-rendered page does not
            # leave us holding a stale element.
            pagination = driver.find_element(By.XPATH, pagination_xpath)
            next_button = pagination.find_element(By.XPATH, next_xpath)
            driver.execute_script("arguments[0].scrollIntoView();", next_button)
            wait(driver, 10).until(EC.element_to_be_clickable(next_button)).click()
            # Only continue once the list really changed; otherwise the same
            # page would be read again (and the loop might never end).
            wait(driver, 10, ignored_exceptions=(StaleElementReferenceException,)).until(
                lambda drv: visible_links(drv) != page_links
            )
        except (NoSuchElementException, ElementClickInterceptedException,
                StaleElementReferenceException, TimeoutException):
            break

    return routes_link, routes_name

def scrape_bus_details(driver, url, route_name):
    """Scrape every bus listed on one route page.

    Loads ``url``, scrolls until no further bus entries are loaded, and then
    reads the fields of each entry exactly once.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Route page URL; also stored as ``Route_Link`` in every record.
        route_name: Route label stored as ``Route_Name`` in every record.

    Returns:
        list[dict]: One record per bus-name element found. Empty when no bus
        name element appears within 10 seconds.
    """
    # Local import: TimeoutException is not part of the cell-level imports.
    from selenium.common.exceptions import TimeoutException

    name_locator = (By.CLASS_NAME, "travels.lh-24.f-bold.d-color")

    driver.get(url)
    try:
        wait(driver, 10).until(EC.presence_of_all_elements_located(name_locator))
    except TimeoutException:
        # Nothing listed for this route; the caller already skips empty results.
        return []

    # Scroll first and extract afterwards. Extracting inside the scroll loop
    # re-read the entries from index 0 on every pass and duplicated rows.
    loaded = len(driver.find_elements(*name_locator))
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)  # Wait for new content to load
        now_loaded = len(driver.find_elements(*name_locator))
        if now_loaded <= loaded:
            break  # No new content loaded
        loaded = now_loaded

    bus_name_lists = driver.find_elements(*name_locator)
    bus_type_lists = driver.find_elements(By.CLASS_NAME, "bus-type.f-12.m-top-16.l-color.evBus")
    departure_lists = driver.find_elements(By.CLASS_NAME, "dp-time.f-19.d-color.f-bold")
    duration_lists = driver.find_elements(By.CLASS_NAME, "dur.l-color.lh-24")
    arrival_lists = driver.find_elements(By.CLASS_NAME, "bp-time.f-19.d-color.disp-Inline")
    rating_lists = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
    price_lists = driver.find_elements(By.CLASS_NAME, "fare.d-block")
    seat_availability_lists = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left m-top-16')]")

    def text_at(elements, index, default=''):
        """Return the text of ``elements[index]``, or ``default`` if the list is too short."""
        return elements[index].text if index < len(elements) else default

    # NOTE(review): fields are paired with names purely by position in
    # page-wide lists; if an entry lacks a field, later rows may shift.
    return [
        {
            "Route_Name": route_name,
            "Route_Link": url,
            "Bus_Name": bus_name_lists[i].text,
            "Bus_Type": text_at(bus_type_lists, i),
            "Departure": text_at(departure_lists, i),
            "Duration": text_at(duration_lists, i),
            "Arrival": text_at(arrival_lists, i),
            "Star_Rating": text_at(rating_lists, i, '0'),
            "Price": text_at(price_lists, i),
            "Seat_Availability": text_at(seat_availability_lists, i, '0'),
        }
        for i in range(len(bus_name_lists))
    ]


def scrape_all_pages():
    """Scrape the bus listings of every route of the operator.

    Opens the browser with ``setup_browser``, gathers the route links with
    ``get_route_links_and_names`` and runs ``scrape_bus_details`` on each.
    A route that fails is reported and skipped so the records gathered from
    the other routes are kept. The browser is always closed at the end.

    Returns:
        list[dict]: The bus records of all routes that could be scraped.
    """
    # Local import: WebDriverException is not part of the cell-level imports.
    from selenium.common.exceptions import WebDriverException

    driver = setup_browser()
    all_bus_details = []

    try:
        all_bus_routes_link, all_bus_routes_name = get_route_links_and_names(driver)

        for link, name in zip(all_bus_routes_link, all_bus_routes_name):
            try:
                bus_details = scrape_bus_details(driver, link, name)
            except (WebDriverException, IndexError) as exc:
                # One bad route must not throw away everything collected so far.
                print(f"Skipping route {name!r} ({link}): {exc!r}")
                continue
            if bus_details:
                all_bus_details.extend(bus_details)
    finally:
        driver.quit()

    return all_bus_details

# Scrape all ASTC routes, then persist the combined listings as a CSV file.
all_bus_details = scrape_all_pages()
df9 = pd.DataFrame(data=all_bus_details)
df9.to_csv(path_or_buf='Assam.csv', index=False)


In [2]:
# Show the scraped ASTC listings as this cell's output.
df9

Unnamed: 0,Route_Name,Route_Link,Bus_Name,Bus_Type,Departure,Duration,Arrival,Star_Rating,Price,Seat_Availability
0,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...,MAA Anada (UNDER ASTC),AC Seater (2+2),05:20,03h 10m,08:30,3.1,INR 304.2,22 Seats available
1,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...,Kanchan Travels,AC Seater (2+2),05:30,03h 15m,08:45,4.3,346,27 Seats available
2,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...,Mahi Travels(Under ASTC),A/C Seater (2+2),05:40,03h 15m,08:55,4.5,357,13 Seats available
3,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...,SHIVAM TRAVELS,Bharat Benz A/C Seater (2+1),15:00,04h 15m,19:15,4.1,315,1 Seat available
4,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...,WARISPIYA TRAVELS,AC Seater (2+2),05:45,03h 15m,09:00,4.2,INR 360,26 Seats available
...,...,...,...,...,...,...,...,...,...,...
374,Jorhat to Dibrugarh,https://www.redbus.in/bus-tickets/jorhat-to-di...,Bashudev Transline (Under ASTC),A/C Seater (2+1),14:10,03h 50m,18:00,4.3,INR 550,7 Seats available
375,Jorhat to Dibrugarh,https://www.redbus.in/bus-tickets/jorhat-to-di...,Christina Travels,NON A/C Seater (2+2),12:30,04h 00m,16:30,2.8,420,3 Seats available
376,North Lakhimpur to Jorhat,https://www.redbus.in/bus-tickets/north-lakhim...,Padmanav Travels,NON A/C Seater Push Back (2+2),05:30,08h 05m,13:35,3.6,561,38 Seats available
377,North Lakhimpur to Jorhat,https://www.redbus.in/bus-tickets/north-lakhim...,Priyanka,NON A/C Seater (2+1),17:30,11h 35m,05:05,3.7,INR 600,29 Seats available


In [13]:
# NOTE(review): this cell's recorded output is a NameError because `df1` is not
# defined in the kernel session that ran it; run the cell that creates `df1`
# first, or remove this cell.
df1

NameError: name 'df1' is not defined