In [3]:
import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import pandas as pd

# Collect every route name and its listing URL from the SBSTC operator page,
# walking the paginated route table, then save the result to CSV.
# `routelist` / `routelinklist` are module-level on purpose: the per-route
# scraping cell further down reads them.

# Number of pagination tabs to walk. Hard-coded for this operator page.
# NOTE(review): confirm the site still exposes this many pages.
TOTAL_PAGES = 4

link = 'https://www.redbus.in/online-booking/south-bengal-state-transport-corporation-sbstc/'

routelist = []
routelinklist = []

driver = webdriver.Chrome()
try:
    driver.maximize_window()
    driver.get(link)

    wait = WebDriverWait(driver, 20)

    # Page down until the page source stops changing between two key presses
    # (presumably to let dynamically rendered content finish loading).
    while True:
        old_page_source = driver.page_source
        body = driver.find_element(By.TAG_NAME, "body")
        body.send_keys(Keys.PAGE_DOWN)
        time.sleep(2)
        if driver.page_source == old_page_source:
            break

    for page_number in range(1, TOTAL_PAGES + 1):
        routes = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'route')))
        for route in routes:
            route_text = route.text
            route_link = route.get_attribute('href')
            routelist.append(route_text)
            routelinklist.append(route_link)

        # Move to the next tab unless this was the last page.
        if page_number < TOTAL_PAGES:
            next_page_xpath = f'//div[contains(@class,"DC_117_pageTabs") and text()="{page_number + 1}"]'
            next_page = wait.until(EC.element_to_be_clickable((By.XPATH, next_page_xpath)))
            driver.execute_script("arguments[0].scrollIntoView(true);", next_page)
            next_page.click()
            # Fixed pause so the next page's routes replace the current ones
            # before they are read on the following iteration.
            time.sleep(5)

        # Reported only after the page was actually scraped; a failure above
        # propagates instead of being announced as "processed".
        print(f"Processed page {page_number}")
finally:
    # Always release the browser, even when a wait times out mid-run.
    driver.quit()

data = {'Route': routelist, 'Link': routelinklist}
df = pd.DataFrame(data)

print(df)
df.to_csv('SBSTC_routes_data.csv', index=False)


Processed page 1
Processed page 2
Processed page 3
Processed page 4
                                                Route  \
0                                Durgapur to Calcutta   
1                                  Kolkata to Burdwan   
2                                  Haldia to Calcutta   
3                                   Kolkata to Haldia   
4                   Kolkata to Durgapur (West Bengal)   
5                   Kolkata to Arambagh (West Bengal)   
6                                Midnapore to Kolkata   
7                                    Kolkata to Digha   
8                                   Digha to Calcutta   
9                                  Kolkata to Bankura   
10                               Kolkata to Midnapore   
11                   Kolkata to Asansol (West Bengal)   
12                                Kolkata to Nimtouri   
13                                Jhargram to Kolkata   
14                         Kolkata to Contai (Kanthi)   
15                  

In [6]:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Visit every route page gathered by the route-listing cell (`routelinklist`
# paired with `routelist`) and scrape one row per listed bus into SBSTC.csv.

state = 'SBSTC'

# Pause after each scroll before re-measuring the page height.
SCROLL_PAUSE_SECONDS = 3

# Column order of the output CSV.
OUTPUT_COLUMNS = [
    'State',
    'Route',
    'Bus Name',
    'Bus Type',
    'Departing Time',
    'Duration Time',
    'Reaching Time',
    'Price',
    'Seats Available',
    'Star Rating',
]

# XPath used to locate each scraped field, keyed by its output column.
FIELD_XPATHS = {
    'Bus Name': "//div[@class='travels lh-24 f-bold d-color']",
    'Bus Type': "//div[@class='bus-type f-12 m-top-16 l-color evBus']",
    'Departing Time': "//div[@class='dp-time f-19 d-color f-bold']",
    'Duration Time': "//div[@class='dur l-color lh-24']",
    'Reaching Time': "//div[@class='bp-time f-19 d-color disp-Inline']",
    'Star Rating': "//div[@class='rating-sec lh-24']//span",
    'Price': "//span[contains(@class, 'f-19')]",
    'Seats Available': "//div[@class='seat-left m-top-30']",
}

route_frames = []  # one DataFrame per route, concatenated once at the end
master_df = pd.DataFrame()

driver = webdriver.Chrome()
try:
    driver.maximize_window()

    for link, routename in zip(routelinklist, routelist):
        driver.get(link)

        # Scroll to the bottom repeatedly until the document height stops
        # growing, i.e. no further content was appended by the last scroll.
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(SCROLL_PAUSE_SECONDS)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        elements = {
            column: driver.find_elements(By.XPATH, xpath)
            for column, xpath in FIELD_XPATHS.items()
        }

        # Each field is located independently across the whole page, so the
        # lists can differ in length. zip() below stops at the shortest one,
        # which drops rows and can pair values from different buses.
        # Surface that instead of truncating silently.
        # NOTE(review): scraping per bus container would fix alignment, but
        # the container markup is not known here.
        counts = {column: len(found) for column, found in elements.items()}
        if len(set(counts.values())) > 1:
            print(
                f"Warning: field counts differ for {routename} ({counts}); "
                "rows are truncated to the shortest and may be misaligned."
            )

        records = []
        for row in zip(*elements.values()):
            record = {'State': state, 'Route': routename}
            record.update(
                {column: element.text for column, element in zip(elements, row)}
            )
            records.append(record)

        # Explicit columns keep the schema stable even when a route has no buses.
        df = pd.DataFrame(records, columns=OUTPUT_COLUMNS)
        route_frames.append(df)

        print(f"Data from {link} added successfully!")
finally:
    # Always release the browser, then persist whatever was collected so a
    # failure on one route does not discard the routes already scraped.
    driver.quit()
    if route_frames:
        master_df = pd.concat(route_frames, ignore_index=True)
        master_df.to_csv('SBSTC.csv', index=False)

print("Final CSV saved.")


Data from https://www.redbus.in/bus-tickets/durgapur-to-kolkata added successfully!
Data from https://www.redbus.in/bus-tickets/kolkata-to-burdwan added successfully!
Data from https://www.redbus.in/bus-tickets/haldia-to-kolkata added successfully!
Data from https://www.redbus.in/bus-tickets/kolkata-to-haldia added successfully!
Data from https://www.redbus.in/bus-tickets/kolkata-to-durgapur added successfully!
Data from https://www.redbus.in/bus-tickets/kolkata-to-arambagh-west-bengal added successfully!
Data from https://www.redbus.in/bus-tickets/midnapore-to-kolkata added successfully!
Data from https://www.redbus.in/bus-tickets/kolkata-to-digha added successfully!
Data from https://www.redbus.in/bus-tickets/digha-to-kolkata added successfully!
Data from https://www.redbus.in/bus-tickets/kolkata-to-bankura added successfully!
Data from https://www.redbus.in/bus-tickets/kolkata-to-midnapore added successfully!
Data from https://www.redbus.in/bus-tickets/kolkata-to-asansol added succe