# In [None]:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from datetime import datetime
import pytz

# Start one Chrome browser session through Selenium; the functions and loops
# below all drive this single module-level `driver`.
driver = webdriver.Chrome()

# Landing pages to crawl, one per transport-corporation slug on redbus.in.
# Every entry is visited in order; drop slugs from the tuple to crawl fewer.
state_links = [
    f"https://www.redbus.in/online-booking/{slug}/?utm_source=rtchometile"
    for slug in (
        "ksrtc-kerala",
        "apsrtc",
        "tsrtc",
        "ktcl",
        "rsrtc",
        "south-bengal-state-transport-corporation-sbstc",
        "hrtc",
        "astc",
        "uttar-pradesh-state-road-transport-corporation-upsrtc",
        "wbtc-ctc",
    )
]

# Every scraped record is stamped with the current date in the Asia/Kolkata
# timezone, rendered as DD-MM-YYYY.
india = pytz.timezone("Asia/Kolkata")
today_dt = datetime.now(tz=india)
date_of_scrape = f"{today_dt:%d-%m-%Y}"

# One dict per bus card is appended here by the crawl loop further down.
all_buses_data = []

def safe_text(by, value, context):
    """Return the text of the first element matching (by, value) under context.

    Args:
        by: Locator strategy passed through to ``context.find_element``.
        value: Locator value passed through to ``context.find_element``.
        context: Any object exposing ``find_element(by, value)``; the callers
            in this file pass a bus-card element.

    Returns:
        The matched element's ``.text``, or ``None`` when the lookup or the
        text read raises an ordinary exception (for example the element is
        absent from this card).
    """
    try:
        return context.find_element(by, value).text
    except Exception:
        # Best-effort lookup: a missing field becomes None. Only ``Exception``
        # is caught so KeyboardInterrupt / SystemExit still stop the scrape.
        return None

# CSS class carried by each bus result card on a route page.
_BUS_CARD_CLASS = "tupleWrapper___aa6a16"


def _scroll_until_stable(stall_wait, aligned_wait, unaligned_wait):
    """Scroll the bus list until the number of cards stops growing.

    Each round counts the bus cards currently in the DOM, scrolls the last
    one into view, then pauses. The loop ends after two consecutive rounds
    in which the count did not increase, or as soon as the scroll script
    fails. Presumably the page appends more cards as it is scrolled; that
    behaviour belongs to the site and is not visible in this file.

    Args:
        stall_wait: Seconds to sleep right after a round with no new cards.
        aligned_wait: Seconds to sleep at the end of a round whose card count
            is a multiple of 4 (including 0).
        unaligned_wait: Seconds to sleep at the end of any other round.

    Returns:
        A fresh list of all bus-card elements found once scrolling stops.
    """
    seen_count = 0
    stable_rounds = 0

    while True:
        buses = driver.find_elements(By.CLASS_NAME, _BUS_CARD_CLASS)
        current_count = len(buses)

        if current_count > seen_count:
            seen_count = current_count
            stable_rounds = 0
        else:
            stable_rounds += 1
            time.sleep(stall_wait)

        if stable_rounds >= 2:
            break

        if buses:
            try:
                driver.execute_script(
                    "arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});",
                    buses[-1],
                )
            except Exception as exc:
                # Best effort: keep whatever is loaded so far, but say why the
                # scroll stopped instead of hiding the failure.
                print(f"⚠️ Stopped scrolling early: {type(exc).__name__}")
                break

        time.sleep(aligned_wait if current_count % 4 == 0 else unaligned_wait)

    # Re-query so the caller gets current element handles, not ones captured
    # during an earlier round.
    return driver.find_elements(By.CLASS_NAME, _BUS_CARD_CLASS)


def scroll_stepwise_private_buses():
    """Scroll the current bus list to its end using the shorter pauses (4s/4s/2s).

    Returns:
        List of bus-card elements present after scrolling stops.
    """
    return _scroll_until_stable(stall_wait=4, aligned_wait=4, unaligned_wait=2)


def scroll_stepwise_government_buses():
    """Scroll the current bus list to its end using the longer pauses (5s/5s/3s).

    Returns:
        List of bus-card elements present after scrolling stops.
    """
    return _scroll_until_stable(stall_wait=5, aligned_wait=5, unaligned_wait=3)

# Maps each scraped field to the CSS class of the element holding its text
# inside a bus card. Insertion order fixes the field order within a record.
_BUS_FIELD_CLASSES = {
    "busname": "travelsName___495898",
    "price": "finalFare___898bb7",
    "bustype": "busType___13ff4b",
    "departing_time": "boardingTime___aced27",
    "duration": "duration___5b44b1",
    "reaching_time": "droppingTime___616c2f",
    "star_rating": "rating___7724f1",
    "seats_available": "totalSeats___ba48cf",
}


def _bus_record(bus, state_name, route_name, operator_type):
    """Build one output record from a bus-card element.

    Args:
        bus: Bus-card element to read the fields from.
        state_name: Heading text of the state page being crawled.
        route_name: Display name of the route being crawled.
        operator_type: Label stored under "type" ("Private" or "Government").

    Returns:
        Dict with state, route, the fields in ``_BUS_FIELD_CLASSES`` (None for
        any field ``safe_text`` could not read), type and date_of_scrape.
    """
    record = {"state": state_name, "route": route_name}
    for field, class_name in _BUS_FIELD_CLASSES.items():
        record[field] = safe_text(By.CLASS_NAME, class_name, bus)
    record["type"] = operator_type
    record["date_of_scrape"] = date_of_scrape
    return record


def _collect_route_links():
    """Walk the paginated route list on the current page and gather its links.

    Returns:
        List of {"route_name", "route_link"} dicts, one per "route" element
        that has an href, in page order.
    """
    routes = []
    while True:
        time.sleep(2)
        for r in driver.find_elements(By.CLASS_NAME, "route"):
            route_name = r.text
            route_link = r.get_attribute("href")
            if route_link:
                routes.append({"route_name": route_name, "route_link": route_link})

        try:
            pages = driver.find_elements(By.CLASS_NAME, "DC_117_pageTabs")
            current_active = driver.find_element(By.CLASS_NAME, "DC_117_pageActive")
            current_index = pages.index(current_active)

            if current_index + 1 >= len(pages):
                break
            # JS click on the tab that follows the active one.
            driver.execute_script("arguments[0].click();", pages[current_index + 1])
        except Exception:
            # No pager, or the active tab could not be located: treat what
            # has been collected as the full list.
            break
    return routes


# Main crawl: for every state page collect its routes, then for every route
# scrape the initially shown bus list and, if present, the government tab.
# The whole crawl sits in try/finally so the browser is closed even when a
# page load or lookup raises part-way through.
try:
    for state_url in state_links:
        print(f"\n🔄 Scraping state page: {state_url}")
        driver.get(state_url)
        time.sleep(6)

        try:
            state_name = driver.find_element(By.CLASS_NAME, "D120_search_h1").text.strip()
        except Exception:
            state_name = "Unknown Transport Corp"

        print(f"📍 State Transport Corp: {state_name}")

        routes = _collect_route_links()

        for route in routes:
            print(f"\n➡️ Scraping route: {route['route_name']}")
            driver.get(route["route_link"])
            time.sleep(6)

            # NOTE(review): the list shown right after page load is labelled
            # "Private"; whether it excludes government buses depends on the
            # site and is not verifiable from this file.
            private_buses = scroll_stepwise_private_buses()
            all_buses_data.extend(
                _bus_record(bus, state_name, route["route_name"], "Private")
                for bus in private_buses
            )

            try:
                for tab in driver.find_elements(By.CLASS_NAME, "rtcName___f9dd9e"):
                    gov_tab_text = tab.text.strip().lower()
                    if "rtc" not in gov_tab_text and "buses" not in gov_tab_text:
                        continue

                    print(f"🚌 Clicking Government tab: {tab.text}")
                    driver.execute_script("arguments[0].click();", tab)
                    time.sleep(4)

                    gov_buses = scroll_stepwise_government_buses()
                    all_buses_data.extend(
                        _bus_record(bus, state_name, route["route_name"], "Government")
                        for bus in gov_buses
                    )
                    # Only the first matching tab is scraped.
                    break
                else:
                    # Reached only when no tab matched (or none exist).
                    print("⚠️ Government tab not found")
            except Exception as exc:
                # Keep going with the next route, but report the real cause.
                print(f"⚠️ Government tab scrape failed: {type(exc).__name__}: {exc}")

    print(f"\n✅ Total Buses Collected: {len(all_buses_data)}")
    for row in all_buses_data:
        print(row)
finally:
    driver.quit()
