In [77]:
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime, timedelta

---

## 1. Price parsing

- `original_price`: Giá gốc (giá chưa giảm, đơn vị: VND)
- `sale_price`: Giá khuyến mãi (giá đã giảm, đơn vị: VND)
- `percent_discount`: Phần trăm số tiền đã giảm (đơn vị: %)

<pre>
- Có giá khuyến mãi: 
    &lt;fare-sale&gt; chứa giá khuyến mãi
        &lt;fareSmall&gt; chứa giá gốc và phần trăm ưu đãi
- Không có giá khuyến mãi:
    &lt;fare&gt; chứa giá gốc
        &lt;fareSmall&gt; không chứa dữ liệu
</pre>

In [78]:
def has_no_discount_price(block) -> bool:
    """
    Tell whether a trip block is listed WITHOUT a promotional price.

    The decision is purely structural: the block counts as "no discount"
    when it contains a ``<div class="fare">`` element.

    Returns
    -------
    bool
        True  -> a ``fare`` div was found (no promotional price).
        False -> no ``fare`` div was found (promotional layout).
    """
    return bool(block.find('div', class_='fare'))

def parse_fare(block):
    """
    Read the price of a trip that has no promotional price.

    The text of the ``<div class="fare">`` element is taken, the currency
    mark and the leading "from" prefix are removed, and what remains is
    returned as a string (no numeric conversion happens here).

    Returns
    -------
    dict
        ``original_price`` holds the cleaned text; ``sale_price`` and
        ``percent_discount`` are always None for this layout.
    """
    raw_text = block.find('div', class_='fare').get_text(strip=True)

    # Strip the currency symbol first, then the prefix, then stray spaces.
    without_currency = raw_text.replace("ƒë", "")
    cleaned = without_currency.replace('T·ª´ ', '').strip()

    return dict(original_price=cleaned, sale_price=None, percent_discount=None)

def parse_fare_small(block):
    """
    Read the struck-through original price and the discount badge of a
    discounted trip.

    Both values live inside ``<div class="fareSmall">``: the original price
    in a nested ``<div class="small">`` and the discount percentage in a
    nested ``<div class="percent">``.

    Returns
    -------
    tuple
        ``(original_price, percent_discount)``. Each item is the element's
        stripped text, or None when the element (or the whole ``fareSmall``
        wrapper) is absent.
    """
    fare_small = block.find('div', class_='fareSmall')
    if fare_small is None:
        return None, None

    # Each child is looked up once and checked explicitly. A missing
    # ``small`` div used to raise AttributeError here.
    small_tag = fare_small.find('div', class_='small')
    if small_tag is not None:
        original_price = small_tag.get_text(strip=True).replace("ƒë", "").strip()
    else:
        original_price = None

    percent_tag = fare_small.find('div', class_='percent')
    percent_discount = percent_tag.get_text(strip=True) if percent_tag is not None else None

    return original_price, percent_discount

def parse_fare_sale(block):
    """
    Read the prices of a trip that is shown with a promotional price.

    The promotional price comes from ``<div class="fare-sale">``; the
    original price and the discount percentage are delegated to
    ``parse_fare_small``.

    Returns
    -------
    dict
        Keys ``original_price``, ``sale_price`` and ``percent_discount``.
        ``sale_price`` is None when the ``fare-sale`` div is missing or has
        no text.
    """
    sale_tag = block.find('div', class_='fare-sale')
    original_price, percent_discount = parse_fare_small(block)

    sale_text = sale_tag.get_text(strip=True) if sale_tag else ""
    if sale_text:
        # Same clean-up as the regular fare: currency mark, prefix, spaces.
        sale_price = sale_text.replace("ƒë", "").replace('T·ª´ ', '').strip()
    else:
        sale_price = None

    return dict(
        original_price=original_price,
        sale_price=sale_price,
        percent_discount=percent_discount,
    )

In [79]:
def parse_price(block):
    """
    Extract the price information of one trip block.

    Dispatches to ``parse_fare`` when the block has no promotional price
    and to ``parse_fare_sale`` otherwise.

    Returns
    -------
    dict
        Keys ``original_price``, ``sale_price`` and ``percent_discount``
        (the last two may be None).
    """
    parser = parse_fare if has_no_discount_price(block) else parse_fare_sale
    return parser(block)

---

## 2. Bus info parsing

In [80]:
def parser_trip_bus_info(container):
    """
    Extract the bus operator's headline information from one trip container.

    Parameters
    ----------
    container : bs4 element (or any object exposing ``find``)
        The block that holds a single trip.

    Returns
    -------
    dict
        ``bus_name``   - text of ``<div class="bus-name">``,
        ``bus_rating`` - text of the first ``<span>`` inside
                         ``<div class="bus-rating">``,
        ``seat_type``  - text of ``<div class="seat-type">``.
        Any element that is absent yields None for its key.
    """

    def _text_or_none(tag):
        # Missing elements are reported as None instead of raising.
        return tag.get_text(strip=True) if tag is not None else None

    bus_name = _text_or_none(container.find('div', class_='bus-name'))

    # The rating wrapper itself may be absent; chaining ``.find('span')``
    # directly on the lookup result would raise AttributeError on None.
    rating_box = container.find('div', class_='bus-rating')
    rating_element = rating_box.find('span') if rating_box is not None else None
    bus_rating = _text_or_none(rating_element)

    seat_type = _text_or_none(container.find('div', class_='seat-type'))

    return {
        'bus_name': bus_name,
        'bus_rating': bus_rating,
        'seat_type': seat_type
    }

---

## 3. Route parsing

In [81]:
# D·ªØ li·ªáu n√†y n·∫±m ·ªü √¥ filter chuy·∫øn ƒëi

def parse_route_info(block):
    """
    Extract the route chosen in the page's search/filter form.

    Looks up three independent elements:

    * ``<p class="date-input-value">`` - its text is the departure date,
    * the element with ``id="from_input"`` - its ``value`` attribute is the
      starting point,
    * the element with ``id="to_input"`` - its ``value`` attribute is the
      destination.

    Each field is resolved on its own, so one missing element no longer
    wipes out the other two.

    Returns
    -------
    dict
        Keys ``departure_date``, ``start_point`` and ``destination``; a key
        is None when its element is not present (or ``block`` is None).
    """
    route = {
        'departure_date': None,
        'start_point': None,
        'destination': None
    }

    if block is None:
        return route

    date_tag = block.find('p', class_='date-input-value')
    if date_tag is not None:
        route['departure_date'] = date_tag.get_text(strip=True)

    from_input = block.find(id="from_input")
    if from_input is not None:
        route['start_point'] = from_input.get('value')

    to_input = block.find(id="to_input")
    if to_input is not None:
        route['destination'] = to_input.get('value')

    return route

---

## 4. Details trip info

### 4.1 Departure

In [82]:
# D·ªØ li·ªáu n√†y n·∫±m trong container > 'from_content'

def parse_departure_trip_info(from_content):
    """
    Extract the departure details from a trip's "from" container.

    Parameters
    ----------
    from_content : bs4 element or None
        The departure sub-block of a trip.

    Returns
    -------
    dict
        ``departure_time`` - text of ``<div class="hour">``,
        ``pick_up_point``  - text of ``<div class="place">``.
        Both are None when the element, or the whole container, is missing.
        A dict is returned in every case so the result can always be merged
        with other dicts.
    """
    departure_time = None
    pick_up_point = None

    if from_content:
        from_hour_tag = from_content.find('div', class_='hour')
        if from_hour_tag is not None:
            departure_time = from_hour_tag.get_text(strip=True)

        from_place_tag = from_content.find('div', class_='place')
        if from_place_tag is not None:
            pick_up_point = from_place_tag.get_text(strip=True)

    return {
        'departure_time': departure_time,
        'pick_up_point': pick_up_point
    }

### 4.2 Arrival

In [83]:
def parse_arrival_trip_info(to_content):
    """
    Extract the arrival details from a trip's "to" container.

    Parameters
    ----------
    to_content : bs4 element or None
        The arrival sub-block of a trip.

    Returns
    -------
    dict
        ``arrival_date``  - text of ``<span class="text-date-arrival-time">``,
        ``arrival_time``  - text of ``<div class="hour">`` inside
                            ``<div class="content-to-info">``,
        ``drop_of_point`` - text of ``<div class="place">`` inside the same
                            wrapper.
        Every value defaults to None when its element is missing. A dict is
        returned in every case so the result can always be merged with other
        dicts.
    """
    # Defaults are set up front so that a missing wrapper can never leave a
    # variable unassigned.
    arrival_date = None
    arrival_time = None
    drop_off_point = None

    if to_content:
        date_arrival_tag = to_content.find('span', class_="text-date-arrival-time")
        if date_arrival_tag is not None:
            arrival_date = date_arrival_tag.get_text(strip=True)

        content_to_info = to_content.find('div', class_='content-to-info')
        if content_to_info is not None:
            to_hour_tag = content_to_info.find('div', class_='hour')
            if to_hour_tag is not None:
                arrival_time = to_hour_tag.get_text(strip=True)

            to_place_tag = content_to_info.find('div', class_='place')
            if to_place_tag is not None:
                drop_off_point = to_place_tag.get_text(strip=True)

    return {
        'arrival_date': arrival_date,
        'arrival_time': arrival_time,
        # Key spelling ("drop_of_point") is kept as-is: it becomes a column
        # name in the exported data.
        'drop_of_point': drop_off_point
    }


---

In [84]:
def get_departure_arrival_trip(container):
    """
    Extract the schedule of one trip: departure, arrival and travel time.

    Parameters
    ----------
    container : bs4 element (or any object exposing ``find``)
        The block that holds a single trip.

    Returns
    -------
    dict
        Always the same six keys, in this order: ``departure_time``,
        ``pick_up_point``, ``arrival_date``, ``arrival_time``,
        ``drop_of_point``, ``duration``. A value is None when the
        corresponding part of the page is missing.
    """
    # One fixed schema for every outcome, so rows built from this dict
    # always line up in the same columns.
    trip_data = {
        'departure_time': None,
        'pick_up_point': None,
        'arrival_date': None,
        'arrival_time': None,
        'drop_of_point': None,
        'duration': None,
    }

    from_to_content = container.find('div', class_="from-to-content")
    if not from_to_content:
        return trip_data

    # Sub-parsers are only invoked when their container exists; otherwise
    # the None defaults above are kept.
    from_content = from_to_content.find('div', class_='content from')
    if from_content:
        trip_data.update(parse_departure_trip_info(from_content))

    to_content = from_to_content.find('div', class_='content to')
    if to_content:
        trip_data.update(parse_arrival_trip_info(to_content))

    duration_tag = from_to_content.find('div', class_="duration")
    if duration_tag:
        trip_data['duration'] = duration_tag.get_text(strip=True)

    return trip_data


In [85]:
def gather_trip_info(block):
    """
    Collect everything known about one trip from its block.

    Runs the three section parsers (operator info, schedule, price) on the
    same block and merges their dicts left to right, so a later section
    wins if two sections ever share a key.

    Returns
    -------
    dict
        Operator, schedule and price fields of the trip in one flat dict.
    """
    sections = (
        parser_trip_bus_info(block),         # operator name, rating, seat type
        get_departure_arrival_trip(block),   # times, pick-up / drop-off, duration
        parse_price(block),                  # original / sale price, discount
    )

    trip_data = {}
    for section in sections:
        trip_data = trip_data | section

    return trip_data


---

rating

In [86]:
def extract_rating_from_container(container):
    """
    Extract the detailed rating rows of one trip container.

    Every ``<div class="rate-title">`` is expected to hold at least two
    ``<p>`` elements: the first is the rating title, the second its score.
    Divs with fewer than two ``<p>`` elements are skipped.

    Returns
    -------
    list of tuple
        ``[(rate_title, rate_point), ...]``. When nothing is found, or any
        error occurs while reading the container, the single placeholder
        ``[(None, None)]`` is returned instead.
    """
    placeholder = [(None, None)]

    try:
        paragraph_groups = (
            rate_div.find_all('p')
            for rate_div in container.find_all('div', class_='rate-title')
        )
        ratings = [
            (cells[0].get_text(strip=True), cells[1].get_text(strip=True))
            for cells in paragraph_groups
            if len(cells) >= 2
        ]
    except Exception:
        return placeholder

    return ratings or placeholder


total

In [87]:
# G·ªôp d·ªØ li·ªáu chuy·∫øn xe v√† c√°c ƒë√°nh gi√° l·∫°i v·ªõi nhau
def get_all_bus_trip_info(soup):
    """
    Build one DataFrame with every trip found on a results page.

    For each ``<div class="container">`` the trip fields, the page-level
    route fields and the detailed rating scores are merged into a single
    flat record; all records are turned into a DataFrame in one step.

    Parameters
    ----------
    soup : BeautifulSoup
        Parsed HTML of the results page.

    Returns
    -------
    pandas.DataFrame
        One row per trip. Rating titles become extra columns.

    Raises
    ------
    ValueError
        When the page contains no trip container at all.
    """
    dict_route = parse_route_info(soup)
    containers = soup.find_all("div", class_="container")

    records = []
    for container in containers:
        record = gather_trip_info(container) | dict_route

        for rate_title, rate_point in extract_rating_from_container(container):
            # ``(None, None)`` is the "no rating" placeholder; skipping it
            # avoids creating a column literally named None.
            if rate_title is not None:
                record[rate_title] = rate_point

        records.append(record)

    if not records:
        # Same exception type pandas raised before on an empty page, but
        # with a message that names the actual problem.
        raise ValueError("No trip containers found on the page")

    # A single constructor call replaces the per-row DataFrame + concat.
    return pd.DataFrame(records)

---

In [88]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
import time, random
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

## 5. Handle Button logic

### 5.1. Button `Xem thêm chuyến`

In [89]:
def click_load_more(driver, max_intercept_retries=5):
    """
    Keep pressing the "load more trips" button until it is gone.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session positioned on a results page.
    max_intercept_retries : int, optional
        How many times in a row a click may be intercepted by another
        element before giving up (default 5). Without this cap a permanent
        overlay would keep the loop spinning forever.

    Returns
    -------
    None
        The function only interacts with the page.
    """
    intercepted_in_a_row = 0

    while True:
        try:
            load_more_span = driver.find_element(By.XPATH, "//span[text()='Xem th√™m chuy·∫øn']")
            load_more_button = load_more_span.find_element(By.XPATH, "./ancestor::button")
            driver.execute_script("arguments[0].scrollIntoView();", load_more_button)
            load_more_button.click()
            intercepted_in_a_row = 0
            # Randomised pause between clicks while the next batch loads.
            time.sleep(random.uniform(2, 3))
        except NoSuchElementException:
            # Button no longer present: everything has been loaded.
            break
        except ElementClickInterceptedException:
            intercepted_in_a_row += 1
            if intercepted_in_a_row >= max_intercept_retries:
                break
            time.sleep(2)

### 5.2. Button `Xem các đánh giá`

In [90]:
# def open_all_ratings(driver, max_wait: int = 15):
#     """
#     M·ªü t·∫•t c·∫£ ph·∫ßn ƒë√°nh gi√° (rating) c·ªßa c√°c chuy·∫øn xe sau khi ƒë√£ load to√†n b·ªô danh s√°ch.

#     Parameters
#     ----------
#     driver : webdriver.Chrome
#         ƒê·ªëi t∆∞·ª£ng ƒëi·ªÅu khi·ªÉn tr√¨nh duy·ªát Selenium.
#     max_wait : int
#         Th·ªùi gian t·ªëi ƒëa (gi√¢y) ƒë·ªÉ ch·ªù c√°c ph·∫ßn t·ª≠ rating xu·∫•t hi·ªán.

#     Returns
#     -------
#     bool
#         True n·∫øu ƒë√£ click ƒë∆∞·ª£c √≠t nh·∫•t 1 rating, False n·∫øu kh√¥ng c√≥ ph·∫ßn rating n√†o.
#     """
    
#     try:
#         # Ch·ªù c√°c icon ng√¥i sao xu·∫•t hi·ªán
#         stars = WebDriverWait(driver, max_wait).until(
#             EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".bus-rating-button .anticon-star"))
#         )

#         if not stars:
#             print("‚ö†Ô∏è  Kh√¥ng t√¨m th·∫•y ph·∫ßn ƒë√°nh gi√° n√†o trong trang ‚Äî b·ªè qua b∆∞·ªõc click.")
#             return False

#         print(f"‚úÖ T√¨m th·∫•y {len(stars)} ph·∫ßn rating ‚Äî ti·∫øn h√†nh click m·ªü chi ti·∫øt...")

#         for star in stars:
#             try:
#                 driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", star)
#                 time.sleep(1.5)
#                 ActionChains(driver).move_to_element(star).click().perform()
#                 time.sleep(1)
#             except Exception as click_err:
#                 print(f"‚ùå Kh√¥ng th·ªÉ click rating: {click_err}")

#         print("‚úÖ ƒê√£ m·ªü t·∫•t c·∫£ ph·∫ßn ƒë√°nh gi√°.")
#         return True

#     except Exception as e:
#         print("‚ö†Ô∏è  Kh√¥ng th·ªÉ load ho·∫∑c click rating:", e)
#         return False


def open_all_ratings(driver, max_clicks=30, delay_range=(0.5, 1.2)):
    """
    Click every rating button on the page so the rating details get rendered.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session positioned on a results page.
    max_clicks : int, optional
        Upper bound on successful clicks; the loop stops once it is reached.
    delay_range : tuple of float, optional
        ``(low, high)`` bounds, in seconds, of the random pause taken before
        and after each click.

    Notes
    -----
    Waits up to 10 seconds for elements with class ``bus-rating-button``.
    A failure on a single button is reported and skipped; a failure while
    collecting the buttons is reported and ends the function. Nothing is
    returned.
    """
    wait = WebDriverWait(driver, 10)

    def _pause():
        # Random delay drawn from ``delay_range``.
        time.sleep(random.uniform(*delay_range))

    try:
        locator = (By.CLASS_NAME, "bus-rating-button")
        stars = wait.until(EC.presence_of_all_elements_located(locator))
        print(f"‚≠ê T√¨m th·∫•y {len(stars)} icon rating.")

        clicked = 0
        for star in stars:
            if clicked >= max_clicks:
                print(f"‚èπ D·ª´ng l·∫°i sau {max_clicks} l·∫ßn click (tr√°nh treo).")
                break

            try:
                driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", star)
                _pause()

                # Click through JavaScript rather than a native click.
                driver.execute_script("arguments[0].click();", star)
                clicked += 1

                print(f"‚úÖ Click rating {clicked}/{len(stars)}")
                _pause()
            except Exception as e:
                print("‚ö†Ô∏è B·ªè qua 1 icon do l·ªói:", e)

        print("üéØ Ho√†n t·∫•t m·ªü rating!")

    except Exception as e:
        print("‚ùå Kh√¥ng th·ªÉ l·∫•y danh s√°ch rating:", e)

### 5.3. Button `Tìm kiếm`

In [91]:
def click_search_button(driver):
    """
    Press the search button of the trip search form.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session showing the search form.

    Returns
    -------
    bool
        True when the element with class ``button-search`` was found and
        clicked, False when anything went wrong.
    """
    try:
        driver.find_element(By.CLASS_NAME, "button-search").click()
    except Exception:
        return False
    return True

## 6. Automate the process of filtering website data

In [92]:
def target_time(days=0):
    """
    Compute the calendar day that lies ``days`` days after today.

    Parameters
    ----------
    days : int, optional
        Offset in days from the current date (default 0, i.e. today).

    Returns
    -------
    dict
        ``{'day': '<day of month, no padding>',
        'month_year': '<2-digit month>-<year>'}``,
        e.g. ``{'day': '15', 'month_year': '10-2025'}``. The key order
        (day first) matters to callers that unpack ``.values()``.
    """
    moment = datetime.today() + timedelta(days=days)
    return dict(
        day=str(moment.day),
        month_year="{:02d}-{}".format(moment.month, moment.year),
    )

In [93]:
def filter_box(driver, start_city: str, destination_city: str, days=0):
    """
    Fill in the search form: departure city, destination city and date.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session showing the search form.
    start_city : str
        Text typed into the element with id ``from_input``.
    destination_city : str
        Text typed into the element with id ``to_input``.
    days : int, optional
        Departure date expressed as an offset in days from today
        (default 0).

    Returns
    -------
    bool
        True only when the wanted day was found in the date picker and
        clicked. False when the month section is not on the page, when no
        day cell matches, or when the click itself fails.
    """
    box_department = driver.find_element(By.ID, 'from_input')
    box_arrival = driver.find_element(By.ID, 'to_input')

    box_department.send_keys(start_city)
    box_arrival.send_keys(destination_city)

    # Open the date picker so the day cells exist in the DOM.
    driver.find_element(By.CLASS_NAME, "departure-date-select").click()
    time.sleep(1)

    target = target_time(days)
    target_day, target_month = target['day'], target['month_year']

    # The month block is addressed by an element id of the form "MM-YYYY".
    try:
        month_section = driver.find_element(By.ID, target_month)
    except NoSuchElementException:
        return False

    for day in month_section.find_elements(By.CSS_SELECTOR, "p.day"):
        if day.text == target_day:
            try:
                day.click()
            except Exception:
                return False
            return True

    # No cell carried the wanted day number, so nothing was selected.
    return False

### 7. Select arrival place

In [94]:
import pandas as pd

# Route table: one row per departure city, destinations spread over columns.
df = pd.read_csv(filepath_or_buffer='../../data/raw/routes.csv')
df

Unnamed: 0,start_point,destination_1,destination_2,destination_3,destination_4,destination_5,destination_6,destination_7,destination_8,destination_9,destination_10
0,H√† N·ªôi,H·∫£i Ph√≤ng,Ngh·ªá An,S∆°n La,H√† Giang,Qu·∫£ng Ninh,Thanh H√≥a,SaPa,Ninh B√¨nh,,
1,Qu·∫£ng Ninh,H√† N·ªôi,Qu·∫£ng Ninh,Ninh B√¨nh,ƒê√† N·∫µng,S√†i G√≤n,Sa Pa,V≈©ng T√†u,ƒê√† L·∫°t,Nha Trang,Phan Thi·∫øt
2,Ninh B√¨nh,H√† N·ªôi,Qu·∫£ng Ninh,Ninh B√¨nh,ƒê√† N·∫µng,S√†i G√≤n,Sa Pa,V≈©ng T√†u,ƒê√† L·∫°t,Nha Trang,Phan Thi·∫øt
3,ƒê√† N·∫µng,H√† N·ªôi,Qu·∫£ng Ninh,Ninh B√¨nh,ƒê√† N·∫µng,S√†i G√≤n,Sa Pa,V≈©ng T√†u,ƒê√† L·∫°t,Nha Trang,Phan Thi·∫øt
4,S√†i G√≤n,B√¨nh Thu·∫≠n,Ninh Thu·∫≠n,ƒê·∫Øk L·∫Øk,Gia Lai,Ph√∫ Y√™n,Nha Trang,B√† R·ªãa - V≈©ng T√†u,,,
5,Sa Pa,H√† N·ªôi,Qu·∫£ng Ninh,Ninh B√¨nh,ƒê√† N·∫µng,S√†i G√≤n,Sa Pa,V≈©ng T√†u,ƒê√† L·∫°t,Nha Trang,Phan Thi·∫øt
6,V≈©ng T√†u,H√† N·ªôi,Qu·∫£ng Ninh,Ninh B√¨nh,ƒê√† N·∫µng,S√†i G√≤n,Sa Pa,V≈©ng T√†u,ƒê√† L·∫°t,Nha Trang,Phan Thi·∫øt
7,ƒê√† L·∫°t,H√† N·ªôi,Qu·∫£ng Ninh,Ninh B√¨nh,ƒê√† N·∫µng,S√†i G√≤n,Sa Pa,V≈©ng T√†u,ƒê√† L·∫°t,Nha Trang,Phan Thi·∫øt
8,Nha Trang,H√† N·ªôi,Qu·∫£ng Ninh,Ninh B√¨nh,ƒê√† N·∫µng,S√†i G√≤n,Sa Pa,V≈©ng T√†u,ƒê√† L·∫°t,Nha Trang,Phan Thi·∫øt
9,Phan Thi·∫øt,H√† N·ªôi,Qu·∫£ng Ninh,Ninh B√¨nh,ƒê√† N·∫µng,S√†i G√≤n,Sa Pa,V≈©ng T√†u,ƒê√† L·∫°t,Nha Trang,Phan Thi·∫øt


# FLOW OFFICIAL

---

### check data trong database

In [95]:
# def open_all_ratings(driver, max_wait: int = 15, db_connection=None):
#     """
#     M·ªü c√°c ƒë√°nh gi√° ch∆∞a c√≥ trong database.

#     Parameters
#     ----------
#     driver : webdriver.Chrome
#         ƒê·ªëi t∆∞·ª£ng ƒëi·ªÅu khi·ªÉn tr√¨nh duy·ªát Selenium.
#     max_wait : int
#         Th·ªùi gian t·ªëi ƒëa (gi√¢y) ƒë·ªÉ ch·ªù c√°c ph·∫ßn t·ª≠ rating xu·∫•t hi·ªán.
#     db_connection : database connection object
#         K·∫øt n·ªëi ƒë·∫øn database ƒë·ªÉ ki·ªÉm tra d·ªØ li·ªáu

#     Returns
#     -------
#     bool
#         True n·∫øu ƒë√£ click ƒë∆∞·ª£c √≠t nh·∫•t 1 rating, False n·∫øu kh√¥ng c√≥ ph·∫ßn rating n√†o.
#     """
#     try:
#         # Ch·ªù c√°c container ch·ª©a th√¥ng tin chuy·∫øn xe xu·∫•t hi·ªán
#         containers = WebDriverWait(driver, max_wait).until(
#             EC.presence_of_all_elements_located((By.CLASS_NAME, "container"))
#         )

#         if not containers:
#             print("‚ö†Ô∏è  Kh√¥ng t√¨m th·∫•y chuy·∫øn xe n√†o trong trang.")
#             return False

#         for container in containers:
#             try:
#                 # L·∫•y th√¥ng tin nh·∫≠n d·∫°ng chuy·∫øn xe (v√≠ d·ª•: t√™n nh√† xe, gi·ªù kh·ªüi h√†nh)
#                 bus_name = container.find_element(By.CLASS_NAME, "bus-name").text
#                 departure_time = container.find_element(By.CLASS_NAME, "hour").text
                
#                 # Ki·ªÉm tra trong database
#                 if db_connection:
#                     cursor = db_connection.cursor()
#                     cursor.execute("""
#                         SELECT id FROM bus_trips 
#                         WHERE bus_name = %s AND departure_time = %s
#                     """, (bus_name, departure_time))
                    
#                     if cursor.fetchone():
#                         print(f"Skip rating for {bus_name} - {departure_time} (already in DB)")
#                         continue

#                 # N·∫øu ch∆∞a c√≥ trong DB, m·ªü rating
#                 star = container.find_element(By.CSS_SELECTOR, ".bus-rating-button .anticon-star")
#                 driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", star)
#                 time.sleep(1.5)
#                 ActionChains(driver).move_to_element(star).click().perform()
#                 time.sleep(1)
                
#             except Exception as click_err:
#                 print(f"‚ùå L·ªói x·ª≠ l√Ω container: {click_err}")
#                 continue

#         print("‚úÖ ƒê√£ x·ª≠ l√Ω t·∫•t c·∫£ c√°c ƒë√°nh gi√°.")
#         return True

#     except Exception as e:
#         print("‚ö†Ô∏è  L·ªói chung:", e)
#         return False

## Crawl rating data

# -- Main --

In [96]:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
import time

flow: filter_box(driver, 'S√†i G√≤n/H√† N·ªôi', list, days=for i in range) -> click_search_button(driver)

In [97]:

# driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
# driver.get("https://vexere.com/vi-VN/ve-xe-khach-tu-ha-noi-di-thanh-hoa-124t1561.html?date=15-10-2025&v=8&nation=84")

# # Click 'xem th√™m chuy·∫øn' ƒë·∫øn h·∫øt -> hi·ªÉn th·ªã t·∫•t c·∫£ c√°c chuy·∫øn xe
# click_load_more(driver)


# open_all_ratings(driver)
# time.sleep(1.5)
# soup = BeautifulSoup(driver.page_source, "html.parser")

# lst_trips_info = []
# df_trips_info = get_all_bus_trip_info(soup)
# with open('hn_th_15102025.html', 'w', encoding='utf-8') as f:
#     f.write(soup.prettify())

# df_trips_info.to_csv("../../data/raw/hn_th_15102025.csv", index=False)
# driver.quit()


In [98]:
# Destinations crawled for each departure city.
# The original list had 'Thanh H√≥a'',SaPa' (a misplaced quote), which Python
# reads as ONE string via implicit literal concatenation; the two cities are
# now separate entries.
arrivals_HaNoi = ['H·∫£i Ph√≤ng','Ngh·ªá An','S∆°n La','H√† Giang','Qu·∫£ng Ninh','Thanh H√≥a','SaPa','Ninh B√¨nh']
arrivals_SaiGon = ['Gia Lai','B√¨nh Thu·∫≠n','Ninh Thu·∫≠n','ƒê·∫Øk L·∫Øk','Ph√∫ Y√™n','Nha Trang','B√† R·ªãa - V≈©ng T√†u']

In [99]:
# Address of the page the browser session starts on.
URL = 'https://vexere.com/'
# Launch Chrome; the driver executable path comes from ChromeDriverManager().install().
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
# Open the start page; the next cell drives the search form from here.
driver.get(URL)

In [None]:
departure_place = 'S√†i G√≤n'  # departure city typed into the search form
days = 1                       # crawl trips departing `days` days from today

day, month_year = target_time(days).values()
# Only the month-year part contains a dash; make it filename friendly
# (e.g. "10-2025" -> "10_2025"). The original applied the replace to `day`,
# which overwrote month_year with the day number.
month_year = month_year.replace('-', '_')

for arrival_place in arrivals_SaiGon:
    file_stem = f'{departure_place}_{arrival_place}_{day}_{month_year}'

    filter_sucess = filter_box(driver, start_city=departure_place, destination_city=arrival_place, days=days)

    # Always assigned, so the check below can never hit an undefined name
    # (or a stale value) when the filter step fails.
    search_suess = filter_sucess and click_search_button(driver)

    click_load_more(driver)

    if search_suess:
        open_all_ratings(driver)

    # Short pause before taking the page snapshot.
    time.sleep(1.5)

    soup = BeautifulSoup(driver.page_source, "html.parser")

    try:
        df_trips_info = get_all_bus_trip_info(soup)
    except Exception as err:
        # Parsing failed: keep the raw page for inspection and do NOT write
        # a CSV (there is no fresh DataFrame to write).
        with open(f'../../data/site/{file_stem}.html', 'w', encoding='utf-8') as f:
            f.write(soup.prettify())

        print("ƒê√£ l∆∞u th√†nh .html")
        print("Parse error:", repr(err))
    else:
        df_trips_info.to_csv(f"../../data/raw/{file_stem}.csv", index=False)
        print("L·∫•y d·ªØ li·ªáu .csv th√†nh c√¥ng")

    # NOTE(review): the original cell quits the browser and leaves the loop
    # after the first destination; that behaviour is preserved here. To crawl
    # every destination, drop the `break` and move `driver.quit()` after the
    # loop — TODO confirm intent.
    driver.quit()
    break


‚≠ê T√¨m th·∫•y 20 icon rating.
‚úÖ Click rating 1/20
‚úÖ Click rating 2/20
‚úÖ Click rating 3/20
‚úÖ Click rating 4/20
‚úÖ Click rating 5/20
‚úÖ Click rating 6/20
‚úÖ Click rating 7/20
‚úÖ Click rating 8/20
‚úÖ Click rating 9/20
‚úÖ Click rating 10/20
‚úÖ Click rating 11/20
‚úÖ Click rating 12/20
‚úÖ Click rating 13/20
‚úÖ Click rating 14/20
‚úÖ Click rating 15/20
‚úÖ Click rating 16/20
‚úÖ Click rating 17/20
‚úÖ Click rating 18/20
‚úÖ Click rating 19/20
‚úÖ Click rating 20/20
üéØ Ho√†n t·∫•t m·ªü rating!
L·∫•y d·ªØ li·ªáu .csv th√†nh c√¥ng


In [101]:
df_trips_info

Unnamed: 0,bus_name,bus_rating,seat_type,departure_time,pick_up_point,arrival_date,arrival_time,drop_of_point,duration,original_price,...,departure_date,start_point,destination,An to√†n,Th√¥ng tin ch√≠nh x√°c,Th√¥ng tin ƒë·∫ßy ƒë·ªß,Th√°i ƒë·ªô nh√¢n vi√™n,Ti·ªán nghi & tho·∫£i m√°i,Ch·∫•t l∆∞·ª£ng d·ªãch v·ª•,ƒê√∫ng gi·ªù
0,Ho√†ng Thu·ª∑,4.7 (3363),Limousine 34 Gi∆∞·ªùng VIP,18:45,‚Ä¢ B·∫øn xe Mi·ªÅn ƒê√¥ng - Qu·∫ßy v√© 37,(18/10),05:45,‚Ä¢ B·∫øn Xe ƒê·ª©c Long Gia Lai,11h,350.0,...,"T6, 17/10/2025",S√†i G√≤n,Gia Lai,4.8,4.7,4.6,4.6,4.6,4.6,4.9
1,Vi·ªát T√¢n Ph√°t,4.6 (92),34 Ph√≤ng,17:45,‚Ä¢ B·∫øn xe Mi·ªÅn ƒê√¥ng,(18/10),04:55,‚Ä¢ B·∫øn xe ƒê·∫Øk ƒêoa,11h10m,370.0,...,"T6, 17/10/2025",S√†i G√≤n,Gia Lai,4.7,4.7,4.7,4.5,4.6,4.5,4.6
2,Sinh Di√™n H·ªìng,4.6 (227),Limousine 34 ch·ªó,17:30,‚Ä¢ B·∫øn xe An S∆∞∆°ng,(18/10),04:15,‚Ä¢ B·∫øn Xe ƒê·ª©c Long Gia Lai,10h45m,370.0,...,"T6, 17/10/2025",S√†i G√≤n,Gia Lai,4.6,4.6,4.8,4.7,4.6,4.5,4.8
3,K√≠nh Di√™n H·ªìng,4.6 (1127),Limousine gi∆∞·ªùng n·∫±m 34 ch·ªó,18:40,‚Ä¢ B·∫øn Xe Mi·ªÅn ƒê√¥ng - Qu·∫ßy v√© 19,(18/10),05:40,‚Ä¢ B·∫øn Xe ƒê·ª©c Long Gia Lai,11h,350.0,...,"T6, 17/10/2025",S√†i G√≤n,Gia Lai,4.7,4.6,4.7,4.6,4.5,4.5,4.5
4,An Ph√°t,4.5 (375),Limosuine 24 Ph√≤ng,17:30,‚Ä¢ BX Mi·ªÅn ƒê√¥ng - C·ªïng 4,(18/10),07:15,‚Ä¢ B·∫øn Xe K-Bang,13h45m,550.0,...,"T6, 17/10/2025",S√†i G√≤n,Gia Lai,4.6,4.5,4.6,4.5,4.3,4.5,4.8
5,V∆∞∆°ng T·∫•n D≈©ng,4.8 (372),Limousine 24 Ph√≤ng,17:45,‚Ä¢ VP QL13,(18/10),06:00,‚Ä¢ B·∫øn xe An Kh√™,12h15m,500.0,...,"T6, 17/10/2025",S√†i G√≤n,Gia Lai,4.8,4.7,4.8,4.8,4.8,4.8,4.8
6,ƒê·ª©c ƒê·∫°t,4.7 (974),Limousine gi∆∞·ªùng n·∫±m 34 ch·ªó,18:25,‚Ä¢ B·∫øn xe Mi·ªÅn ƒê√¥ng - Qu·∫ßy v√© 17,(18/10),04:50,‚Ä¢ B·∫øn Xe ƒê·ª©c Long Gia Lai,10h25m,350.0,...,"T6, 17/10/2025",S√†i G√≤n,Gia Lai,4.6,4.5,4.5,4.5,4.4,4.4,4.8
7,Thu·∫≠n Ti·∫øn,4.7 (4930),Limousine 34 gi∆∞·ªùng,18:30,‚Ä¢ B·∫øn Xe Mi·ªÅn ƒê√¥ng - Qu·∫ßy v√© 24,(18/10),06:55,‚Ä¢ B·∫øn Xe ƒê·ª©c Long Gia Lai,12h25m,350.0,...,"T6, 17/10/2025",S√†i G√≤n,Gia Lai,4.7,4.7,4.8,4.6,4.6,4.6,4.8
8,T·∫•n H∆∞ng (Gia Lai),4.4 (101),Limousine 24 Ph√≤ng,17:30,‚Ä¢ B·∫øn Xe Mi·ªÅn ƒê√¥ng - Ph√≤ng v√© s·ªë 77,(18/10),07:55,‚Ä¢ B·∫øn xe kh√°ch Kbang,14h25m,650.0,...,"T6, 17/10/2025",S√†i G√≤n,Gia Lai,4.5,4.2,4.4,4.5,4.4,4.4,3.6
9,B·∫£y Lang,4.7 (271),Limousine 34 gi∆∞·ªùng,18:30,"‚Ä¢ B·∫øn xe mi·ªÅn ƒë√¥ng c≈© (D√£y s·ªë 6, √î C14)",(18/10),05:30,‚Ä¢ B·∫øn Xe ƒê·ª©c Long Gia Lai,11h,350.0,...,"T6, 17/10/2025",S√†i G√≤n,Gia Lai,4.8,4.6,4.6,4.7,4.5,4.5,4.4
