In [8]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [9]:
def fetch_html(url):
    response = requests.get(url)
    if response.status_code == 200:
        print('Fetch OK')
        return response.text
    else:
        print('Fetch ERROR')
        return None

def save_html(html, filename):
    with open(filename, "w", encoding="utf-8") as f:
        f.write(html)

def load_html(filename):
    with open(filename, 'r', encoding='utf-8') as f:
        return f.read()

In [10]:
def parse_price(block):
    fare_sale = block.find('div', class_='fare-sale')
    fare = block.find('div', class_='fare')
    small = block.find('div', class_='small')
    percent = block.find('div', class_='percent')

    # Discounted price
    if fare_sale:
        price_discount = fare_sale.get_text(strip=True).replace("From", "").replace("đ", "").strip()
    elif fare:
        price_discount = fare.get_text(strip=True).replace("đ", "").strip()
    else:
        price_discount = None

    # Original price
    if small and small.get_text(strip=True):
        price_original = small.get_text(strip=True).replace("đ", "").strip()
    else:
        price_original = price_discount

    # Discount percent
    discount_percent = percent.get_text(strip=True) if percent else None

    return [price_original, price_discount, discount_percent]

In [11]:
def parse_bus_info(container):
    bus_name = container.find('div', class_='bus-name').text.strip() if container.find('div', class_='bus-name') else None
    bus_rating = container.find('div', class_='bus-rating').find('span').text.strip() if container.find('div', class_='bus-rating') and container.find('div', class_='bus-rating').find('span') else None
    seat_type = container.find('div', class_='seat-type').get_text(strip=True) if container.find('div', class_='seat-type') else None

    from_to_content = container.find('div', class_="from-to-content")
    if from_to_content:
        to_content = from_to_content.find('div', class_='content to')
        from_content = from_to_content.find('div', class_='content from')
        duration = from_to_content.find('div', class_="duration").get_text(strip=True) if from_to_content.find('div', class_="duration") else None

        # Arrival info
        date_arrival = None
        to_hour = None
        to_place = None
        if to_content:
            span = to_content.find('span', class_="text-date-arrival-time")
            date_arrival = span.get_text(strip=True) if span else None
            content_to_info = to_content.find('div', class_='content-to-info')
            if content_to_info:
                to_hour = content_to_info.find('div',class_='hour' ).get_text(strip=True) if content_to_info.find('div',class_='hour' ) else None
                to_place = content_to_info.find('div',class_='place' ).get_text(strip=True) if content_to_info.find('div',class_='place' ) else None

        # Departure info
        from_hour = from_content.find('div',class_='hour' ).get_text(strip=True) if from_content and from_content.find('div',class_='hour' ) else None
        from_place = from_content.find('div',class_='place' ).get_text(strip=True) if from_content and from_content.find('div',class_='place' ) else None
    else:
        duration = None
        date_arrival = None
        to_hour = None
        to_place = None
        from_hour = None
        from_place = None

    price_original, price_discount, discount_percent = parse_price(container)

    notification = container.find('div', class_='link')
    notification = notification.get_text(strip=True) if notification else None

    return [
        bus_name, bus_rating, seat_type,
        from_hour, from_place, duration,
        to_hour, to_place, date_arrival,
        price_original, price_discount, discount_percent, notification
    ]

In [12]:
def extract_all_bus_info(soup):
    containers = soup.find_all("div", class_="container")
    bus_info_list = [parse_bus_info(container) for container in containers]
    return bus_info_list

In [13]:
# Main workflow
URL = 'https://vexere.com/vi-VN/ve-xe-khach-tu-sai-gon-di-nha-trang-khanh-hoa-129t23591.html?date=27-09-2025&nation=84&ts=1758796742310'
html = fetch_html(URL)
if html:
    save_html(html, "data_site.html")

html_content = load_html("data_site.html")
soup = BeautifulSoup(html_content, 'html.parser')
bus_data = extract_all_bus_info(soup)
df_bus_info = pd.DataFrame(bus_data, columns=[
    'bus_name', 'bus_rating', 'seat_type',
    'from_hour', 'from_place', 'duration',
    'to_hour', 'to_place', 'date_arrival',
    'price_original', 'price_discount', 'discount_percent', 'notification'
])

Fetch OK


In [14]:
df_bus_info.to_csv("bus_info.csv", index=False)
df_bus_info

Unnamed: 0,bus_name,bus_rating,seat_type,from_hour,from_place,duration,to_hour,to_place,date_arrival,price_original,price_discount,discount_percent,notification
0,Đà Lạt ơi,4.8 (3405),Limousine 24 Phòng ĐÔI,23:45,• Trạm Quận 1,5h45m,05:30,• Trạm Nha Trang,(03/10),450.000,Từ 350.000,-22%,Lộ trình: Hàng Xanh - Quận 1 - Cao tốc đến Cam...
1,Khanh Phong,4.7 (16769),Limousine 32 giường nằm (WC),13:05,• Văn Phòng Phạm Ngũ Lão - Quận 1.,6h10m,19:15,• Văn Phòng Nha Trang (KS Mường Thanh),,320.000,300.000,-6%,Quý khách lưu ý
2,Huỳnh Gia,4.7 (8497),Giường nằm 38 chỗ (WC),22:30,• Văn Phòng Phạm Ngũ Lão,6h30m,05:00,• Văn Phòng Nha Trang,(03/10),280.000,250.000,-11%,Lộ trình: Cao tốc (Long Thành - Dầu Giây - Pha...
3,An Anh Limousine,4.8 (8268),Limousine 34 Phòng Đơn,23:30,• Văn Phòng Quận 5,6h30m,06:00,• Văn phòng Nha Trang,(03/10),299.000,250.000,-16%,Tiện ích miễn phí
4,Bình Minh Tải,4.7 (3615),Limousine 22 Phòng Đơn,22:30,• Văn Phòng Quận 1,7h5m,05:35,• Văn phòng Nha Trang,(03/10),Từ 350.000,Từ 350.000,,Hướng đi: Cao tốc (Long Thành - Dầu Giây - Pha...
5,Nhật Dương - Bình Minh Bus,4.9 (5802),Limousine phòng 21 đôi (WC),05:00,• Văn phòng Nguyễn Cư Trinh Quận 1,7h50m,12:50,• Vp Thích Quảng Đức Nha Trang,,349.000,Từ 270.000,-23%,NX xuất HĐ VAT - Hướng đi: Cao tốc
6,Nam Hải Limousine,4.7 (3440),Limousine 34 giường,22:20,• Văn Phòng Phạm Ngũ Lão,8h,06:20,• Văn Phòng Nha Trang,(03/10),300.000,270.000,-10%,Lưu ý Đón/Trả tại TP.HCM
7,Bus365,4.7 (164),Limousine 24 phòng đôi,22:00,• Bến Xe Miền Đông Mới,6h,04:00,• Văn Phòng Nha Trang,(03/10),400.000,Từ 289.000,-28%,Vé Metro miễn phí
8,Trà Lan Viên,4.4 (4188),Limousine 21 Phòng Đơn (WC),13:00,• Vp. Quận 1,7h20m,20:20,• VP Hà Quang 2,,470.000,350.000,-26%,Quý khách lưu ý giường cuối
9,Trọng Thủy Limousine,4.7 (1342),Limousine 24 phòng Đôi,21:00,• Ngã 4 An Sương,7h,04:00,• Văn phòng Nha Trang,(03/10),470.000,Từ 390.000,-17%,Kích thước giường cuối


In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time, random

driver = webdriver.Chrome()
driver.get("https://vexere.com/vi-VN/ve-xe-khach-tu-sai-gon-di-nha-trang-khanh-hoa-129t23591.html?date=11-10-2025&v=6")


button = driver.find_element(By.CSS_SELECTOR, ".ant-btn.bus-rating-button")
while button:
    button.click()
    button = driver.find_element(By.CSS_SELECTOR, ".ant-btn.bus-rating-button")
    time.sleep(random.randint(3,4))


ElementClickInterceptedException: Message: element click intercepted: Element is not clickable at point (591, -6809)
  (Session info: chrome=140.0.7339.208); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#elementclickinterceptedexception
Stacktrace:
	GetHandleVerifier [0x0x7ff634d01eb5+80197]
	GetHandleVerifier [0x0x7ff634d01f10+80288]
	(No symbol) [0x0x7ff634a802fa]
	(No symbol) [0x0x7ff634adfe69]
	(No symbol) [0x0x7ff634add7ee]
	(No symbol) [0x0x7ff634ada731]
	(No symbol) [0x0x7ff634ad9620]
	(No symbol) [0x0x7ff634acabc8]
	(No symbol) [0x0x7ff634b0037a]
	(No symbol) [0x0x7ff634aca456]
	(No symbol) [0x0x7ff634b00590]
	(No symbol) [0x0x7ff634b287fb]
	(No symbol) [0x0x7ff634b00153]
	(No symbol) [0x0x7ff634ac8b02]
	(No symbol) [0x0x7ff634ac98d3]
	GetHandleVerifier [0x0x7ff634fbe83d+2949837]
	GetHandleVerifier [0x0x7ff634fb8c6a+2926330]
	GetHandleVerifier [0x0x7ff634fd86c7+3055959]
	GetHandleVerifier [0x0x7ff634d1cfee+191102]
	GetHandleVerifier [0x0x7ff634d250af+224063]
	GetHandleVerifier [0x0x7ff634d0af64+117236]
	GetHandleVerifier [0x0x7ff634d0b119+117673]
	GetHandleVerifier [0x0x7ff634cf10a8+11064]
	BaseThreadInitThunk [0x0x7ffc5096e8d7+23]
	RtlUserThreadStart [0x0x7ffc52368d9c+44]


In [7]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time, random

load_more_span = driver.find_element(By.XPATH, "//span[text()='Xem thêm chuyến']")
load_more_button = load_more_span.find_element(By.XPATH, "./ancestor::button")
load_more_button.click()

InvalidSessionIdException: Message: invalid session id; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#invalidsessionidexception
Stacktrace:
	GetHandleVerifier [0x0x7ff634d01eb5+80197]
	GetHandleVerifier [0x0x7ff634d01f10+80288]
	(No symbol) [0x0x7ff634a8011c]
	(No symbol) [0x0x7ff634ac7c1d]
	(No symbol) [0x0x7ff634b00242]
	(No symbol) [0x0x7ff634afac14]
	(No symbol) [0x0x7ff634af9cca]
	(No symbol) [0x0x7ff634a4a755]
	GetHandleVerifier [0x0x7ff634fbe83d+2949837]
	GetHandleVerifier [0x0x7ff634fb8c6a+2926330]
	GetHandleVerifier [0x0x7ff634fd86c7+3055959]
	GetHandleVerifier [0x0x7ff634d1cfee+191102]
	GetHandleVerifier [0x0x7ff634d250af+224063]
	(No symbol) [0x0x7ff634a49731]
	GetHandleVerifier [0x0x7ff6350eb568+4182008]
	BaseThreadInitThunk [0x0x7ffc5096e8d7+23]
	RtlUserThreadStart [0x0x7ffc52368d9c+44]


In [4]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time, random

driver = webdriver.Chrome()
driver.get("https://vexere.com/vi-VN/ve-xe-khach-tu-sai-gon-di-nha-trang-khanh-hoa-129t23591.html?date=11-10-2025&v=6")

button = driver.find_element(By.CSS_SELECTOR, ".ant-btn.bus-rating-button")
while True:
    try:
        button.click()
        time.sleep(random.randint(3, 4))
        button = driver.find_element(By.CSS_SELECTOR, ".ant-btn.bus-rating-button")
    except Exception:
        break

In [None]:
data_rating_site = BeautifulSoup(html, 'html.parser').prettify()
save_html(data_rating_site, "data_rating_site.html")