# In [None]:
import math
import os
import re
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
)
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

def create_driver():
    """Build and return a Chrome WebDriver for the scraper.

    The browser is started with a fixed set of command-line flags, a custom
    user-agent string, and the ``enable-automation`` switch excluded.

    Returns:
        A ``webdriver.Chrome`` instance using a default ``Service``.
    """
    chrome_flags = (
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
        "--window-size=1920,1080",
        "user-agent=Mozilla/5.0",
        "--disable-blink-features=AutomationControlled",
    )

    opts = Options()
    for flag in chrome_flags:
        opts.add_argument(flag)
    # Experimental options are kept separately from the argument list, so
    # setting this after the flags is equivalent to setting it in between.
    opts.add_experimental_option("excludeSwitches", ["enable-automation"])

    chrome_service = Service()
    return webdriver.Chrome(service=chrome_service, options=opts)

def _parse_total_reviews(total_text: str) -> int:
    """Extract the integer that follows "of" in the pagination summary text.

    Thousands separators (``1,234``) are accepted so large counts are not
    truncated at the first comma.

    Raises:
        ValueError: If no count can be found in ``total_text``.
    """
    match = re.search(r"of\s+(\d[\d,]*)", total_text)
    if match is None:
        raise ValueError(
            f"Could not find a review count in pagination text: {total_text!r}"
        )
    return int(match.group(1).replace(",", ""))


def _first_text(element, selector: str, default: str = "") -> str:
    """Return the stripped text of the first match for ``selector``.

    ``default`` is returned when the element is missing or has gone stale,
    keeping the scrape best-effort without hiding unrelated errors.
    """
    try:
        return element.find_element(By.CSS_SELECTOR, selector).text.strip()
    except (NoSuchElementException, StaleElementReferenceException):
        return default


def _parse_detail_rows(article) -> dict:
    """Collect every row of the review's ratings table in a single pass.

    Keys are the row labels lower-cased, with ``&`` replaced by ``and`` and
    spaces by underscores. Rows that carry a ``td.review-value`` cell are
    stored as their text (e.g. ``date_flown``, ``route``); all other rows are
    stored as the number of filled stars.
    """
    details = {}
    for row in article.find_elements(By.CSS_SELECTOR, "table.review-ratings tr"):
        try:
            label = row.find_element(
                By.CSS_SELECTOR, "td.review-rating-header"
            ).text.strip()
            key = label.lower().replace("&", "and").replace(" ", "_")

            value_cells = row.find_elements(By.CSS_SELECTOR, "td.review-value")
            if value_cells:
                # Text row: keep the text instead of a meaningless star
                # count of 0.
                details[key] = value_cells[0].text.strip()
            else:
                filled = row.find_elements(
                    By.CSS_SELECTOR, "td.review-rating-stars span.star.fill"
                )
                details[key] = len(filled)
        except (NoSuchElementException, StaleElementReferenceException):
            # Rows without a header cell (or that went stale) are skipped.
            continue
    return details


def _parse_review(article) -> dict:
    """Turn one review ``<article>`` element into a flat record dict."""
    user_info = _first_text(article, "h3.text_sub_header.userStatusWrapper")
    # The country is the parenthesised part of the user line.
    country_match = re.search(r"\(([^)]+)\)", user_info)

    record = {
        "title": _first_text(article, "h2.text_header"),
        "author_country": (
            country_match.group(1).strip() if country_match else "Unknown"
        ),
        "comment": _first_text(
            article, "div.text_content[itemprop='reviewBody']"
        ),
        "date_flown": "Unknown",
        "route": "Unknown",
    }
    # Table values replace the "Unknown" defaults only when the row exists;
    # updating in place keeps date_flown/route in their column positions.
    record.update(_parse_detail_rows(article))
    return record


def main() -> None:
    """Scrape Ethiopian Airlines reviews from AirlineQuality and save to CSV.

    Loads the listing once to read the total review count, then visits each
    results page, converts every review article into a record and finally
    writes all records to
    ``scraped_data/scraped_airlinequality_ethiopian_airlines_reviews.csv``.

    Raises:
        ValueError: If the total review count cannot be read from the page.
    """
    page_size = 100
    base_url = (
        "https://www.airlinequality.com/airline-reviews/ethiopian-airlines"
        f"?sortby=post_date%3ADesc&pagesize={page_size}"
    )

    reviews = []
    driver = create_driver()
    try:
        driver.get(base_url)
        time.sleep(2)

        total_text = driver.find_element(
            By.CSS_SELECTOR, "div.pagination-total"
        ).text
        total_pages = math.ceil(_parse_total_reviews(total_text) / page_size)

        for page in range(1, total_pages + 1):
            # NOTE(review): confirm the site honours the `page` query
            # parameter; if it is ignored, every iteration re-scrapes page 1.
            driver.get(f"{base_url}&page={page}")
            time.sleep(2)
            print(f"Scraping page {page}...")

            articles = driver.find_elements(
                By.CSS_SELECTOR, "article[itemprop='review']"
            )
            print(f"Found {len(articles)} reviews")

            for i, art in enumerate(articles, 1):
                reviews.append(_parse_review(art))
                print(f"Review {i} on page {page} collected.")
    finally:
        # Always shut the browser down, even when scraping fails part-way.
        driver.quit()

    # Save to relative path for GitHub
    output_path = "scraped_data/scraped_airlinequality_ethiopian_airlines_reviews.csv"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    pd.DataFrame(reviews).to_csv(output_path, index=False)
    print(f"\nDone! Saved {len(reviews)} reviews to: {output_path}")

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
