# In [22]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape British Airways reviews from airlinequality.com into a CSV file.
#
# Each <article itemprop="review"> becomes one row. Fields that cannot be
# found in a review are simply left out of that row (pandas fills them with
# NaN), and reviews without a reviewer name are skipped entirely.

# Review listing sorted by post date, descending. The very large pagesize is
# presumably meant to return every review in a single response.
ba_review_link = 'https://www.airlinequality.com/airline-reviews/british-airways/?sortby=post_date%3ADesc&pagesize=100000'

# Fetch the page. The timeout keeps the cell from hanging forever on a stalled
# connection, and raise_for_status() prevents parsing an HTTP error page and
# silently writing an empty CSV.
res = requests.get(ba_review_link, timeout=60)
res.raise_for_status()
soup = BeautifulSoup(res.content, 'html.parser')

# One dict per review; the dict keys become the CSV columns.
data = []

# Extract data from each review article
for sp in soup.find_all('article', itemprop='review'):
    review_data = {}

    # Overall rating (kept as the raw text found in the page)
    rating_value = sp.find('span', itemprop='ratingValue')
    if rating_value:
        review_data['Total Rating'] = rating_value.text

    # Review headline
    review_header = sp.find('h2', class_='text_header')
    if review_header:
        review_data['Review Summary'] = review_header.get_text(strip=True)

    # Reviewer name and country
    user_info = sp.find('h3', class_='text_sub_header userStatusWrapper')
    if user_info:
        user_name = user_info.find('span', itemprop='name')
        if user_name:
            review_data['Name'] = user_name.get_text(strip=True)

            # The country is read from the text node immediately following
            # the name span. This lookup lives inside the `if user_name`
            # guard: without a name span there is nothing to take a sibling
            # of, and dereferencing None would abort the whole scrape.
            country_info = user_name.next_sibling
            if country_info and isinstance(country_info, str):
                # Drop the surrounding parentheses and any whitespace
                # (including newlines/tabs) around them.
                country = country_info.strip('() \t\r\n')
                review_data['Country'] = country

    # Flag whether the review body carries the "verified" check mark.
    # Only the flag is stored; the body text itself is not saved.
    review_div = sp.find('div', class_='text_content', itemprop='reviewBody')
    if review_div:
        review_data['Verified'] = 'yes' if '✅' in review_div.get_text() else 'no'

    # Per-category ratings table: each row is a (header, value) pair
    review_tables = sp.find_all('table', class_='review-ratings')
    for table in review_tables:
        rows = table.find_all('tr')
        for row in rows:
            key_cell = row.find('td', class_='review-rating-header')
            value_cell = row.find('td', class_='review-value') or row.find('td', class_='review-rating-stars')
            if key_cell and value_cell:
                key = key_cell.get_text(strip=True)
                if 'stars' in value_cell.get('class', []):
                    # Star cells are scored by counting the filled stars
                    value = len(value_cell.find_all('span', class_='star fill'))
                else:
                    value = value_cell.get_text(strip=True)
                review_data[key] = value

    # Keep only reviews for which a reviewer name was found
    if 'Name' in review_data:
        data.append(review_data)

# Create a DataFrame and save it to a CSV file
df = pd.DataFrame(data)
df.to_csv('british_airways_reviews.csv', index=False)

print("Data extracted and saved to 'british_airways_reviews.csv'")


# Output: Data extracted and saved to 'british_airways_reviews.csv'
