In [2]:
import requests
from bs4 import BeautifulSoup
# Base URL of the British Airways review listing; individual page URLs are
# built from it by appending '/page/<n>/'.
url = 'https://www.airlinequality.com/airline-reviews/british-airways'

In [44]:
# Column-oriented store for the scraped data: one list per review field.
# Every find_*/is_verified helper appends one entry per review to its list.
# TODO: fields not collected yet -- cabin_staff_service, food_beverages,
# entertainment, ground_service, wifi, value_for_money, recommended.
reviews = {
    field: []
    for field in (
        'name', 'country', 'date', 'trip_verified', 'rating', 'aircraft',
        'type_of_traveller', 'seat_type', 'route', 'date_flown',
        'seat_comfort',
    )
}


def find_country(single_review):
  """Record the country the reviewer is from.

  The country is taken from the text between the last '(' and the following
  ')' in the review's ``h3`` header with class
  ``text_sub_header userStatusWrapper``.

  Args:
    single_review: BeautifulSoup element for one review.

  Appends exactly one entry to ``reviews['country']``: the country string, or
  ``None`` when the header is missing or contains no parenthesised part. A
  placeholder is stored instead of raising so the per-field lists keep one
  entry per review.
  """
  h3_tag = single_review.find('h3', class_='text_sub_header userStatusWrapper')
  value = None
  if h3_tag is not None:
    header_text = h3_tag.text
    # Without a '(' the split would return the entire header text, which is
    # not a country, so only parse when parentheses are present.
    if '(' in header_text:
      value = header_text.split('(')[-1].split(')')[0].strip()
  reviews.setdefault('country', []).append(value)



def is_verified(single_review) -> None:
  """Record whether the review is marked as trip-verified.

  Looks for the marker text inside the review body (the ``div`` with
  ``itemprop="reviewBody"``).

  Args:
    single_review: BeautifulSoup element for one review.

  Appends exactly one entry to ``reviews['trip_verified']``:
  ``'Trip Verified'``, ``'Not Verified'``, or ``'Not Found'`` when neither
  marker is present or the review has no body element.
  """
  body = single_review.find('div', {'itemprop': 'reviewBody'})
  # A missing body is treated like a body without any marker.
  present = body.get_text(strip=True) if body is not None else ''
  if 'Trip Verified' in present:
    value = 'Trip Verified'
  elif 'Not Verified' in present:
    value = 'Not Verified'
  else:
    value = 'Not Found'
  reviews.setdefault('trip_verified', []).append(value)



def find_name(single_review):
  """Record the name of the reviewer.

  Args:
    single_review: BeautifulSoup element for one review.

  Appends exactly one entry to ``reviews['name']``: the stripped text of the
  first ``span`` with ``itemprop="name"``, or ``None`` when the review has no
  such element (so the per-field lists keep one entry per review).
  """
  name_tag = single_review.find('span', {'itemprop': 'name'})
  value = name_tag.get_text(strip=True) if name_tag is not None else None
  reviews.setdefault('name', []).append(value)



def find_date(single_review):
  """Record the date on which the review was given.

  Args:
    single_review: BeautifulSoup element for one review.

  Appends exactly one entry to ``reviews['date']``: the ``datetime`` attribute
  of the review's first ``<time>`` element, or ``None`` when the element or
  the attribute is missing.
  """
  time_tag = single_review.find('time')
  value = time_tag.get('datetime') if time_tag is not None else None
  reviews.setdefault('date', []).append(value)

def find_rating(single_review):
  """Record the overall rating given by the reviewer.

  Args:
    single_review: BeautifulSoup element for one review.

  Appends exactly one entry to ``reviews['rating']``: the stripped text of the
  first ``span`` with ``itemprop="ratingValue"`` (kept as a string), or
  ``None`` when the review carries no such element.
  """
  rating_tag = single_review.find('span', {'itemprop': 'ratingValue'})
  value = rating_tag.get_text(strip=True) if rating_tag is not None else None
  reviews.setdefault('rating', []).append(value)


def find_aircraft(single_review):
  """Record the aircraft type mentioned in the review, if any.

  Args:
    single_review: BeautifulSoup element for one review.

  Appends exactly one entry to ``reviews['aircraft']``: the stripped text of
  the ``review-value`` cell that follows the aircraft header cell, or ``None``
  when the review has no aircraft row.
  """
  header = single_review.find('td', {'class': 'review-rating-header aircraft'})
  # Read the value cell that follows the aircraft header. Taking the first
  # 'review-value' cell of the whole review would only be correct when the
  # aircraft row happens to be the first row of the table.
  cell = None
  if header is not None:
    cell = header.find_next('td', {'class': 'review-value'})
  value = cell.get_text(strip=True) if cell is not None else None
  reviews.setdefault('aircraft', []).append(value)

def find_traveller(single_review):
  """Record the type of traveller mentioned in the review.

  Args:
    single_review: BeautifulSoup element for one review.

  Appends exactly one entry to ``reviews['type_of_traveller']``: the stripped
  text of the ``review-value`` cell following the ``type_of_traveller`` header
  cell, or ``None`` when the header or its value cell is missing (so the
  per-field lists keep one entry per review).
  """
  header = single_review.find('td', {'class': 'review-rating-header type_of_traveller'})
  cell = None
  if header is not None:
    cell = header.find_next('td', {'class': 'review-value'})
  value = cell.get_text(strip=True) if cell is not None else None
  reviews.setdefault('type_of_traveller', []).append(value)

def find_seat(single_review):
  """Record the type of seat used by the reviewer.

  Args:
    single_review: BeautifulSoup element for one review.

  Appends exactly one entry to ``reviews['seat_type']``: the stripped text of
  the ``review-value`` cell following the ``cabin_flown`` header cell, or
  ``None`` when the header or its value cell is missing (so the per-field
  lists keep one entry per review).
  """
  header = single_review.find('td', {'class': 'review-rating-header cabin_flown'})
  cell = None
  if header is not None:
    cell = header.find_next('td', {'class': 'review-value'})
  value = cell.get_text(strip=True) if cell is not None else None
  reviews.setdefault('seat_type', []).append(value)


def find_route(single_review):
  """Record the travel route taken by the reviewer.

  Args:
    single_review: BeautifulSoup element for one review.

  Appends exactly one entry to ``reviews['route']``: the stripped text of the
  ``review-value`` cell following the ``route`` header cell, or ``None`` when
  the header or its value cell is missing (so the per-field lists keep one
  entry per review).
  """
  header = single_review.find('td', {'class': 'review-rating-header route'})
  cell = None
  if header is not None:
    cell = header.find_next('td', {'class': 'review-value'})
  value = cell.get_text(strip=True) if cell is not None else None
  reviews.setdefault('route', []).append(value)

def find_date_flown(single_review):
  """Record the date on which the reviewer flew.

  Args:
    single_review: BeautifulSoup element for one review.

  Appends exactly one entry to ``reviews['date_flown']``: the stripped text of
  the ``review-value`` cell following the ``date_flown`` header cell, or
  ``None`` when the header or its value cell is missing (so the per-field
  lists keep one entry per review).
  """
  header = single_review.find('td', {'class': 'review-rating-header date_flown'})
  cell = None
  if header is not None:
    cell = header.find_next('td', {'class': 'review-value'})
  value = cell.get_text(strip=True) if cell is not None else None
  reviews.setdefault('date_flown', []).append(value)


def find_seat_comfort(single_review):
  """Record the star rating given in the review for 'Seat Comfort'.

  The rating is the number of ``span`` elements with class ``star fill``
  inside the ``review-rating-stars stars`` cell that follows the
  ``seat_comfort`` header cell.

  Args:
    single_review: BeautifulSoup element for one review.

  Appends exactly one entry to ``reviews['seat_comfort']``: the star count as
  an ``int``, or ``None`` when the review has no seat-comfort row (so the
  per-field lists keep one entry per review).
  """
  header = single_review.find('td', {'class': 'review-rating-header seat_comfort'})
  stars_cell = None
  if header is not None:
    stars_cell = header.find_next('td', {'class': 'review-rating-stars stars'})
  if stars_cell is None:
    # None (not 0) marks "not rated".
    star_rating = None
  else:
    star_rating = len(stars_cell.find_all('span', class_='star fill'))
  reviews.setdefault('seat_comfort', []).append(star_rating)


def scrape_reviews(current_page_url, timeout=30):
  """Download one listing page and record every review found on it.

  Each ``article`` element with ``itemprop="review"`` is passed to every field
  extractor in turn; each extractor appends its value to the module-level
  ``reviews`` dict.

  Args:
    current_page_url: URL of the review listing page to fetch.
    timeout: Seconds to wait for the server before giving up. Without a
      timeout ``requests.get`` may block indefinitely.

  Returns:
    None. On a non-200 response a message is printed and nothing is recorded.

  Raises:
    requests.exceptions.RequestException: if the request fails or times out.
  """
  # One extractor per collected field, applied to every review in this order.
  extractors = (
      find_name,
      find_country,
      find_date,
      is_verified,
      find_rating,
      find_aircraft,
      find_traveller,
      find_seat,
      find_route,
      find_date_flown,
      find_seat_comfort,
  )

  response = requests.get(current_page_url, timeout=timeout)
  if response.status_code != 200:
    print(f"Failed to retrieve the page. Status code: {response.status_code}")
    return

  soup = BeautifulSoup(response.text, 'html.parser')
  for single_review in soup.find_all('article', {'itemprop': 'review'}):
    for extract in extractors:
      extract(single_review)

# Walk the listing pages 1..num_pages and scrape each one in turn.
num_pages = 1

for page_number in range(1, num_pages + 1):
  # Page URLs have the form '<base url>/page/<n>/'.
  current_page_url = "{}/page/{}/".format(url, page_number)
  print(current_page_url)
  # Collect every review on this page into 'reviews'.
  scrape_reviews(current_page_url)


# 'reviews' now holds one list per field; dump each field with its values.
for key, values in reviews.items():
  print(f'{key}:')
  for value in values:
    print(f'    {value}')
  # Blank line after each field; this has to sit inside the loop, otherwise
  # only a single trailing blank line is printed.
  print()


https://www.airlinequality.com/airline-reviews/british-airways/page/1/
name:
    A Gittens
    Bert Vissers
    E Smyth
    L Keele
    Austin Jones
    M A Collie
    Nigel Dean
    Gaylynne Simpson
    A Narden
    Graeme Boothman
country:
    United Kingdom
    Switzerland
    United Kingdom
    Germany
    United States
    Ireland
    Australia
    United States
    Romania
    United Kingdom
date:
    2023-11-30
    2023-11-26
    2023-11-23
    2023-11-19
    2023-11-19
    2023-11-16
    2023-11-16
    2023-11-14
    2023-11-12
    2023-11-08
trip_verified:
    Trip Verified
    Trip Verified
    Trip Verified
    Trip Verified
    Trip Verified
    Not Verified
    Trip Verified
    Not Verified
    Trip Verified
    Trip Verified
rating:
    5
    1
    9
    1
    3
    8
    1
    1
    1
    8
aircraft:
    Boeing 787-9
    None
    A380
    None
    A320
    A320
    None
    None
    A320
    Boeing 777-300
type_of_traveller:
    Solo Leisure
    Family Leisure
    Coupl