In [18]:
#pip install pandas

In [19]:
import time
import requests
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

# Biblioteca para automação
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import re

# Biblioteca para scraping
from bs4 import BeautifulSoup

In [20]:
# Selenium: build the Chrome options and the driver service, then start the browser session
options = webdriver.ChromeOptions()
service = Service()
driver = webdriver.Chrome(service=service, options=options)

In [21]:
# URL template for the review listing; '{}' is the placeholder for the page number
url = 'https://www.airlinequality.com/airline-reviews/british-airways/page/{}/?sortby=post_date%3ADesc&pagesize=100'

# BeautifulSoup
headers = {}
# Fill in the page number before requesting: sending the raw template would ask the
# server for the literal path '/page/{}/'. The timeout keeps the cell from hanging
# forever if the server never answers.
response = requests.get(url.format(1), headers=headers, timeout=30)
html = response.text

soup = BeautifulSoup(response.content, 'html.parser')

In [22]:
# Funções para scraping

def extract_titles(article):
    """
    Extract the title of a review.

    Args:
        article: BeautifulSoup object representing one review.

    Returns:
        str or None: Text of the review's <h2 class="text_header"> element, or None
            when the review has no such element.
    """
    title_element = article.find('h2', class_='text_header')
    # find() yields None for a missing element; return None instead of raising
    # AttributeError, the same way extract_rating reports a missing rating.
    if title_element is None:
        return None
    return title_element.get_text()

def extract_comments(article):
    """
    Extract the comment body of a review.

    Args:
        article: BeautifulSoup object representing one review.

    Returns:
        str or None: Text of the review's <div class="text_content"> element, or None
            when the review has no such element.
    """
    comment_element = article.find('div', class_='text_content')
    # find() yields None for a missing element; return None instead of raising
    # AttributeError, the same way extract_rating reports a missing rating.
    if comment_element is None:
        return None
    return comment_element.get_text()

def extract_rating(article):
    """
    Extract the overall rating of a review.

    Args:
        article: BeautifulSoup object representing one review.

    Returns:
        str or None: Text of the 'ratingValue' span inside the 'rating-10' div,
            or None when either element is missing.
    """
    rating_box = article.find('div', class_='rating-10')
    if not rating_box:
        return None

    value_span = rating_box.find('span', itemprop='ratingValue')
    if not value_span:
        return None

    return value_span.get_text()

def extract_author_info(article):
    """
    Extract the author's name, location and publication date of a review.

    Args:
        article: BeautifulSoup object representing one review.

    Returns:
        tuple: (author_name, location, date_published). Each item is None when the
            corresponding piece of markup is missing; all three are None when the
            review has no author header at all.
    """
    # Look the header up once; the three fields below are all read from it.
    header = article.find('h3', class_='text_sub_header userStatusWrapper')
    if header is None:
        return None, None, None

    author_element = header.find('span', itemprop='name')
    author_name = author_element.get_text() if author_element is not None else None

    # The location is the first parenthesised fragment of the header text.
    location_match = re.search(r'\((.*?)\)', header.get_text())
    location = location_match.group(1) if location_match else None

    date_element = header.find('time', itemprop='datePublished')
    # .get() gives None (instead of KeyError) if the <time> tag has no datetime attribute.
    date_published = date_element.get('datetime') if date_element is not None else None

    return author_name, location, date_published

def extract_review_values(review_table):
    """
    Collect the textual values of a review's ratings table.

    Args:
        review_table: BeautifulSoup object representing the table with the review's ratings.

    Returns:
        dict: Maps the text of each row's 'review-rating-header' cell to the text of
            its 'review-value' cell. Rows lacking either cell are left out.
    """
    values_by_item = {}
    for table_row in review_table.find_all('tr'):
        label_cell = table_row.find('td', class_='review-rating-header')
        if not label_cell:
            continue

        text_cell = table_row.find('td', class_='review-value')
        if not text_cell:
            continue

        values_by_item[label_cell.get_text(strip=True)] = text_cell.get_text(strip=True)
    return values_by_item

def extract_review_stars(review_table):
    """
    Count the filled stars for every rated item of a review.

    Args:
        review_table (BeautifulSoup): Object representing the review's ratings table.

    Returns:
        dict: Maps the text of each row's 'review-rating-header' cell to the number of
            'star fill' spans in that row. Rows without a header cell or without any
            filled star are left out.
    """
    star_counts = {}
    for table_row in review_table.find_all('tr'):
        label_cell = table_row.find('td', class_='review-rating-header')
        if not label_cell:
            continue

        filled_stars = table_row.find_all('span', class_='star fill')
        if filled_stars:
            star_counts[label_cell.get_text(strip=True)] = len(filled_stars)
    return star_counts

In [23]:
# List that accumulates one dict per scraped review
list_data = []

# Maximum number of listing pages to scrape
max_pages = 2

# The browser is closed in the finally block, so a failure in the middle of the
# scrape (network error, unexpected markup) does not leave a Chrome process behind.
try:
    # Walk through the listing pages and collect every review on each of them
    for current_page in range(1, max_pages + 1):
        url = 'https://www.airlinequality.com/airline-reviews/british-airways/page/{}/?sortby=post_date%3ADesc&pagesize=100'.format(current_page)
        driver.get(url)

        page_source = driver.page_source
        soup = BeautifulSoup(page_source, 'html.parser')

        # Each review on the page lives in its own <article> element
        comment_articles = soup.find_all('article', class_='comp_media-review-rated')

        # Extract all the information of a single review
        for article in comment_articles:
            title = extract_titles(article)
            comment = extract_comments(article)
            rating = extract_rating(article)
            author_name, location, date_published = extract_author_info(article)

            # Reviews without a ratings table are not recorded
            review_table = article.find('table', class_='review-ratings')
            if not review_table:
                continue

            # Textual values of the ratings table
            review_values = extract_review_values(review_table)

            # Star counts of the ratings table
            review_stars = extract_review_stars(review_table)

            # The table entries become extra columns next to the fixed fields
            list_data.append({
                'Title': title,
                'Comment': comment,
                'Rating': rating,
                'Author Name': author_name,
                'Location': location,
                'Date Published': date_published,
                **review_values,
                **review_stars
            })

        # Pause between page loads; there is nothing to wait for after the last page
        if current_page < max_pages:
            time.sleep(3)
finally:
    driver.quit()


In [24]:
# Build a DataFrame from the extracted review records (list_data holds one dict per review)
df = pd.DataFrame(list_data)

df

Unnamed: 0,Title,Comment,Rating,Author Name,Location,Date Published,Type Of Traveller,Seat Type,Route,Date Flown,Recommended,Seat Comfort,Cabin Staff Service,Food & Beverages,Inflight Entertainment,Ground Service,Wifi & Connectivity,Value For Money,Aircraft
0,"""Buckle up, folks; this one's a rollercoaster""",✅ Trip Verified | Dear Community I feel compel...,1,Bert Vissers,Switzerland,2023-11-26,Family Leisure,Economy Class,Zürich to Tokyo Haneda via London,July 2023,no,1.0,1.0,1.0,1.0,1.0,1.0,1,
1,"""cabin crew excellent""",✅ Trip Verified | Quick bag drop at First Win...,9,E Smyth,United Kingdom,2023-11-23,Couple Leisure,First Class,London to Miami,November 2023,yes,5.0,5.0,5.0,5.0,4.0,2.0,4,A380
2,"""Service level far worse then Ryanair""",✅ Trip Verified | 4 Hours before takeoff we r...,1,L Keele,Germany,2023-11-19,Couple Leisure,Economy Class,London to Stuttgart,November 2023,no,1.0,1.0,,,1.0,,1,
3,"""do not upgrade members based on status""",✅ Trip Verified | I recently had a delay on B...,3,Austin Jones,United States,2023-11-19,Business,Economy Class,Brussels to London,November 2023,no,2.0,3.0,1.0,2.0,1.0,2.0,2,A320
4,"""Flight was smooth and quick""","Not Verified | Boarded on time, but it took a...",8,M A Collie,Ireland,2023-11-16,Couple Leisure,Business Class,London Heathrow to Dublin,November 2023,yes,3.0,3.0,4.0,,4.0,,3,A320
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,"""airline is going downhill rapidly""",Not Verified | Flew from London Heathrow to M...,1,Derek McLeod,United Kingdom,2023-03-26,Couple Leisure,Economy Class,London Heathrow to Marrakech,March 2023,no,1.0,3.0,1.0,,1.0,,1,
196,"""made a complaint, nothing happened""",Not Verified | I was meant to fly in January t...,1,Mohammed Laidi,United Kingdom,2023-03-21,Family Leisure,Economy Class,London to Algiers,May 2022,no,1.0,1.0,,,1.0,,2,
197,"""Every flight was delayed""",✅ Trip Verified | We have flown repeatedly wi...,1,S Walder,Austria,2023-03-21,Couple Leisure,Economy Class,Miami to Munich via London,March 2023,yes,3.0,1.0,3.0,4.0,1.0,2.0,1,
198,"""how far the quality level has slid""",✅ Trip Verified | I was horrified by the extr...,3,L Garson,United States,2023-03-20,Business,Economy Class,New York to London,March 2023,no,1.0,1.0,1.0,2.0,2.0,,2,


In [25]:
# Show the column dtypes and non-null counts of the scraped data
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 19 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Title                   200 non-null    object 
 1   Comment                 200 non-null    object 
 2   Rating                  200 non-null    object 
 3   Author Name             200 non-null    object 
 4   Location                200 non-null    object 
 5   Date Published          200 non-null    object 
 6   Type Of Traveller       199 non-null    object 
 7   Seat Type               200 non-null    object 
 8   Route                   199 non-null    object 
 9   Date Flown              200 non-null    object 
 10  Recommended             200 non-null    object 
 11  Seat Comfort            186 non-null    float64
 12  Cabin Staff Service     185 non-null    float64
 13  Food & Beverages        158 non-null    float64
 14  Inflight Entertainment  109 non-null    fl

In [26]:
# Convert the 'Date Published' and 'Date Flown' columns to datetime
df['Date Published'] = pd.to_datetime(df['Date Published'])
df['Date Flown'] = pd.to_datetime(df['Date Flown'])


# Convert the "Rating" column to a nullable integer.
# The scraped ratings are strings (or None when a review has no rating), so they are
# parsed explicitly with to_numeric first: casting an object column of strings straight
# to 'Int64' is not accepted by every pandas version. Missing ratings end up as <NA>.
df['Rating'] = pd.to_numeric(df['Rating']).astype('Int64')

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 19 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   Title                   200 non-null    object        
 1   Comment                 200 non-null    object        
 2   Rating                  200 non-null    Int64         
 3   Author Name             200 non-null    object        
 4   Location                200 non-null    object        
 5   Date Published          200 non-null    datetime64[ns]
 6   Type Of Traveller       199 non-null    object        
 7   Seat Type               200 non-null    object        
 8   Route                   199 non-null    object        
 9   Date Flown              200 non-null    datetime64[ns]
 10  Recommended             200 non-null    object        
 11  Seat Comfort            186 non-null    float64       
 12  Cabin Staff Service     185 non-null    float64   

In [27]:
# Mapping from spellings found in the 'Aircraft' column to a normalized
# "Manufacturer Model" label. Order matters: map_aircraft returns the replacement of
# the FIRST key that occurs in the value, so specific keys must come before generic ones.
mapping = {
    'A318': 'Airbus A318',
    'A19': 'Airbus A319',
    'A219': 'Airbus A319',
    'A319': 'Airbus A319',
    'A320': 'Airbus A320',
    'A230': 'Airbus A320',
    'A321': 'Airbus A321',
    'A322': 'Airbus A322',
    'Airbus A32': 'Airbus A322',
    'A330': 'Airbus A330',
    'A350': 'Airbus A350',
    'A380': 'Airbus A380',
    'Boeing 737': 'Boeing 737',
    'Boeing 747': 'Boeing 747',
    'B0eing 747-400': 'Boeing 747',
    'B747': 'Boeing 747',
    'Boeing 767': 'Boeing 767',
    '767-300': 'Boeing 767',
    'Boeing 777': 'Boeing 777',
    'B777': 'Boeing 777',
    '777': 'Boeing 777',
    'Boeing\xa0777': 'Boeing 777',
    '777-200': 'Boeing 777',
    '777-300': 'Boeing 777',
    'Boieng 777-200': 'Boeing 777',
    'Boeing 787': 'Boeing 787',
    'Dreamliner': 'Boeing 787',
    '787': 'Boeing 787',
    'E170': 'Embraer 170',
    'E-170': 'Embraer 170',
    # Spelled-out 170 variants must precede the generic 'Embraer' key below,
    # otherwise "Embraer 170" would be labelled as an Embraer 190.
    'Embraer 170': 'Embraer 170',
    'Embraer-170': 'Embraer 170',
    'E190': 'Embraer 190',
    'E-190': 'Embraer 190',
    'Embraer-190': 'Embraer 190',
    'Embraer': 'Embraer 190',
    'EMB190': 'Embraer 190',
    'Various': None,
    'SAAB 2000': 'Saab 2000',
    'Saab 2000': 'Saab 2000'
}

def map_aircraft(value):
    """
    Normalize a free-text aircraft description.

    Args:
        value: Raw content of the 'Aircraft' column (string or missing value).

    Returns:
        The replacement of the first key of `mapping` that occurs, case-insensitively,
        as a substring of `value`; `value` itself when no key matches.
    """
    # Lower-case the input once instead of once per mapping key
    text = str(value).lower()
    for key, replacement in mapping.items():
        if key.lower() in text:
            return replacement
    return value

# Add the normalized aircraft label as a new column; the raw 'Aircraft' column is kept
df['Manufacturer_Model'] = df['Aircraft'].apply(map_aircraft)
df

Unnamed: 0,Title,Comment,Rating,Author Name,Location,Date Published,Type Of Traveller,Seat Type,Route,Date Flown,Recommended,Seat Comfort,Cabin Staff Service,Food & Beverages,Inflight Entertainment,Ground Service,Wifi & Connectivity,Value For Money,Aircraft,Manufacturer_Model
0,"""Buckle up, folks; this one's a rollercoaster""",✅ Trip Verified | Dear Community I feel compel...,1,Bert Vissers,Switzerland,2023-11-26,Family Leisure,Economy Class,Zürich to Tokyo Haneda via London,2023-07-01,no,1.0,1.0,1.0,1.0,1.0,1.0,1,,
1,"""cabin crew excellent""",✅ Trip Verified | Quick bag drop at First Win...,9,E Smyth,United Kingdom,2023-11-23,Couple Leisure,First Class,London to Miami,2023-11-01,yes,5.0,5.0,5.0,5.0,4.0,2.0,4,A380,Airbus A380
2,"""Service level far worse then Ryanair""",✅ Trip Verified | 4 Hours before takeoff we r...,1,L Keele,Germany,2023-11-19,Couple Leisure,Economy Class,London to Stuttgart,2023-11-01,no,1.0,1.0,,,1.0,,1,,
3,"""do not upgrade members based on status""",✅ Trip Verified | I recently had a delay on B...,3,Austin Jones,United States,2023-11-19,Business,Economy Class,Brussels to London,2023-11-01,no,2.0,3.0,1.0,2.0,1.0,2.0,2,A320,Airbus A320
4,"""Flight was smooth and quick""","Not Verified | Boarded on time, but it took a...",8,M A Collie,Ireland,2023-11-16,Couple Leisure,Business Class,London Heathrow to Dublin,2023-11-01,yes,3.0,3.0,4.0,,4.0,,3,A320,Airbus A320
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,"""airline is going downhill rapidly""",Not Verified | Flew from London Heathrow to M...,1,Derek McLeod,United Kingdom,2023-03-26,Couple Leisure,Economy Class,London Heathrow to Marrakech,2023-03-01,no,1.0,3.0,1.0,,1.0,,1,,
196,"""made a complaint, nothing happened""",Not Verified | I was meant to fly in January t...,1,Mohammed Laidi,United Kingdom,2023-03-21,Family Leisure,Economy Class,London to Algiers,2022-05-01,no,1.0,1.0,,,1.0,,2,,
197,"""Every flight was delayed""",✅ Trip Verified | We have flown repeatedly wi...,1,S Walder,Austria,2023-03-21,Couple Leisure,Economy Class,Miami to Munich via London,2023-03-01,yes,3.0,1.0,3.0,4.0,1.0,2.0,1,,
198,"""how far the quality level has slid""",✅ Trip Verified | I was horrified by the extr...,3,L Garson,United States,2023-03-20,Business,Economy Class,New York to London,2023-03-01,no,1.0,1.0,1.0,2.0,2.0,,2,,


In [28]:
# Look for airport codes (standalone runs of three capital letters) in the routes
routes = df['Route']
routes_str = routes.astype(str)

# Scan every route in one pass by joining them into a single space-separated text
all_routes_text = ' '.join(routes_str)
three_letter_codes = re.findall(r'\b[A-Z]{3}\b', all_routes_text)

# Keep each code only once
unique_codes = set(three_letter_codes)

print(unique_codes)


{'LAX', 'LHR', 'JFK'}


In [29]:
# Airport codes and the city name each one is replaced with in the 'Route' column.
# Every code appears exactly once. The original literal repeated many keys, and in a
# dict literal the last occurrence silently wins, so the values below are the ones
# that were actually in effect.
# NOTE(review): 'ARN', 'BSL', 'CPH' and 'GOT' carry Portuguese city names while the
# remaining entries are in English - confirm which spelling is wanted.
mapping_cities = {
    'GIG': 'Rio de Janeiro',
    'SOF': 'Sofia',
    'IAD': 'Washington D.C.',
    'YVR': 'Vancouver',
    'LHR': 'London',
    'LGW': 'London Gatwick',
    'MAD': 'Madrid',
    'MXP': 'Milan',
    'CDG': 'Paris',
    'IST': 'Istanbul',
    'JNB': 'Johannesburg',
    'ORD': 'Chicago',
    'BOM': 'Mumbai',
    'DUB': 'Dublin',
    'AMS': 'Amsterdam',
    'FRA': 'Frankfurt',
    'HND': 'Tokyo Haneda',
    'SFO': 'San Francisco',
    'SYD': 'Sydney',
    'DXB': 'Dubai',
    'PEK': 'Beijing',
    'BCN': 'Barcelona',
    'MUC': 'Munich',
    'VIE': 'Vienna',
    'ATH': 'Athens',
    'CPT': 'Cape Town',
    'DEL': 'Delhi',
    'SEA': 'Seattle',
    'MIA': 'Miami',
    'TLV': 'Tel Aviv',
    'RUH': 'Riyadh',
    'AUH': 'Abu Dhabi',
    'GRU': 'Sao Paulo',
    'GVA': 'Geneva',
    'SIN': 'Singapore',
    'BKK': 'Bangkok',
    'HKG': 'Hong Kong',
    'LAX': 'Los Angeles',
    'JFK': 'New York',
    'YYZ': 'Toronto',
    'FCO': 'Rome',
    'PRG': 'Prague',
    'MEX': 'Mexico City',
    'NBO': 'Nairobi',
    'DUS': 'Dusseldorf',
    'BOG': 'Bogota',
    'EZE': 'Buenos Aires',
    'ICN': 'Seoul',
    'SVO': 'Moscow',
    'DOH': 'Doha',
    'BNE': 'Brisbane',
    'LAS': 'Las Vegas',
    'DFW': 'Dallas/Fort Worth',
    'HKT': 'Phuket',
    'KUL': 'Kuala Lumpur',
    'MAA': 'Chennai',
    'ACC': 'Accra',
    'LOS': 'Lagos',
    'DKR': 'Dakar',
    'CMN': 'Casablanca',
    'KGL': 'Kigali',
    'EBB': 'Entebbe',
    'ADD': 'Addis Ababa',
    'ABV': 'Abuja',
    'DUR': 'Durban',
    'CAI': 'Cairo',
    'BEY': 'Beirut',
    'DAM': 'Damascus',
    'KWI': 'Kuwait City',
    'BAH': 'Bahrain',
    'MCT': 'Muscat',
    'DMM': 'Dammam',
    'KRT': 'Khartoum',
    'ADB': 'Izmir',
    'ESB': 'Ankara',
    'AYT': 'Antalya',
    'ADA': 'Adana',
    'ISL': 'Istanbul',
    'BJV': 'Bodrum',
    'DLM': 'Dalaman',
    'LXR': 'Luxor',
    'HRG': 'Hurghada',
    'SSH': 'Sharm El Sheikh',
    'AMM': 'Amman',
    'JED': 'Jeddah',
    'MED': 'Madinah',
    'SAW': 'Istanbul',
    'GLA': 'Glasgow',
    'DEN': 'Denver',
    'ARN': 'Estocolmo',
    'ATL': 'Atlanta',
    'BOS': 'Boston',
    'MAN': 'Manchester',
    'SEL': 'Seoul',
    'BSL': 'Basileia',
    'BWI': 'Baltimore',
    'CNX': 'Chiang Mai',
    'CPH': 'Copenhague',
    'DME': 'Moscow',
    'LCY': 'London City',
    'CUN': 'Cancun',
    'EDI': 'Edinburgh',
    'FUE': 'Fuerteventura',
    'INV': 'Inverness',
    'GOT': 'Gotemburgo',
    'IAH': 'Houston George Bush',
    'KBP': 'Kiev Boryspil',
    'BFS': 'Belfast',
    'BHD': 'Belfast',
    'PER': 'Perth',
    'ABZ': 'Aberdeen',
    'NCE': 'Nice',
    'LUX': 'Luxembourg',
    'LIM': 'Lima',
    'ZRH': 'Zurich',
    'JTR': 'Santorini',
    'CTU': 'Chengdu',
    'RTM': 'Rotterdam',
    'STR': 'Stuttgart',
    'PUJ': 'Punta Cana',
    'BGO': 'Bergen',
    'PMI': 'Palma de Mallorca',
    'TRN': 'Turin',
    'AGP': 'Malaga',
    'UVF': 'St. Lucia',
    'WAW': 'Warsaw',
    'PSA': 'Pisa',
    'LAD': 'Luanda',
    'CTA': 'Catania',
    'NAS': 'Nassau',
    'PVG': 'Shanghai',
    'ALC': 'Alicante',
    'ANU': 'Antigua',
    'VCE': 'Venice',
    'LIS': 'Lisbon',
    'EWR': 'Newark',
    'MRS': 'Marseille',
    'YYC': 'Calgary',
    'CLT': 'Charlotte',
    'TAV': 'Tallinn',
    'OTP': 'Bucharest',
    'AUS': 'Austin',
    'BGI': 'Bridgetown',
    'OUL': 'Oulu',
    'SXF': 'Berlin',
    'LBA': 'Leeds',
    'SJO': 'San Jose',
    'DBV': 'Dubrovnik',
    'MLE': 'Male',
    'SZG': 'Salzburg',
    'YUL': 'Montreal',
    'PHX': 'Phoenix',
    'PHL': 'Philadelphia',
    'SAN': 'San Diego',
    'SVQ': 'Seville',
    'SJC': 'San Jose',
    'FLR': 'Florence',
    'BUD': 'Budapest',
    'TPA': 'Tampa',
    'NCL': 'Newcastle',
    'LYS': 'Lyon',
    'RAK': 'Marrakech',
    'BIO': 'Bilbao',
    'KRK': 'Krakow',
    'MLA': 'Malta',
    'OPO': 'Porto',
    'FNC': 'Funchal',
    'BRU': 'Brussels',
    'NAP': 'Naples',
    'SNN': 'Shannon',
    'IKA': 'Tehran',
    'PFO': 'Paphos',
    'ORY': 'Orly (Paris)',
    'IOM': 'Isle of Man',
    'BLR': 'Bangalore',
    'OSL': 'Oslo',
    'LCA': 'Larnaca',
    'KGS': 'Kos',
    'BRI': 'Bari',
    'TXL': 'Berlin',
    'MCI': 'Kansas City',
    'GIB': 'Gibraltar',
    'MCO': 'Orlando',
    'BDA': 'Bermuda',
    'MEL': 'Melbourne',
    'HEL': 'Helsinki',
    'IBZ': 'Ibiza',
    'VLC': 'Valencia',
    'LED': 'St. Petersburg',
    'FAO': 'Faro',
    'MRU': 'Mauritius'
}

def _expand_airport_codes(route):
    """
    Replace the airport codes of a route with city names from mapping_cities.

    Args:
        route: Raw content of the 'Route' column (string or missing value).

    Returns:
        The route with every stop that is a key of mapping_cities replaced by its
        city name. Missing values are returned unchanged instead of being turned
        into the string 'nan'.
    """
    if pd.isna(route):
        return route
    # The capturing group makes re.split keep the ' to ' / ' via ' separators in the
    # result, so joining the pieces back rebuilds the route. Splitting on ' via ' as
    # well lets a code after "via" (e.g. "LHR to JFK via DUB") be translated too.
    parts = re.split(r'( to | via )', str(route))
    return ''.join(mapping_cities.get(part, part) for part in parts)

df['Route'] = df['Route'].apply(_expand_airport_codes)
df

Unnamed: 0,Title,Comment,Rating,Author Name,Location,Date Published,Type Of Traveller,Seat Type,Route,Date Flown,Recommended,Seat Comfort,Cabin Staff Service,Food & Beverages,Inflight Entertainment,Ground Service,Wifi & Connectivity,Value For Money,Aircraft,Manufacturer_Model
0,"""Buckle up, folks; this one's a rollercoaster""",✅ Trip Verified | Dear Community I feel compel...,1,Bert Vissers,Switzerland,2023-11-26,Family Leisure,Economy Class,Zürich to Tokyo Haneda via London,2023-07-01,no,1.0,1.0,1.0,1.0,1.0,1.0,1,,
1,"""cabin crew excellent""",✅ Trip Verified | Quick bag drop at First Win...,9,E Smyth,United Kingdom,2023-11-23,Couple Leisure,First Class,London to Miami,2023-11-01,yes,5.0,5.0,5.0,5.0,4.0,2.0,4,A380,Airbus A380
2,"""Service level far worse then Ryanair""",✅ Trip Verified | 4 Hours before takeoff we r...,1,L Keele,Germany,2023-11-19,Couple Leisure,Economy Class,London to Stuttgart,2023-11-01,no,1.0,1.0,,,1.0,,1,,
3,"""do not upgrade members based on status""",✅ Trip Verified | I recently had a delay on B...,3,Austin Jones,United States,2023-11-19,Business,Economy Class,Brussels to London,2023-11-01,no,2.0,3.0,1.0,2.0,1.0,2.0,2,A320,Airbus A320
4,"""Flight was smooth and quick""","Not Verified | Boarded on time, but it took a...",8,M A Collie,Ireland,2023-11-16,Couple Leisure,Business Class,London Heathrow to Dublin,2023-11-01,yes,3.0,3.0,4.0,,4.0,,3,A320,Airbus A320
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,"""airline is going downhill rapidly""",Not Verified | Flew from London Heathrow to M...,1,Derek McLeod,United Kingdom,2023-03-26,Couple Leisure,Economy Class,London Heathrow to Marrakech,2023-03-01,no,1.0,3.0,1.0,,1.0,,1,,
196,"""made a complaint, nothing happened""",Not Verified | I was meant to fly in January t...,1,Mohammed Laidi,United Kingdom,2023-03-21,Family Leisure,Economy Class,London to Algiers,2022-05-01,no,1.0,1.0,,,1.0,,2,,
197,"""Every flight was delayed""",✅ Trip Verified | We have flown repeatedly wi...,1,S Walder,Austria,2023-03-21,Couple Leisure,Economy Class,Miami to Munich via London,2023-03-01,yes,3.0,1.0,3.0,4.0,1.0,2.0,1,,
198,"""how far the quality level has slid""",✅ Trip Verified | I was horrified by the extr...,3,L Garson,United States,2023-03-20,Business,Economy Class,New York to London,2023-03-01,no,1.0,1.0,1.0,2.0,2.0,,2,,


In [30]:
# Persist the final DataFrame as a UTF-8 encoded CSV file, leaving out the index column
output_csv_path = r'G:\Meu Drive\Projetos\Python\British Airlines\data_test.csv'
df.to_csv(output_csv_path, index=False, encoding='utf-8')