## **Airline Reviews: Topic & Sentiment Analysis**
This notebook performs topic modeling and sentiment classification on airline customer reviews.

In [None]:
# Install required libraries
!pip install -q bertopic[visualization] sentence-transformers umap-learn hdbscan transformers torch datasets nltk matplotlib wordcloud

In [None]:
!pip install pycountry pandas

In [None]:
import pandas as pd
from bertopic import BERTopic
from sentence_transformers import SentenceTransformer
from umap import UMAP
from hdbscan import HDBSCAN
from sklearn.feature_extraction.text import CountVectorizer
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import torch
import re
import nltk
from nltk.corpus import stopwords
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Fetch the NLTK stopword corpus used later to build the stopword list
nltk.download("stopwords")

In [None]:
# The dataset lives on Google Drive, so mount it under /content/drive (Colab only)
from google.colab import drive

drive.mount("/content/drive")

In [None]:
# Load the cleaned airline reviews dataset
df = pd.read_csv('/content/drive/MyDrive/airline_reviews_cleaned_dataset.csv')

# Drop rows without review text from the DataFrame itself (not only from `texts`).
# Later cells assign per-text results back with `df['topic'] = ...` and
# `df['sentiment'] = ...`, which requires `df` and `texts` to have the same
# length and order.
df = df.dropna(subset=['customer_review']).reset_index(drop=True)

# One string per row, truncated to the first 800 characters
texts = df['customer_review'].astype(str).str[:800].tolist()

In [None]:
# Load libraries
from sentence_transformers import SentenceTransformer
import pycountry
import nltk
from nltk.corpus import stopwords
import pandas as pd
import requests
from io import StringIO

# Download NLTK stopwords if not already done
nltk.download('stopwords')

# Load city names from GitHub dataset (best effort: fall back to an empty list on any failure)
url = "https://raw.githubusercontent.com/datasets/world-cities/master/data/world-cities.csv"
try:
    # A timeout keeps the cell from hanging; raise_for_status turns an HTTP
    # error page into an exception instead of parsing it as CSV.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    df_cities = pd.read_csv(StringIO(response.text))
    city_names = df_cities['name'].dropna().str.strip().str.lower().unique().tolist()
except Exception as e:
    print("Error loading city data:", e)
    city_names = []

# Get all country names.
# Lower-cased because CountVectorizer lower-cases the text before stop-word
# filtering, so a capitalised entry such as "France" would never match.
country_names = [country.name.lower() for country in pycountry.countries]

# Define custom stopwords
custom_stopwords = ['Business Class','Economy Class','First Class','Premium Economy', 'flight', 'flights', 'airline', 'airlines', 'bag', 'boarding', 'check','carry', 'staff', 'united', 'american', 'delta', 'wizz', 'easyjet','ryanair', 'airport', 'passenger', 'travel', 'ticket', 'seat', 'gate','name', 'trip', 'verified','adria', 'aegean', 'aer', 'lingus', 'aeroflot', 'russian', 'aeromexico','air', 'arabia', 'canada', 'france', 'new', 'zealand', 'airasia', 'alaska','alitalia', 'ana', 'all', 'nippon', 'asiana', 'austrian', 'avianca','bangkok', 'british', 'brussels', 'china', 'eastern', 'southern', 'copa','egyptair', 'emirates', 'ethiopian', 'etihad', 'eurowings', 'eva','finnair', 'flydubai', 'frontier', 'garuda', 'indonesia', 'germanwings','gulf', 'iberia', 'icelandair', 'indigo', 'jetblue', 'klm', 'royal','dutch', 'korean', 'kuwait', 'latam', 'lot', 'polish', 'lufthansa','norwegian', 'pegasus', 'qantas', 'qantaslink', 'qatar', 'maroc','jordanian', 'sas', 'scandinavian', 'saudi', 'arabian', 'singapore','south', 'african', 'southwest', 'spirit', 'sunwing', 'swiss', 'intl','tap', 'portugal', 'tarom', 'romanian', 'thai', 'smile', 'tunisair','turkish', 'ukraine', 'international', 'virgin', 'america', 'vueling','wow', 'lines', 'airways']

# Stop words are compared against single lower-cased tokens, so an entry like
# 'Business Class' can never match as written. Split the hand-written entries
# into lower-cased words so they actually take effect.
custom_stopwords = [word for phrase in custom_stopwords for word in phrase.lower().split()]

# Add NLTK English stopwords
custom_stopwords += list(stopwords.words('english'))

# Add country and city names.
# NOTE: multi-word names (e.g. "new york") are kept as-is; with unigram
# tokenisation they are inert rather than harmful.
custom_stopwords += country_names
custom_stopwords += city_names

# Remove duplicates (sorted so the list is identical from run to run)
custom_stopwords = sorted(set(custom_stopwords))

# Load embedding model (moved here for better flow)
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

print(f"Total stopwords loaded: {len(custom_stopwords)}")

In [None]:
# Configure CountVectorizer to remove stopwords and create n-grams (1-2 word phrases)
vectorizer_model = CountVectorizer(
    stop_words=custom_stopwords,
    ngram_range=(1, 2),  # Unigrams and bigrams, as the comment above promises (sklearn's default is unigrams only)
    # Ignore terms found in fewer than 5 documents.
    # NOTE(review): BERTopic appears to fit this vectorizer on one merged
    # document per topic, so this threshold probably counts topics rather
    # than individual reviews - confirm against the installed version.
    min_df=5
)

# Configure UMAP for dimensionality reduction before clustering
umap_model = UMAP(
    n_neighbors=15,     # Number of neighbors considered during embedding
    n_components=5,     # Reduce to 5 dimensions
    min_dist=0.0,       # Minimum distance between points
    metric='cosine',    # Similarity measure
    random_state=42     # Reproducibility
)

# Configure HDBSCAN for hierarchical clustering of similar reviews
hdbscan_model = HDBSCAN(
    min_cluster_size=30,              # Minimum size of a cluster
    metric='euclidean',               # Distance metric
    cluster_selection_method='eom',   # Method for selecting clusters
    prediction_data=True              # Keep the extra data needed to score new documents later
)

In [None]:
# Assemble the BERTopic pipeline from the components configured above
topic_model = BERTopic(
    language="english",                 # Reviews are in English
    embedding_model=embedding_model,    # Sentence-transformer that embeds each review
    umap_model=umap_model,              # Reduces the embeddings before clustering
    hdbscan_model=hdbscan_model,        # Groups the reduced embeddings into topics
    vectorizer_model=vectorizer_model,  # Builds the topic vocabulary (custom stopwords)
    calculate_probabilities=True,       # Also return per-document topic probabilities
    verbose=True,                       # Log progress while fitting
)

In [None]:
# Fit BERTopic on the truncated review texts; `topics` holds one topic id per
# entry of `texts`, `probs` the accompanying probabilities.
topics, probs = topic_model.fit_transform(texts)
# Store the topic ids on the DataFrame by position.
# NOTE(review): `texts` was built with dropna(), so this only lines up if
# `customer_review` has no missing values in `df` - confirm.
df['topic'] = topics

In [None]:
# Overview table of the fitted topics: id, generated name and size
topic_info = topic_model.get_topic_info()
print(topic_info.loc[:, ['Topic', 'Name', 'Count']])

In [None]:
# Keep only reviews that were assigned to a real topic (BERTopic labels outliers as -1)
df_filtered = df.loc[df['topic'].ne(-1)]
topics_filtered = df_filtered['topic'].tolist()
texts_filtered = df_filtered['customer_review'].tolist()

In [None]:
# Use a RoBERTa-base sentiment classifier (fine-tuned on tweets) from Hugging Face
model_name = "cardiffnlp/twitter-roberta-base-sentiment"

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Create a Hugging Face pipeline for sentiment analysis.
# truncation/max_length are set on the pipeline itself so that EVERY call
# (including later single-review demos that pass untruncated text) is cut
# to the model's 512-token limit instead of crashing on long reviews.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,  # Use GPU if available
    truncation=True,
    max_length=512
)

# Map output labels to readable sentiment names
label_map = {
    "LABEL_0": "negative",
    "LABEL_1": "neutral",
    "LABEL_2": "positive"
}

# Classify each review in batches to avoid memory overload
batch_size = 32
sentiments = []

for i in range(0, len(texts), batch_size):
    batch = texts[i:i+batch_size]
    results = sentiment_pipeline(batch)
    sentiments.extend(label_map[res['label']] for res in results)

# Add sentiment labels to the DataFrame (one label per entry of `texts`, same order)
df['sentiment'] = sentiments

In [None]:
# Per-topic review count, share of each sentiment class, and the dominant sentiment
topic_summary = (
    df.groupby('topic')
      .agg(
          review_count=pd.NamedAgg(column='customer_review', aggfunc='count'),
          positive=pd.NamedAgg(column='sentiment', aggfunc=lambda labels: (labels == 'positive').mean()),
          neutral=pd.NamedAgg(column='sentiment', aggfunc=lambda labels: (labels == 'neutral').mean()),
          negative=pd.NamedAgg(column='sentiment', aggfunc=lambda labels: (labels == 'negative').mean()),
          top_sentiment=pd.NamedAgg(column='sentiment', aggfunc=lambda labels: labels.value_counts().idxmax()),
      )
      .reset_index()
)

# Topic names generated by BERTopic
topic_info = topic_model.get_topic_info()

# Attach the topic name to every summary row, then drop the duplicate key column
topic_summary = (
    topic_summary
    .merge(topic_info[['Topic', 'Name']], left_on='topic', right_on='Topic')
    .drop(columns=['Topic'])
)

# Most negative topics first, to surface pain points
topic_summary = topic_summary.sort_values('negative', ascending=False)

# Show top 10 most negative topics
print("Top 10 Topics by Negative Sentiment")
topic_summary.head(10)

In [None]:
# Print the topic overview again (id, generated name, size) for reference
print(topic_info.loc[:, ['Topic', 'Name', 'Count']])

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter

# Plot the most negative topics using a bar chart
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=topic_summary.head(10), x='Name', y='negative', palette='Reds_r', ax=ax)
# 'negative' is a 0-1 fraction; format the ticks as percentages so the axis
# matches its "% Negative Reviews" label.
ax.yaxis.set_major_formatter(PercentFormatter(xmax=1.0))
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.set_title('Top 10 Topics by % Negative Sentiment')
ax.set_ylabel('% Negative Reviews')
fig.tight_layout()
plt.show()

In [None]:
# Function to retrieve example reviews by topic and sentiment
def get_example_reviews(df, topic_id, sentiment, n=3):
    """Return up to `n` distinct random reviews for one topic/sentiment pair.

    Args:
        df: DataFrame with 'topic', 'sentiment' and 'customer_review' columns.
        topic_id: Topic id to select.
        sentiment: Sentiment label to select (e.g. 'negative').
        n: Maximum number of reviews to return.

    Returns:
        A list of review texts. It holds fewer than `n` items when fewer
        reviews match, and is empty when nothing matches or `n` is not positive.
    """
    matches = df.loc[(df['topic'] == topic_id) & (df['sentiment'] == sentiment), 'customer_review']
    if matches.empty or n <= 0:
        return []
    # Sample without replacement so the same review is never shown twice
    return matches.sample(min(n, len(matches))).tolist()

# topic_summary is sorted by negative share, so its first row is the most negative topic
most_negative_topic = topic_summary.iloc[0]['topic']
print(f"Negative Reviews for Topic {most_negative_topic}")
print(get_example_reviews(df, most_negative_topic, sentiment='negative'))

In [None]:
import random

def test_review(review_text, topic_model, sentiment_pipeline):
    """Classify a given review into topic and sentiment and print the result.

    Args:
        review_text: The review to classify.
        topic_model: Fitted BERTopic model, used via `transform` and `get_topic`.
        sentiment_pipeline: Hugging Face text-classification pipeline whose
            labels are keys of the module-level `label_map`.

    Returns:
        None. The topic id, topic keywords, sentiment and score are printed.
    """
    # Missing values read from a DataFrame arrive as float NaN, not str
    if not isinstance(review_text, str) or not review_text.strip():
        print("No review text provided.")
        return

    # Get topic
    topic_id, _ = topic_model.transform([review_text])
    topic_words = topic_model.get_topic(topic_id[0])
    topic_name = ", ".join(word for word, _ in topic_words) if topic_words else "Unknown/Outlier"

    # Get sentiment. Reviews can exceed the model's 512-token limit, so
    # truncate explicitly instead of letting the model raise.
    sentiment_result = sentiment_pipeline([review_text], truncation=True, max_length=512)[0]
    sentiment_label = sentiment_result['label']
    sentiment_score = sentiment_result['score']

    # Print results
    print("\n📝 Review:")
    print(review_text)
    print("\n🏷️ Predicted Topic ID:", topic_id[0])
    print("🔤 Topic Name:", topic_name)
    print("\n😃 Sentiment:", label_map[sentiment_label])
    print("📊 Confidence Score:", round(sentiment_score, 4))

In [None]:
# Let the user classify their own review, or fall back to a random one from the dataset
choice = input("Do you want to enter your own review? (y/n): ").strip().lower()

if choice != 'y':
    # Any answer other than 'y' selects a random row of the DataFrame
    random_index = random.randint(0, len(df) - 1)
    random_review = df['customer_review'].iloc[random_index]
    print(f"\n🔢 Randomly selected index: {random_index}")
    test_review(random_review, topic_model, sentiment_pipeline)
else:
    user_input = input("Enter your review: ")
    test_review(user_input, topic_model, sentiment_pipeline)

In [None]:
def explain_sentiment(df, topic_id, sentiment='negative', n=3):
    """Print up to `n` distinct example reviews for one topic/sentiment pair.

    Args:
        df: DataFrame with 'topic', 'sentiment' and 'customer_review' columns.
        topic_id: Topic id to select.
        sentiment: Sentiment label to select.
        n: Maximum number of examples to print.

    Returns:
        None. Each example is printed, cut to its first 200 characters.
    """
    matches = df.loc[(df['topic'] == topic_id) & (df['sentiment'] == sentiment), 'customer_review']
    print(f"\n📘 Reviews where customers felt '{sentiment}' about topic {topic_id}:")
    if matches.empty:
        # Sampling from an empty selection would raise; report it instead
        print("No matching reviews found.")
        return

    # Sample without replacement so no review is listed twice
    examples = matches.sample(min(max(n, 0), len(matches)))
    for position, review in enumerate(examples, start=1):
        text = str(review)
        # Only mark the text as shortened when it really was
        ellipsis = "..." if len(text) > 200 else ""
        print(f"{position}. {text[:200]}{ellipsis}")

In [None]:
# First row of the negative-sorted summary is the most negative topic
most_negative_topic = topic_summary.iloc[0]['topic']
explain_sentiment(df, most_negative_topic, sentiment='negative')

In [None]:
# Build one row per topic: name, dominant sentiment, size and up to two example reviews
insight_rows = []

for _, row in topic_summary.iterrows():
    topic_id = row['topic']
    name = row['Name']
    sentiment = row['top_sentiment']
    count = row['review_count']

    # Size the sample from the reviews that actually match topic AND sentiment.
    # The topic's total count can be larger than that subset, which made the
    # old sample() call fail and hid the failure behind a bare `except`.
    candidates = df.loc[
        (df['topic'] == topic_id) & (df['sentiment'] == sentiment),
        'customer_review'
    ].dropna()
    if candidates.empty:
        example_reviews = ["No sample available"]
    else:
        example_reviews = candidates.sample(min(2, len(candidates))).tolist()

    insight_rows.append({
        "Topic ID": topic_id,
        "Topic Name": name,
        "Most Common Sentiment": sentiment,
        "Review Count": count,
        "Examples": example_reviews
    })

insight_df = pd.DataFrame(insight_rows).sort_values('Review Count', ascending=False)

# Export to CSV for reporting
insight_df.to_csv("topic_insights_with_reasons.csv", index=False)

# Display top 10 insights
insight_df.head(10)

In [None]:
from wordcloud import WordCloud
import matplotlib.pyplot as plt

def generate_wordcloud_for_topic(df, topic_number):
    """Draw a word cloud of the lower-cased review text belonging to one topic.

    Args:
        df: DataFrame with 'topic' and 'customer_review' columns.
        topic_number: Topic id whose reviews are visualised.
    """
    in_topic = df['topic'] == topic_number
    topic_reviews = df.loc[in_topic, 'customer_review'].dropna()
    corpus = " ".join(topic_reviews.astype(str).str.lower())

    cloud = WordCloud(
        max_words=50,
        background_color='white',
        width=800,
        height=400,
    ).generate(corpus)

    plt.figure(figsize=(10, 5))
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis("off")
    plt.title(f"Common Words in Topic {topic_number}")
    plt.show()

In [None]:
# Word cloud for the topic with the highest share of negative reviews
generate_wordcloud_for_topic(df=df, topic_number=most_negative_topic)

### 1. Overview of Methodology

This study employed a hybrid natural language processing approach combining topic modeling using BERTopic and sentiment classification using RoBERTa to analyze over 50,000 airline customer reviews. The primary objective was to uncover recurring themes within the reviews and assess the overall sentiment associated with each theme. This dual-layered analysis enabled the identification of key pain points experienced by customers, offering actionable insights for service improvement.

The methodology involved several stages: data preprocessing, embedding generation using sentence transformers, topic modeling, sentiment classification, and result aggregation. Custom stopwords were used to refine the topic extraction process, ensuring domain-specific noise such as "flight" and "airline" did not dominate the results. Sentiment classification was performed using a RoBERTa-base model fine-tuned on Twitter sentiment data (`cardiffnlp/twitter-roberta-base-sentiment`).

### 2. Topic Modeling Output

Through the use of BERTopic, more than 100 topics were extracted from the dataset. Each topic was represented by a set of keywords derived using c-TF-IDF weighting, allowing for meaningful interpretation of the underlying themes.

Some of the most prominent topics included:
- Norwegian flight cancellations and refund issues
- Lost luggage and baggage claim problems
- Delays and rebooking frustrations
- Complaints about Spirit Airlines’ service
- Issues with boarding passes and staff assistance

These topics provided an overview of the most commonly discussed aspects of the customer experience across various airlines.

### 3. Sentiment Analysis per Topic

Each review was classified into one of three sentiment categories: positive, neutral, or negative. Aggregating this data at the topic level revealed which themes were associated with the strongest negative emotions.

One of the most notable findings was that the topic related to Norwegian Air’s Gatwick refund issues had a **100% negative sentiment**. Other highly negative topics included complaints about lost luggage, delays, poor communication, and inadequate customer support.

Across the top ten most negative topics, the percentage of negative sentiment ranged from **87.6% to 100%**, indicating widespread dissatisfaction among passengers regarding these specific areas of service.

### 4. Visualization: Bar Chart of Top Negative Topics

A bar chart was generated to visually represent the percentage of negative sentiment across the top ten topics. This visualization highlighted the severity of customer dissatisfaction linked to:
- Flight cancellations and rebooking issues
- Baggage mishandling
- Poor customer service
- Unexpected fees and lack of transparency
- Long wait times and language barriers during support calls

The chart served as a powerful tool for identifying priority areas that require immediate attention from airline management.

### 5. Qualitative Insights from Reviews

Three example reviews were selected from the most negatively rated topic, which revolved around Norwegian Air's cancellation policies and customer service failures.

The first review detailed a situation where a flight was canceled at the last minute, resulting in a four-hour wait to reach a representative. Passengers were eventually rebooked but faced further complications including overbooking and delayed baggage.

Another review described being left without clear information after a flight cancellation, leading to costly alternative arrangements and a complete loss of trust in the airline.

The third review criticized the frequent delays and lack of proactive communication, suggesting that the airline's operational inefficiencies have long-term implications for brand loyalty.

These qualitative examples reinforced the quantitative findings, showing that customer dissatisfaction often stems from a combination of poor communication, lack of empathy, and systemic operational issues.

### 6. Discussion

The combined results of topic modeling and sentiment classification offer deep insights into the customer experience within the airline industry.

Key findings include:
- Topics concerning Norwegian and Spirit Airlines showed the highest share of negative reviews, particularly concerning cancellations, refunds, and customer service.
- Lost luggage and baggage handling consistently appeared as major concerns across multiple topics.
- Communication breakdowns during disruptions led to increased frustration, even when alternative flights were offered.
- Overbooking practices and unclear refund policies contributed significantly to customer distrust.

To improve passenger satisfaction, airlines should focus on enhancing transparency during disruptions, improving multilingual support systems, refining overbooking strategies, and investing in better baggage tracking technologies.

### 7. Conclusion

This research successfully applied advanced NLP techniques to extract valuable insights from unstructured customer feedback. By integrating topic modeling and sentiment classification, it identified critical areas where airlines can take corrective actions to enhance customer experience.

Future research could expand this framework to enable real-time sentiment monitoring, integrate feedback analysis with CRM systems, or develop predictive models to anticipate customer churn based on sentiment trends.

In [None]:
# Keywords and phrases used for simple aspect detection, grouped by theme.
aspects = [
    # Service and staff
    "service", "staff", "crew", "attitude",
    # Baggage
    "luggage", "baggage", "lost", "claim",
    # Punctuality
    "flight", "delay", "cancellation", "late",
    # Boarding and check-in
    "boarding", "process", "gate", "check-in",
    # Seating
    "seat", "comfort", "legroom", "space",
    # Catering
    "food", "meal", "snack", "drink",
    # Price
    "price", "cost", "value", "money",
    # Cleanliness
    "cleanliness", "dirty", "hygiene",
    # Entertainment
    "entertainment", "wifi", "tv", "screen",
    # Communication
    "communication", "update", "notice",
    # Multi-word phrases
    "ground service",
    "check in",
    "in-flight entertainment",
    "cabin crew",
    "flight path",
    "flight delay",
    "refund policy",
    "rebooking",
    "online check-in",
]

In [None]:
import re

def extract_aspects(review_text, aspect_list):
    """Detect which aspects are mentioned in a review.

    Matching is case-insensitive and uses word boundaries, so "staff" matches
    "Staff" but not "staffing".

    Args:
        review_text: Review text. Anything that is not a string (for example
            the float NaN pandas uses for missing values) counts as no text.
        aspect_list: Iterable of aspect keywords or phrases to look for.

    Returns:
        The matched aspects, without duplicates, in `aspect_list` order.
    """
    if not isinstance(review_text, str):
        return []

    lowered = review_text.lower()
    matched = []
    for aspect in aspect_list:
        if aspect in matched:
            continue
        # The review is lower-cased, so the aspect must be lower-cased too
        pattern = r'\b' + re.escape(aspect.lower()) + r'\b'
        if re.search(pattern, lowered):
            matched.append(aspect)
    return matched

In [None]:
def analyze_review_with_aspects(review_text, topic_model, sentiment_pipeline, aspects):
    """Print the predicted topic, sentiment and keyword-matched aspects of one review.

    Args:
        review_text: The review to analyse.
        topic_model: Fitted BERTopic model, used via `transform` and `get_topic_info`.
        sentiment_pipeline: Hugging Face text-classification pipeline whose
            labels are keys of the module-level `label_map`.
        aspects: Aspect keywords passed on to `extract_aspects`.

    Returns:
        None. All results are printed.
    """
    # Missing values read from a DataFrame arrive as float NaN, not str
    if not isinstance(review_text, str) or not review_text.strip():
        print("No review text to analyse.")
        return

    # Keep only the first 510 characters. This is a character limit, not a
    # token limit, so token-level truncation is still requested below.
    truncated_review = review_text[:510]

    # Get topic. Look the name up by topic id: the rows of get_topic_info()
    # are not positioned by id (the outlier topic -1 has a row as well), so
    # positional .iloc[topic_id] returns the name of a different topic.
    topic_id, _ = topic_model.transform([truncated_review])
    topic_info = topic_model.get_topic_info()
    name_matches = topic_info.loc[topic_info['Topic'] == topic_id[0], 'Name']
    topic_name = name_matches.iloc[0] if not name_matches.empty else "Unknown/Outlier"

    # Get sentiment
    sentiment_result = sentiment_pipeline(truncated_review, truncation=True, max_length=512)[0]
    sentiment_label = label_map[sentiment_result['label']]
    sentiment_score = sentiment_result['score']

    # Extract aspects from the full (untruncated) review
    detected_aspects = extract_aspects(review_text, aspects)

    # Print results
    print("\n📝 Review:")
    print(review_text)

    print("🔤 Topic Name:", topic_name)

    print("\n😃 Sentiment:", sentiment_label)
    print("📊 Confidence Score:", round(sentiment_score, 4))

    print("\n🔍 Detected Aspects Mentioned:")
    if detected_aspects:
        print("- " + "\n- ".join(detected_aspects))
    else:
        print("No clear aspects detected.")

    print("\n📌 Summary Explanation:")
    if not detected_aspects:
        print(f"This review is {sentiment_label}, but no specific aspect could be identified.")
    elif sentiment_label == "negative":
        print(f"This review is {sentiment_label} due to issues with: {', '.join(detected_aspects)}")
    else:
        # "due to issues with" would be wrong for neutral or positive reviews
        print(f"This review is {sentiment_label} and mentions: {', '.join(detected_aspects)}")

In [None]:
# Run the combined topic / sentiment / aspect analysis on one randomly chosen review
random_index = random.randint(0, len(df) - 1)
random_review = df['customer_review'].iloc[random_index]
analyze_review_with_aspects(
    random_review,
    topic_model=topic_model,
    sentiment_pipeline=sentiment_pipeline,
    aspects=aspects,
)

In [None]:
# Tag every review with the list of aspect keywords it mentions
df['detected_aspects'] = df['customer_review'].apply(extract_aspects, args=(aspects,))

In [None]:
# One row per (review, aspect) pair; reviews without any detected aspect are dropped
aspect_sentiment = df.explode('detected_aspects')
aspect_sentiment = aspect_sentiment.dropna(subset=['detected_aspects'])

# Count reviews per aspect and sentiment (aspects as rows, sentiments as columns)
(
    aspect_sentiment
    .groupby(['detected_aspects', 'sentiment'])
    .size()
    .unstack(fill_value=0)
)

In [None]:
from matplotlib.ticker import PercentFormatter

# Count reviews per aspect and sentiment. reindex() guarantees all three
# sentiment columns exist even if one class never occurs.
sentiment_columns = ['negative', 'neutral', 'positive']
aspect_counts = (
    df.explode('detected_aspects')
      .dropna(subset=['detected_aspects'])
      .groupby(['detected_aspects', 'sentiment'])
      .size()
      .unstack(fill_value=0)
      .reindex(columns=sentiment_columns, fill_value=0)
)

# Share of negative reviews among ALL reviews mentioning the aspect (neutral
# included), which is what the "% Negative Reviews" axis label describes.
aspect_counts['negative_ratio'] = aspect_counts['negative'] / aspect_counts[sentiment_columns].sum(axis=1)

# Sort once and reuse the result for both plot arguments
top_negative_aspects = aspect_counts.sort_values('negative_ratio', ascending=False).head(10)

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=top_negative_aspects, x='negative_ratio', y=top_negative_aspects.index, ax=ax)
# negative_ratio is a 0-1 fraction; show the ticks as percentages
ax.xaxis.set_major_formatter(PercentFormatter(xmax=1.0))
ax.set_title("Top Aspects with Highest % of Negative Feedback")
ax.set_xlabel("% Negative Reviews")
ax.set_ylabel("Aspect")
plt.show()

In [None]:
# Tag every review with the aspects it mentions
df['detected_aspects'] = df['customer_review'].apply(lambda review: extract_aspects(review, aspects))

# Long format: one row per (review, aspect); reviews without aspects are removed
exploded = df.explode('detected_aspects')
aspect_sentiment = exploded.dropna(subset=['detected_aspects'])

# Aspect x sentiment counts, plus row total and negative share, most negative first
aspect_counts = (
    aspect_sentiment
    .groupby(['detected_aspects', 'sentiment'])
    .size()
    .unstack(fill_value=0)
    .assign(
        total=lambda counts: counts.sum(axis=1),
        negative_ratio=lambda counts: counts['negative'] / counts['total'],
    )
    .sort_values(by='negative_ratio', ascending=False)
)

# Show top 10 most negatively reviewed aspects
print("Top Aspects by Negative Sentiment Ratio")
print(aspect_counts.head(10))

### **Aspect-Based Sentiment Analysis (ABSA)**

In [None]:
!pip install -q transformers torch

In [None]:
from transformers import pipeline, AutoTokenizer

# Load the ABSA tokenizer first, then the classification model, from the same checkpoint.
# Note: this rebinds the global names `tokenizer` and `model` that the sentiment cell set earlier.
tokenizer, model = (
    loader.from_pretrained("yangheng/deberta-v3-base-absa-v1.1")
    for loader in (AutoTokenizer, AutoModelForSequenceClassification)
)

In [None]:
# Define function for ABSA
def analyze_aspect_sentiment(review, aspect):
    """Classify the sentiment a review expresses towards one aspect.

    Uses the module-level ABSA `tokenizer` and `model`.

    Args:
        review: Review text.
        aspect: Aspect term to evaluate within the review.

    Returns:
        "negative", "neutral" or "positive".
    """
    # Pass review and aspect as a sentence pair so the tokenizer inserts the
    # special tokens itself; typing "[CLS]"/"[SEP]" into the text makes them
    # appear twice. Only the review (first segment) is truncated, so the
    # aspect always survives.
    inputs = tokenizer(
        review,
        aspect,
        truncation="only_first",
        max_length=512,
        return_tensors="pt",
    ).to(model.device)

    # Inference only: no gradients needed
    with torch.no_grad():
        logits = model(**inputs).logits

    # Softmax is monotonic, so the arg-max of the logits is the arg-max of the
    # probabilities. Label order is assumed to follow the checkpoint's
    # id2label (0=negative, 1=neutral, 2=positive) - TODO confirm.
    predicted_index = int(logits.argmax(dim=-1)[0])
    sentiment = ["negative", "neutral", "positive"][predicted_index]
    return sentiment

In [None]:
# Aspect-level sentiment for the review at row position 100
random_review = df['customer_review'].iloc[100]
detected_aspects = extract_aspects(random_review, aspect_list=aspects)  # keyword-based aspect detection

print(f"Review: {random_review}")
for aspect in detected_aspects:
    sentiment = analyze_aspect_sentiment(review=random_review, aspect=aspect)
    print(f"- {aspect}: {sentiment}")

In [None]:
# Aspect-level sentiment for the review at row position 20
random_review = df['customer_review'].iloc[20]
detected_aspects = extract_aspects(random_review, aspect_list=aspects)  # keyword-based aspect detection

print(f"Review: {random_review}")
for aspect in detected_aspects:
    sentiment = analyze_aspect_sentiment(review=random_review, aspect=aspect)
    print(f"- {aspect}: {sentiment}")

In [None]:
# Aspect-level sentiment for the review at row position 105
random_review = df['customer_review'].iloc[105]
detected_aspects = extract_aspects(random_review, aspect_list=aspects)  # keyword-based aspect detection

print(f"Review: {random_review}")
for aspect in detected_aspects:
    sentiment = analyze_aspect_sentiment(review=random_review, aspect=aspect)
    print(f"- {aspect}: {sentiment}")