In [2]:
# Importing Basic Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import nltk
import spacy
import string
from scipy.special import softmax
import warnings
warnings.filterwarnings("ignore")

# Importing Libraries for Text Preprocessing
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize,sent_tokenize, TweetTokenizer
from nltk.stem.wordnet import WordNetLemmatizer
nlp = spacy.load('en_core_web_sm')
from mappings import contraction_mapping, chat_words_replacements

In [3]:
df = pd.read_csv('airline_df_cleaned.csv')
reviews = df[['Airline','Country','Review']]
reviews

Unnamed: 0,Airline,Country,Review
0,indigo-airlines,India,✅ Trip Verified | Flight was punctual. But no ...
1,indigo-airlines,India,"✅ Trip Verified | My sister, niece and mother..."
2,indigo-airlines,India,✅ Trip Verified | My 77-year-old father was fl...
3,indigo-airlines,India,Not Verified | IndiGo are a low cost airline ...
4,indigo-airlines,India,✅ Trip Verified | My flight 6e 1176 which was...
...,...,...,...
15201,jet-airways,India,Good Service Verona VRN - Moscow DME. 30 minut...
15202,jet-airways,India,SYD- MEL. My husband and I were truly impresse...
15203,jet-airways,India,SYD-MEL-SYD. It's hard to fault their in-fligh...
15204,jet-airways,India,MEL-SYD return. 737-200 both ways. Clean aircr...


In [4]:
# Splitting the reviews on the basis of |
reviews['Review'] = reviews['Review'].apply(lambda x: x.lower())

def de_emojify(text):
    # Regular expression pattern to match emojis
    emoji_pattern = re.compile(
        "["
        "\U0001F600-\U0001F64F"  # emoticons
        "\U0001F300-\U0001F5FF"  # symbols & pictographs
        "\U0001F680-\U0001F6FF"  # transport & map symbols
        "\U0001F1E0-\U0001F1FF"  # flags (iOS)
        "\U00002500-\U00002BEF"  # chinese characters
        "\U00002702-\U000027B0"
        "\U00002702-\U000027B0"
        "\U000024C2-\U0001F251"
        "\U0001f926-\U0001f937"
        "\U00010000-\U0010ffff"
        "\u2640-\u2642"
        "\u2600-\u2B55"
        "\u200d"
        "\u23cf"
        "\u23e9"
        "\u231a"
        "\ufe0f"  # dingbats
        "\u3030"
        "]+",
        flags=re.UNICODE,
    )

    # Remove emojis from the text
    de_emojified_text = emoji_pattern.sub(r"", text)

    return de_emojified_text

reviews['Review'] = reviews['Review'].apply(lambda x: de_emojify(x))
# Splitting on the basis of |
reviews['Review'] = reviews['Review'].apply(lambda x: x.split('|'))
# Checking the length of the reviews if its 2 then taking the last one and if its 1 then taking the first one
reviews['Review'] = reviews['Review'].apply(lambda x: x[-1] if len(x) == 2 else x[0])

# Removing the newlines, tabs or any other special characters
reviews['Review'] = reviews['Review'].apply(lambda x: re.sub(r'\n|\t|\r', '', x))

# Replacing the contractions with their expansions
reviews['Review'] = reviews['Review'].apply(lambda x: ' '.join([contraction_mapping[t] if t in contraction_mapping else t for t in x.split(" ")]))

# Replace the chat words with their expansions
reviews['Review'] = reviews['Review'].apply(lambda x: " ".join([chat_words_replacements[t] if t in chat_words_replacements else t for t in x.split(" ")]))

# Remmvoing Html Tags if present
def remove_html(text):
    html_pattern = re.compile('<.*?>')
    return html_pattern.sub(r'', text)

reviews['Review'] = reviews['Review'].apply(lambda x: remove_html(x))


# Removing the urls if present
def remove_url(text):
    url_pattern = re.compile(r'https?://\S+|www\.\S+')
    return url_pattern.sub(r'', text)

reviews['Review'] = reviews['Review'].apply(lambda x: remove_url(x))

# Removing the punctuations
def remove_punctuations(text):
    punc = re.compile(r'[!"#$%&()*+,-./:;<=>?@[\]^_`{|}~]')
    return punc.sub(r' ', text)

reviews['Review'] = reviews['Review'].apply(lambda x: remove_punctuations(x))

# Removing the spaces and replacing them with single space
def remove_spaces(text):
    spaces = re.compile(r'\s+')
    return spaces.sub(r' ', text)

reviews['Review'] = reviews['Review'].apply(lambda x: remove_spaces(x))

# Striping the text
reviews['Review'] = reviews['Review'].apply(lambda x: x.strip())

# using BERT model for sentiment analysis
from transformers import AutoTokenizer,AutoModelForSequenceClassification
import torch
# Initializing the tokenizer and model
model = 'nlptown/bert-base-multilingual-uncased-sentiment'
tokenizer = AutoTokenizer.from_pretrained(model)
model = AutoModelForSequenceClassification.from_pretrained(model)
# Tokenizing the text
reviews['Tokens'] = reviews['Review'].apply((lambda x: tokenizer.encode(x, return_tensors='pt',max_length=1000,truncation=True)))
reviews['Result'] = reviews['Tokens'].apply(lambda x: model(x)[0])

In [5]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

# Function to get the sentiment score
def get_sentiment_score(text):
    score = analyzer.polarity_scores(text)
    return score['compound']

# Getting the sentiment score
reviews['Sentiment_Score_Vader'] = reviews['Review'].apply(lambda x: get_sentiment_score(x))

In [6]:
# Changing the sentiment score to positive, negative and neutral
def get_sentiment(score):
    if score >= 0.05:
        return 'Positive'
    elif score <= -0.05:
        return 'Negative'
    else:
        return 'Neutral'
    pass

# Getting the sentiment
reviews['Sentiment_Vader'] = reviews['Sentiment_Score_Vader'].apply(lambda x: get_sentiment(x))

In [7]:
reviews.Sentiment_Vader.value_counts()

Sentiment_Vader
Positive    9906
Negative    5078
Neutral      222
Name: count, dtype: int64

In [8]:
psotive_rev = reviews[reviews['Sentiment_Vader'] == 'Positive'].sort_values(by = 'Sentiment_Score_Vader', ascending = False).head(10)
psotive_rev

Unnamed: 0,Airline,Country,Review,Sentiment_Score_Vader,Sentiment_Vader
6994,qatar-airways,Qatar,kuwait to munich via doha the kuwait to doha f...,0.9997,Positive
6589,qatar-airways,Qatar,qatar airways generous covid policy made this ...,0.9988,Positive
9937,korean-air,South Korea,auckland to london return with a side trip to ...,0.9988,Positive
15170,jet-airways,India,las vegas to stansted i loved the seats food a...,0.9984,Positive
6973,qatar-airways,Qatar,manchester to auckland via doha i am a very ne...,0.9984,Positive
6565,qatar-airways,Qatar,flew 1st march 2022 on brand new aircrafts b78...,0.9984,Positive
4201,emirates,UAE,bru dxb on boeing 777 300 in business class th...,0.9983,Positive
3708,emirates,UAE,gatwick to mauritius return via dubai we have ...,0.9982,Positive
10834,singapore-airlines,Singapore,stockholm to singapore and onwards to phuket w...,0.9982,Positive
9821,korean-air,South Korea,this was my sixth time with korean air to new ...,0.9981,Positive


In [9]:
for i in psotive_rev.Review:
    print(i)
    print('\n')

kuwait to munich via doha the kuwait to doha flight was operated on an a bit old but good a330 300 the doha to munich was operated on a great boeing b777 300er starting from the short 50 minute kwi— doh flight it was great good and comfortable aircraft screens were a bit outdated on the a330 but still has the latest oryx one movies the legroom was fantastic however window seats only have an ife box under each seat blocking legroom service on that short flight was great cabin crew were very nice beautiful deep blue mood lighting which created a beautiful atmosphere inside the cabin and the flight was on time connection in doha was super easy smooth and efficient beautiful super modern airport with great toilets range of shops and restaurants and very nice staff boarding started few minutes after the landing from kuwait so we rushed to our next flight to munich second flight was operated on a new and very comfortable boeing b777 300er as much as people complained about the 10 abreast 777

In [10]:
# Negative Reviews
negative_rev = reviews[reviews['Sentiment_Vader'] == 'Negative'].sort_values(by = 'Sentiment_Score_Vader', ascending = True).head(10)
negative_rev

Unnamed: 0,Airline,Country,Review,Sentiment_Score_Vader,Sentiment_Vader
6032,air-china,China,when we arrived at beijing airport at 10 30 pm...,-0.9974,Negative
2755,emirates,UAE,we had the most horrific experience with emira...,-0.9969,Negative
5820,air-china,China,bangkok to london via beijing i have never wri...,-0.9968,Negative
10999,singapore-airlines,Singapore,flew sydney to amsterdam returning from paris ...,-0.9958,Negative
2970,emirates,UAE,had a disastrous experience with emirates rece...,-0.9958,Negative
9457,vietjetair,Vietnam,chiang mai to bangkok i have flown more than 1...,-0.9954,Negative
2873,emirates,UAE,brisbane to zurich via dubai after hearing not...,-0.9944,Negative
14254,jet-airways,India,we checked in at about 8am for our amsterdam b...,-0.9944,Negative
2927,emirates,UAE,trivandrum to new york via dubai i have been a...,-0.9943,Negative
9569,vietjetair,Vietnam,danang ho chi minh bangkok to save a little mo...,-0.9942,Negative


In [11]:
for i in negative_rev.Review:
    print(i)
    print('\n')

when we arrived at beijing airport at 10 30 pm to check in for our flight to paris we were told that the flight was delayed to 5 50 in the morning due to thunderstorm i could not believe that as we had previously flown in much worse weather it was not even raining but i will not argue with the decision air china made as safety is a priority we were already so tired and we were not happy as our return flight to korea with air china had already been rescheduled inconveniently it was very hard to access to the wifi at the airport so it was very difficult to change our pick up arrangements to accommodate the late arrival in paris i asked the air china duty manager at the check in desk to let us stay at the hourly hotel in the airport for a couple of hours as we were tired and a shower was needed considering the humid beijing weather i am very unhappy with how we were dealt with one of the duty managers told us he understood how annoying the situation was for us furthermore he said he would

In [12]:
# Sentiment Analysis using TextBlob
from textblob import TextBlob

# Function to get the sentiment score
def get_sentiment_score(text):
    score = TextBlob(text).sentiment.polarity
    return score

# Getting the sentiment score
reviews['Sentiment_Score_TextBlob'] = reviews['Review'].apply(lambda x: get_sentiment_score(x))

In [13]:
# Changing the sentiment score to positive, negative and neutral
def get_sentiment(score):
    if score >= 0.05:
        return 'Positive'
    elif score <= -0.05:
        return 'Negative'
    else:
        return 'Neutral'
    pass

# Getting the sentiment
reviews['Sentiment_TextBlob'] = reviews['Sentiment_Score_TextBlob'].apply(lambda x: get_sentiment(x))

In [14]:
reviews.Sentiment_TextBlob.value_counts()

Sentiment_TextBlob
Positive    10204
Negative     2575
Neutral      2427
Name: count, dtype: int64

In [15]:
psotive_rev = reviews[reviews['Sentiment_TextBlob'] == 'Positive'].sort_values(by = 'Sentiment_Score_TextBlob', ascending = False).head(10)
psotive_rev

Unnamed: 0,Airline,Country,Review,Sentiment_Score_Vader,Sentiment_Vader,Sentiment_Score_TextBlob,Sentiment_TextBlob
11707,ana-all-nippon-airways,Japan,tokyo haneda to bangkok i wish to put on recor...,0.9287,Positive,1.0,Positive
8390,qatar-airways,Qatar,kul doha and doha muc economy both sectors ver...,0.4372,Positive,1.0,Positive
6568,qatar-airways,Qatar,qatar airways is the best in the world and i r...,0.9726,Positive,1.0,Positive
4821,emirates,UAE,we were a group of 4 that flew akl mel in ek 4...,0.5563,Positive,0.953333,Positive
4152,emirates,UAE,everything was perfect except wi fi and connec...,0.743,Positive,0.95,Positive
11013,singapore-airlines,Singapore,it was a very pleasant flight from hong kong t...,0.9221,Positive,0.917778,Positive
6902,qatar-airways,Qatar,manila to gothenburg via doha the flight from ...,-0.4221,Negative,0.91,Positive
656,spicejet,India,great inflight experience with generous and we...,0.9565,Positive,0.9,Positive
4551,emirates,UAE,flew economy lgw ak all cabin crew excellent s...,0.3948,Positive,0.9,Positive
5757,air-china,China,beijing to san francisco i had a very pleasant...,0.9168,Positive,0.888333,Positive


In [16]:
for i in psotive_rev.Review:
    print(i)
    print('\n')

tokyo haneda to bangkok i wish to put on record the wonderful service i and my son received on your flight on 31 march 2019 the flight attendants were smiling and ever helpful so was the airport staff thank you and keep it up


kul doha and doha muc economy both sectors very happy with dreamliner between doha and munich although i do not like the 3 4 3 seating arrangement


qatar airways is the best in the world and i received the best quality service from them the cabin crew is the number one if you want to be treated like a friend and cared like a friend choose qatar airways


we were a group of 4 that flew akl mel in ek 403 qf 8761 on dec 25 it was a very pleasant flight with a 380


everything was perfect except wi fi and connectivity if you are going to have wi fi on a plane you need to make it so that you can load a page it needs to be faster otherwise there is no point in even connecting to it everything else was brilliant unlimited food


it was a very pleasant flight from hong

In [17]:
# Negative Reviews
negative_rev = reviews[reviews['Sentiment_TextBlob'] == 'Negative'].sort_values(by = 'Sentiment_Score_TextBlob', ascending = True).head(10)
negative_rev

Unnamed: 0,Airline,Country,Review,Sentiment_Score_Vader,Sentiment_Vader,Sentiment_Score_TextBlob,Sentiment_TextBlob
14213,jet-airways,India,jeddah to kozhikode via mumbai lost baggage pa...,-0.4215,Negative,-1.0,Negative
4720,emirates,UAE,ek0349 8 03 2014 colombo to dubai i found the ...,-0.6597,Negative,-1.0,Negative
554,spicejet,India,i have booked flight from mumbai to kanpur and...,-0.8091,Negative,-1.0,Negative
854,spicejet,India,hyderabad to ahmedabad via bhopal worst airlin...,-0.7184,Negative,-1.0,Negative
2107,air-india,India,bangkok to mumbai we were travelling along wit...,-0.6369,Negative,-1.0,Negative
820,spicejet,India,madurai to chennai very worst service i did no...,0.0763,Positive,-1.0,Negative
1159,airasia-india,India,delhi to pune worst customer service flight on...,-0.8885,Negative,-1.0,Negative
625,spicejet,India,i made a webcheckin and paid for the selected ...,-0.7494,Negative,-1.0,Negative
10382,singapore-airlines,Singapore,melbourne to singapore the check in experience...,-0.6808,Negative,-1.0,Negative
2429,goair,India,delhi to ranchi i wanted to change my flight d...,0.8357,Positive,-1.0,Negative


In [18]:
for i in negative_rev.Review:
    print(i)
    print('\n')
    

jeddah to kozhikode via mumbai lost baggage pathetic response from their team if you care your belongings never ever travel with jet airways since 3 days i didn’t receive baggage


ek0349 8 03 2014 colombo to dubai i found the service terrible it was 2 hours into the flight before we were served anything not even a drink of water was brought i even rang for someone to come and it was ignored would not use emirates on this sector again


i have booked flight from mumbai to kanpur and after checking they call me that your flight is delayed 4 hrs staff also not helpful they just say take your refund and take flight for another day this is what spicejet do worst experience will never travel with this flight


hyderabad to ahmedabad via bhopal worst airline ever twice in a row my flight got canceled and made me sit on connecting flight which took 08 hours and that their service is the worst i have seen from cabin crew to the call attendees at customer care


bangkok to mumbai we were travel

In [27]:
# Sentiment Analysis using Transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Initializing the tokenizer and model

task='sentiment'
model_name = f"cardiffnlp/twitter-roberta-base-{task}"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

In [28]:
# Function to get the sentiment score
def get_sentiment_score(text):
    try:
        inputs = tokenizer(text, return_tensors="pt")
        outputs = model(**inputs)
        logits = outputs.logits
        score = torch.softmax(logits, dim=1).tolist()[0]
        return score
    except:
        return [0.0, 0.0, 0.0]

# Getting the sentiment score of first 1000 reviews
reviews['Sentiment_Score_Transformers'] = reviews.loc[:1000, 'Review'].apply(lambda x: get_sentiment_score(x))

In [29]:
# Changing the sentiment score to positive, negative and neutral
def get_sentiment(score):
    if score[0] >= 0.05:
        return 'Negative'
    elif score[2] >= 0.05:
        return 'Positive'
    else:
        return 'Neutral'
    pass

# Getting the sentiment
reviews['Sentiment_Transformers'] = reviews.loc[:1000,'Sentiment_Score_Transformers'].apply(lambda x: get_sentiment(x))

In [30]:
reviews.Sentiment_Transformers.value_counts()

Sentiment_Transformers
Negative    619
Positive    376
Neutral       6
Name: count, dtype: int64

In [31]:
psotive_rev = reviews[reviews['Sentiment_Transformers'] == 'Positive'].sort_values(by = 'Sentiment_Score_Transformers', ascending = False).head(10)
psotive_rev

Unnamed: 0,Airline,Country,Review,Sentiment_Score_Vader,Sentiment_Vader,Sentiment_Score_TextBlob,Sentiment_TextBlob,Sentiment_Score_Transformers,Sentiment_Transformers
116,indigo-airlines,India,delhi to goa had read some negative reviews so...,-0.2263,Negative,0.129762,Positive,"[0.04831696301698685, 0.21060946583747864, 0.7...",Positive
457,vistara,India,delhi to mumbai vistara’s service is aimed at ...,0.9871,Positive,0.268288,Positive,"[0.048094742000103, 0.18605473637580872, 0.765...",Positive
834,spicejet,India,i was flying from hyderabad to new delhi on sp...,0.9764,Positive,0.180636,Positive,"[0.04674440994858742, 0.4067288935184479, 0.54...",Positive
717,spicejet,India,i always travel by spicejet and unlike other a...,0.9468,Positive,0.152083,Positive,"[0.04672093316912651, 0.20440895855426788, 0.7...",Positive
354,vistara,India,i was coming from usa and having connecting fl...,0.8121,Positive,0.1,Positive,"[0.04607264697551727, 0.37734106183052063, 0.5...",Positive
704,spicejet,India,bengaluru to kolkata by spicejet as this was a...,0.9112,Positive,0.141679,Positive,"[0.04401770606637001, 0.2729383707046509, 0.68...",Positive
321,vistara,India,booked on 7am flight to delhi on 12th jan flig...,0.9846,Positive,0.281373,Positive,"[0.043384384363889694, 0.14350968599319458, 0....",Positive
124,indigo-airlines,India,delhi to istanbul plane seemed old 1 3 years o...,0.6344,Positive,0.119444,Positive,"[0.04328488931059837, 0.21449770033359528, 0.7...",Positive
907,spicejet,India,hyderabad to delhi i had only hand baggage so ...,0.4417,Positive,0.134722,Positive,"[0.043193269520998, 0.20313729345798492, 0.753...",Positive
966,spicejet,India,kochi to chennai was a short and pleasant flig...,0.1404,Positive,0.255934,Positive,"[0.04230685904622078, 0.19680002331733704, 0.7...",Positive


In [32]:
for i in psotive_rev.Review:
    print(i)
    print('\n')

delhi to goa had read some negative reviews so was slightly apprehensive i shouldn’t have worried this was an outstanding airline for a low cost carrier everything was exceptional from check in to boarding the crew were so immaculately groomed and the cabin service was excellent nothing was too much trouble they worked hard to ensure that the cabin was as tidy as possible to enable a quick turn round for the return flight maybe other low cost carriers should have a look at indigo to see how it’s done


delhi to mumbai vistara’s service is aimed at a premium overall appeal the website interface is smooth although the app could be better designed web check in is smooth and you don’t need to pay extra to pre reserve a window aisle seat of choice even spot check in is easy bag drops are always hassle free even in busy terminals like the t3 at new delhi the seats have comfortable leather leg and knee room is among the best and you also get a coat hanger hook however no in flight entertainme