# Import the necessary libraries and download the required nltk data:

In [2]:
import pandas as pd
import numpy as np
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import gensim
from gensim import corpora
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
import matplotlib.pyplot as plt

# download the required nltk data
# Fetches the NLTK "stopwords" corpus, which stopwords.words('english') reads
# when the reviews are tokenized further down. When the package is already
# present locally the call only reports that it is up-to-date.
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to C:\Users\Shahzaib
[nltk_data]     Khan\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [3]:
# Read the cleaned reviews, then discard the "Unnamed: 0" column that the CSV
# carries alongside the review text.
Data = pd.read_csv("Clean reviews.csv")
Data = Data.drop(columns=["Unnamed: 0"])
Data

Unnamed: 0,clean_reviews
0,Not Verified | At Copenhagen the most chaotic...
1,Worst experience of my life trying to deal wi...
2,Due to code sharing with Cathay Pacific I was...
3,LHR check in was quick at the First Wing and ...
4,I wouldn't recommend British Airways at all. ...
...,...
3525,Flight from Heathrow to Toronto. Booked emerge...
3526,LHR to HAM. Purser addresses all club passenge...
3527,My son who had worked for British Airways urge...
3528,London City-New York JFK via Shannon on A318 b...


In [4]:
# initialize VADER sentiment analyzer
# One instance is created here and reused for every review when the
# "sentiment" column is computed.
analyzer = SentimentIntensityAnalyzer()

In [5]:
# Score each review in "clean_reviews" with VADER and store only the
# 'compound' entry of the returned score dict in a new "sentiment" column.
Data['sentiment'] = [
    analyzer.polarity_scores(review)['compound']
    for review in Data['clean_reviews']
]
Data

Unnamed: 0,clean_reviews,sentiment
0,Not Verified | At Copenhagen the most chaotic...,0.1015
1,Worst experience of my life trying to deal wi...,-0.9600
2,Due to code sharing with Cathay Pacific I was...,-0.8510
3,LHR check in was quick at the First Wing and ...,0.9251
4,I wouldn't recommend British Airways at all. ...,-0.9372
...,...,...
3525,Flight from Heathrow to Toronto. Booked emerge...,0.8608
3526,LHR to HAM. Purser addresses all club passenge...,0.8720
3527,My son who had worked for British Airways urge...,0.6640
3528,London City-New York JFK via Shannon on A318 b...,0.9148


In [6]:
# Mean of the per-review compound scores over the whole dataset
average_sentiment = Data.loc[:, 'sentiment'].mean()
print('Average sentiment polarity score:', average_sentiment)

Average sentiment polarity score: 0.1792445609065156


# Tokenize the "clean_reviews" column of the DataFrame and remove stopwords:

In [7]:
# create a tokenizer object; the pattern \w+ keeps runs of word characters
# and therefore drops punctuation and whitespace
tokenizer = RegexpTokenizer(r'\w+')

# create a list of stopwords
stop_words = stopwords.words('english')

# Membership tests against a list are linear scans, and preprocess() performs
# one per token of every review. Look tokens up in a frozenset instead; the
# original list is kept unchanged for any other code that uses it.
stop_word_set = frozenset(stop_words)

# define a function to tokenize and remove stopwords
def preprocess(text):
    """Lower-case ``text``, split it into word tokens and drop stopwords.

    Parameters
    ----------
    text : str
        Raw review text.

    Returns
    -------
    list of str
        The lower-cased tokens in their original order, excluding every
        token that appears in the English stopword list.
    """
    tokens = tokenizer.tokenize(text.lower())
    return [token for token in tokens if token not in stop_word_set]

# apply the preprocessing function to each review in the "clean_reviews" column
Data['tokens'] = Data['clean_reviews'].apply(preprocess)

# Create a dictionary and a corpus for topic modeling:

In [8]:
# Build the gensim vocabulary from the token lists of all reviews
dictionary = corpora.Dictionary(Data['tokens'])

# Convert every review's token list to its bag-of-words representation
corpus = list(map(dictionary.doc2bow, Data['tokens']))

# Train an LDA model on the corpus:

In [9]:
# train an LDA model on the corpus
num_topics = 10

# LDA training is randomly initialised; without a fixed seed the topics (and
# their numbering) change on every run, so the printed topics and any written
# interpretation of them cannot be reproduced.
lda_random_state = 42

lda_model = gensim.models.LdaModel(
    corpus=corpus,
    id2word=dictionary,
    num_topics=num_topics,
    random_state=lda_random_state,
)

# Analyze the topics by printing the top words in each topic:

In [10]:
# Show the ten highest-weighted words of every topic, one topic per line
for topic_id in range(num_topics):
    top_terms = [term for term, _weight in lda_model.show_topic(topic_id, topn=10)]
    print(f"Topic {topic_id}: {' '.join(top_terms)}")

Topic 0: flight ba time staff london crew food service seat heathrow
Topic 1: ba flight seat food time service economy airways would british
Topic 2: flight good ba food crew seats cabin service seat time
Topic 3: british ba airways service flight get business london class crew
Topic 4: flight ba class airways service economy food seats one staff
Topic 5: flight ba seat service business airways british would good meal
Topic 6: flight crew food cabin ba service boarding good seats would
Topic 7: flight ba us staff would cabin check british food airways
Topic 8: flight ba service airways british time seats seat food class
Topic 9: flight ba seat service seats get lhr class time one


## These topics suggest that the dataset primarily contains reviews of British Airways flights to/from London, with a focus on various aspects such as the quality of service, the seats and cabin, the food, and the business class experience.

