In [None]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Fetch the 'vader_lexicon' resource through NLTK's downloader.
# NOTE(review): presumably this is the lexicon SentimentIntensityAnalyzer loads
# when it is instantiated further down — confirm against the NLTK docs.
nltk.download('vader_lexicon')

In [34]:
# One listing URL per results page, for page numbers 1 through 10.
# The query string requests a page size of 100 sorted by post date, descending.
# Keeping the URLs in a list lets the later cells fetch and parse every page
# with the same loop.
urls = [
    f'https://www.airlinequality.com/airline-reviews/british-airways/page/{page_num}/?sortby=post_date%3ADesc&pagesize=100'
    for page_num in range(1, 11)
]

In [35]:
# Download every listing page and keep the raw responses, in the same order as
# `urls`, so the parsing cells can be re-run without hitting the network again.
#
# A timeout is passed explicitly: requests waits forever by default, so one
# stalled connection would otherwise hang the whole notebook.
REQUEST_TIMEOUT_SECONDS = 30

responses = []
# A single Session reuses the underlying connection across the page requests.
# Response bodies are read eagerly (no stream=True), so they remain usable
# after the session is closed.
with requests.Session() as session:
    for link in urls:
        response = session.get(link, timeout=REQUEST_TIMEOUT_SECONDS)
        # Fail loudly on 4xx/5xx instead of parsing an error page as reviews.
        response.raise_for_status()
        responses.append(response)

In [36]:
# Parse each downloaded page into its own BeautifulSoup tree. The list stays
# parallel to `responses`: one parsed document per fetched URL.
soups = [BeautifulSoup(r.content, 'html.parser') for r in responses]

In [37]:
# For every parsed page, locate the outer <article> that wraps the review
# listing and collect the <article> elements nested inside it. The result is a
# list of lists: one inner list of review tags per page.
all_reviews = [
    page.find('article', class_="comp comp_reviews-airline querylist position-content").find_all('article')
    for page in soups
]

In [38]:
# Rows of the review-stats table whose value cell holds plain text. The row
# header text doubles as the output column name.
_TEXT_FIELDS = ('Type Of Traveller', 'Seat Type', 'Route', 'Date Flown')

# Rows whose value is rendered as stars; the score is the number of filled ones.
_STAR_FIELDS = (
    'Seat Comfort',
    'Cabin Staff Service',
    'Food & Beverages',
    'Inflight Entertainment',
    'Ground Service',
    'Wifi & Connectivity',
    'Value For Money',
)

# Upper bound on the length of a "... Verified |" marker at the start of a
# review body. Anything longer before the first '|' is treated as review text.
_MAX_VERIFICATION_PREFIX = 40


def _tag_text(tag):
    """Return the whitespace-stripped text of ``tag``, or None if ``tag`` is None."""
    return tag.text.strip() if tag is not None else None


def _strip_verification_prefix(text):
    """Remove a leading '<something> Verified |' marker from a review body.

    The previous implementation dropped the first 16 characters unconditionally,
    which cut into the review itself whenever the marker was absent or had a
    different length. Here the text before the first '|' is only discarded when
    it is short and mentions "verified"; otherwise the body is returned whole.
    NOTE(review): the exact marker wording used by the site is assumed — confirm
    against the live markup.
    """
    head, pipe, tail = text.partition('|')
    if pipe and len(head) <= _MAX_VERIFICATION_PREFIX and 'verified' in head.lower():
        return tail.strip()
    return text.strip()


def _parse_review(review, serial):
    """Extract one review tag into a flat dict ready for ``pd.DataFrame``.

    Parameters
    ----------
    review : bs4 Tag for a single review <article>.
    serial : int stored under 'S_No' (the review's 1-based position on its page).

    Returns
    -------
    dict with the same keys, in the same order, as the original cell produced.
    Any element that cannot be found yields None for its field (an empty string
    for 'Content') instead of aborting the whole scrape with AttributeError.
    """
    record = {'S_No': serial}

    # The heading holds the reviewer name, the country and the publish date.
    heading = review.find('h3', class_='text_sub_header userStatusWrapper')
    name_tag = heading.find('span', itemprop="name") if heading is not None else None
    date_tag = heading.find('time', itemprop="datePublished") if heading is not None else None
    record['Name'] = _tag_text(name_tag)
    record['Date'] = _tag_text(date_tag)

    # The country is the loose text node right after the name, wrapped in
    # parentheses. Strip whitespace and the parentheses rather than slicing a
    # fixed number of characters, and ignore the node if it is not text at all.
    country = None
    if name_tag is not None and name_tag.string is not None:
        after_name = name_tag.string.next_element
        if isinstance(after_name, str):
            country = after_name.strip().strip('()').strip() or None
    record['Country'] = country

    # Keep everything before the '/' so a two-digit score such as "10/10" is
    # stored as '10'; taking only the first character turned it into '1'.
    # The value stays a string, as before.
    rating_text = _tag_text(review.find('div', class_='rating-10'))
    record['Rating'] = rating_text.split('/')[0].strip() if rating_text else None

    record['Title'] = _tag_text(review.find('h2', class_='text_header'))

    body = review.find('div', class_='text_content')
    # Fall back to '' (not None) so downstream text processing still gets a str.
    record['Content'] = _strip_verification_prefix(body.text) if body is not None else ''

    # Read the value cell that sits next to the aircraft header itself; the
    # first 'review-value' cell of the review is not guaranteed to be that one.
    aircraft_header = review.find('td', class_="review-rating-header aircraft")
    aircraft_value = (
        aircraft_header.find_next_sibling('td', class_="review-value")
        if aircraft_header is not None else None
    )
    record['Aircraft'] = _tag_text(aircraft_value)

    # Pre-fill every table-derived column so each record has the full key set
    # even when the review omits some rows.
    for field in _TEXT_FIELDS + ('Recommended',) + _STAR_FIELDS:
        record[field] = None

    stats = review.find('div', class_="review-stats")
    table = stats.find('table', class_="review-ratings") if stats is not None else None
    rows = table.find_all('tr') if table is not None else []
    for tr in rows:
        header = _tag_text(tr.find('td', class_="review-rating-header"))
        value = tr.find('td', class_="review-value")
        stars = tr.find('td', class_="review-rating-stars stars")
        if header in _TEXT_FIELDS and value is not None:
            record[header] = value.text.strip()
        elif header == 'Recommended' and value is not None:
            record[header] = value.text.strip().capitalize()
        elif header in _STAR_FIELDS and stars is not None:
            record[header] = len(stars.find_all('span', class_="star fill"))

    return record


# Flatten the per-page review lists into one list of dicts (one per review).
# 'S_No' restarts at 1 on every page, matching the original numbering.
data = []
for page_reviews in all_reviews:
    for serial, review in enumerate(page_reviews, start=1):
        data.append(_parse_review(review, serial))


In [None]:
# Build the DataFrame from the scraped records. 'S_No' was only a per-page
# counter, so it is dropped right away.
pd.set_option('display.expand_frame_repr', False)  # keep wide frames on one line
df = pd.DataFrame(data).drop(columns=['S_No'])
df

In [None]:
# Narrow view with only the columns relevant to the sentiment analysis.
sentiment_columns = ['Name', 'Route', 'Title', 'Content', 'Rating']
sentiment_df = df[sentiment_columns]
sentiment_df

In [None]:
# Score every review body with VADER's SentimentIntensityAnalyzer and store the
# resulting score dict for each row in a 'Sentiment' column.
sia = SentimentIntensityAnalyzer()

sentiments = [sia.polarity_scores(text) for text in df['Content']]
df['Sentiment'] = sentiments

print(df[['Content', 'Sentiment']])

In [None]:
# Separate the neg / neu / pos / compound scores into their own columns.
# The scores are recomputed here so the cell can be re-run on its own: the
# temporary 'Sentiment' column is dropped again at the end.
sentiments = [sia.polarity_scores(text) for text in df['Content']]
df['Sentiment'] = sentiments

score_columns = (
    ('Negative', 'neg'),
    ('Neutral', 'neu'),
    ('Positive', 'pos'),
    ('Compound', 'compound'),
)
for column_name, score_key in score_columns:
    df[column_name] = [scores[score_key] for scores in df['Sentiment']]

df.drop(columns='Sentiment', inplace=True)

df[['Title', 'Negative', 'Neutral', 'Positive', 'Compound']]