### Importing required libraries 

In [1]:
#BeautifulSoup: A library used to parse HTML and extract data from web pages.
from bs4 import BeautifulSoup
#requests: A library to send HTTP requests to websites and get the HTML of web pages.
import requests
#pandas: A library used tool for data manipulation and analysis
import pandas
#csv: A module for writing and handling CSV (Comma Separated Values) files.
import csv
#re: Python’s module for regular expressions, used to match patterns in strings.
import re

In [3]:
review_df = pandas.read_csv('BA_reviews_dataset.csv')

In [5]:
review_df.head()

Unnamed: 0,Title,Author,Country,Rating,Overall_Rating,Date_Published,Type_of_Traveller,Seat_Type,Route,Recommended,Content
0,"""never fly with them again""",Erika Greyling,United Kingdom,1.0,10.0,2024-11-03,Couple Leisure,Economy Class,Munich to London Heathrow,no,✅ Trip Verified | I recently travelled from ...
1,"""still have not heard any updates""",S Wozniak,United States,3.0,10.0,2024-11-03,Couple Leisure,Premium Economy,Heathrow to Boston,no,Not Verified | I paid for seats 80 A and B on...
2,"""cabin crew were nice""",Barnaby Emmerson,United Kingdom,7.0,10.0,2024-11-03,Family Leisure,Economy Class,Los Angeles to London Heathrow,yes,"Not Verified | The flight wasn’t that bad, alt..."
3,"""support staff wash their hands of you""",Charlotte Parsons,United Kingdom,1.0,10.0,2024-11-02,Family Leisure,Premium Economy,Vancouver to London,no,✅ Trip Verified | I decided to treat myself a...
4,"""no fuss, no bother experience""",R. Wrightman,Canada,9.0,10.0,2024-11-02,Solo Leisure,Economy Class,Vancouver to Gatwick,yes,Not Verified | I was very impressed with thei...


In [7]:
review_df.dtypes

Title                 object
Author                object
Country               object
Rating               float64
Overall_Rating       float64
Date_Published        object
Type_of_Traveller     object
Seat_Type             object
Route                 object
Recommended           object
Content               object
dtype: object

### Data Preparation / Data Cleaning

In [9]:
# Change date column from object to datetime
#Before using pd.to_datetime(), you can remove the ordinal suffixes ("th", "nd", "st", "rd") using a regular expression.
# Sample DataFrame with problematic date strings
review_df = pandas.read_csv('BA_reviews_dataset.csv')

# Function to remove ordinal suffixes (e.g., "th", "nd", "st", "rd")
def remove_ordinal_suffix(date_str):
    return re.sub(r'(\d+)(st|nd|rd|th)', r'\1', date_str)

# Apply the function to the 'Date_Published' column
review_df['Date_Published'] = review_df['Date_Published'].apply(remove_ordinal_suffix)

# Now convert the cleaned strings to datetime, inferring format for mixed cases
review_df['Date_Published'] = pandas.to_datetime(review_df['Date_Published'], errors='coerce')

# Check the DataFrame to confirm the conversion
print(review_df.dtypes)

Title                        object
Author                       object
Country                      object
Rating                      float64
Overall_Rating              float64
Date_Published       datetime64[ns]
Type_of_Traveller            object
Seat_Type                    object
Route                        object
Recommended                  object
Content                      object
dtype: object


In [11]:
#removes leading and trailing spaces from all columns in a DataFrame
review_df = review_df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
review_df.head()

Unnamed: 0,Title,Author,Country,Rating,Overall_Rating,Date_Published,Type_of_Traveller,Seat_Type,Route,Recommended,Content
0,"""never fly with them again""",Erika Greyling,United Kingdom,1.0,10.0,2024-11-03,Couple Leisure,Economy Class,Munich to London Heathrow,no,✅ Trip Verified | I recently travelled from ...
1,"""still have not heard any updates""",S Wozniak,United States,3.0,10.0,2024-11-03,Couple Leisure,Premium Economy,Heathrow to Boston,no,Not Verified | I paid for seats 80 A and B on...
2,"""cabin crew were nice""",Barnaby Emmerson,United Kingdom,7.0,10.0,2024-11-03,Family Leisure,Economy Class,Los Angeles to London Heathrow,yes,"Not Verified | The flight wasn’t that bad, alt..."
3,"""support staff wash their hands of you""",Charlotte Parsons,United Kingdom,1.0,10.0,2024-11-02,Family Leisure,Premium Economy,Vancouver to London,no,✅ Trip Verified | I decided to treat myself a...
4,"""no fuss, no bother experience""",R. Wrightman,Canada,9.0,10.0,2024-11-02,Solo Leisure,Economy Class,Vancouver to Gatwick,yes,Not Verified | I was very impressed with thei...


In [15]:
#removes "" from the title column
review_df['Title'] = review_df['Title'].apply(lambda x: x.replace('"', ''))
review_df

Unnamed: 0,Title,Author,Country,Rating,Overall_Rating,Date_Published,Type_of_Traveller,Seat_Type,Route,Recommended,Content
0,never fly with them again,Erika Greyling,United Kingdom,1.0,10.0,2024-11-03,Couple Leisure,Economy Class,Munich to London Heathrow,no,✅ Trip Verified | I recently travelled from ...
1,still have not heard any updates,S Wozniak,United States,3.0,10.0,2024-11-03,Couple Leisure,Premium Economy,Heathrow to Boston,no,Not Verified | I paid for seats 80 A and B on...
2,cabin crew were nice,Barnaby Emmerson,United Kingdom,7.0,10.0,2024-11-03,Family Leisure,Economy Class,Los Angeles to London Heathrow,yes,"Not Verified | The flight wasn’t that bad, alt..."
3,support staff wash their hands of you,Charlotte Parsons,United Kingdom,1.0,10.0,2024-11-02,Family Leisure,Premium Economy,Vancouver to London,no,✅ Trip Verified | I decided to treat myself a...
4,"no fuss, no bother experience",R. Wrightman,Canada,9.0,10.0,2024-11-02,Solo Leisure,Economy Class,Vancouver to Gatwick,yes,Not Verified | I was very impressed with thei...
...,...,...,...,...,...,...,...,...,...,...,...
3405,British Airways customer review,J Smith,United States,1.0,10.0,2014-12-18,,Business Class,,no,I cannot believe that BA calls their plane fro...
3406,British Airways customer review,Boone Pieter,Netherlands,7.0,10.0,2014-12-18,,Business Class,,yes,AMS-LHR-JNB and CPT-LHR-AMS in club. 767 betwe...
3407,British Airways customer review,R King,United Kingdom,8.0,10.0,2014-12-18,,First Class,,yes,Travelled JFK - LHR late November. The Concord...
3408,British Airways customer review,Tom Gleinser,United States,2.0,10.0,2014-12-18,,Business Class,,no,"We flew ""Business/1st class"" from London to Du..."


In [13]:
# the above steps did not work for all rows,therefore, implemented the function below
# Define a function to clean the title
def clean_title(title):
    if isinstance(title, str):  # Ensure the title is a string
        title = re.sub(r'[“”‘’"]', '', title)  # Remove any type of quote characters
        title = title.strip()  # Remove leading/trailing spaces
    return title

# Apply this cleaning function to the Title column
review_df['Title'] = review_df['Title'].apply(clean_title)

# For extra measure, strip all string columns
review_df = review_df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)

# Check the result
review_df.head()

Unnamed: 0,Title,Author,Country,Rating,Overall_Rating,Date_Published,Type_of_Traveller,Seat_Type,Route,Recommended,Content
0,never fly with them again,Erika Greyling,United Kingdom,1.0,10.0,2024-11-03,Couple Leisure,Economy Class,Munich to London Heathrow,no,✅ Trip Verified | I recently travelled from ...
1,still have not heard any updates,S Wozniak,United States,3.0,10.0,2024-11-03,Couple Leisure,Premium Economy,Heathrow to Boston,no,Not Verified | I paid for seats 80 A and B on...
2,cabin crew were nice,Barnaby Emmerson,United Kingdom,7.0,10.0,2024-11-03,Family Leisure,Economy Class,Los Angeles to London Heathrow,yes,"Not Verified | The flight wasn’t that bad, alt..."
3,support staff wash their hands of you,Charlotte Parsons,United Kingdom,1.0,10.0,2024-11-02,Family Leisure,Premium Economy,Vancouver to London,no,✅ Trip Verified | I decided to treat myself a...
4,"no fuss, no bother experience",R. Wrightman,Canada,9.0,10.0,2024-11-02,Solo Leisure,Economy Class,Vancouver to Gatwick,yes,Not Verified | I was very impressed with thei...


In [15]:
# Capitalize only the first letter of the first word in each sentence in the Title column
review_df['Title'] = review_df['Title'].str.capitalize()
review_df.head()

Unnamed: 0,Title,Author,Country,Rating,Overall_Rating,Date_Published,Type_of_Traveller,Seat_Type,Route,Recommended,Content
0,Never fly with them again,Erika Greyling,United Kingdom,1.0,10.0,2024-11-03,Couple Leisure,Economy Class,Munich to London Heathrow,no,✅ Trip Verified | I recently travelled from ...
1,Still have not heard any updates,S Wozniak,United States,3.0,10.0,2024-11-03,Couple Leisure,Premium Economy,Heathrow to Boston,no,Not Verified | I paid for seats 80 A and B on...
2,Cabin crew were nice,Barnaby Emmerson,United Kingdom,7.0,10.0,2024-11-03,Family Leisure,Economy Class,Los Angeles to London Heathrow,yes,"Not Verified | The flight wasn’t that bad, alt..."
3,Support staff wash their hands of you,Charlotte Parsons,United Kingdom,1.0,10.0,2024-11-02,Family Leisure,Premium Economy,Vancouver to London,no,✅ Trip Verified | I decided to treat myself a...
4,"No fuss, no bother experience",R. Wrightman,Canada,9.0,10.0,2024-11-02,Solo Leisure,Economy Class,Vancouver to Gatwick,yes,Not Verified | I was very impressed with thei...


In [17]:
#Capitalizes the first letter of each word in the author column, while the rest of the letters will be lowercase.
review_df['Author'] = review_df['Author'].str.title()
review_df

Unnamed: 0,Title,Author,Country,Rating,Overall_Rating,Date_Published,Type_of_Traveller,Seat_Type,Route,Recommended,Content
0,Never fly with them again,Erika Greyling,United Kingdom,1.0,10.0,2024-11-03,Couple Leisure,Economy Class,Munich to London Heathrow,no,✅ Trip Verified | I recently travelled from ...
1,Still have not heard any updates,S Wozniak,United States,3.0,10.0,2024-11-03,Couple Leisure,Premium Economy,Heathrow to Boston,no,Not Verified | I paid for seats 80 A and B on...
2,Cabin crew were nice,Barnaby Emmerson,United Kingdom,7.0,10.0,2024-11-03,Family Leisure,Economy Class,Los Angeles to London Heathrow,yes,"Not Verified | The flight wasn’t that bad, alt..."
3,Support staff wash their hands of you,Charlotte Parsons,United Kingdom,1.0,10.0,2024-11-02,Family Leisure,Premium Economy,Vancouver to London,no,✅ Trip Verified | I decided to treat myself a...
4,"No fuss, no bother experience",R. Wrightman,Canada,9.0,10.0,2024-11-02,Solo Leisure,Economy Class,Vancouver to Gatwick,yes,Not Verified | I was very impressed with thei...
...,...,...,...,...,...,...,...,...,...,...,...
3405,British airways customer review,J Smith,United States,1.0,10.0,2014-12-18,,Business Class,,no,I cannot believe that BA calls their plane fro...
3406,British airways customer review,Boone Pieter,Netherlands,7.0,10.0,2014-12-18,,Business Class,,yes,AMS-LHR-JNB and CPT-LHR-AMS in club. 767 betwe...
3407,British airways customer review,R King,United Kingdom,8.0,10.0,2014-12-18,,First Class,,yes,Travelled JFK - LHR late November. The Concord...
3408,British airways customer review,Tom Gleinser,United States,2.0,10.0,2014-12-18,,Business Class,,no,"We flew ""Business/1st class"" from London to Du..."


In [19]:
#replace NaN from overall rating
review_df['Overall_Rating'] = review_df['Overall_Rating'].fillna(10)
review_df

Unnamed: 0,Title,Author,Country,Rating,Overall_Rating,Date_Published,Type_of_Traveller,Seat_Type,Route,Recommended,Content
0,Never fly with them again,Erika Greyling,United Kingdom,1.0,10.0,2024-11-03,Couple Leisure,Economy Class,Munich to London Heathrow,no,✅ Trip Verified | I recently travelled from ...
1,Still have not heard any updates,S Wozniak,United States,3.0,10.0,2024-11-03,Couple Leisure,Premium Economy,Heathrow to Boston,no,Not Verified | I paid for seats 80 A and B on...
2,Cabin crew were nice,Barnaby Emmerson,United Kingdom,7.0,10.0,2024-11-03,Family Leisure,Economy Class,Los Angeles to London Heathrow,yes,"Not Verified | The flight wasn’t that bad, alt..."
3,Support staff wash their hands of you,Charlotte Parsons,United Kingdom,1.0,10.0,2024-11-02,Family Leisure,Premium Economy,Vancouver to London,no,✅ Trip Verified | I decided to treat myself a...
4,"No fuss, no bother experience",R. Wrightman,Canada,9.0,10.0,2024-11-02,Solo Leisure,Economy Class,Vancouver to Gatwick,yes,Not Verified | I was very impressed with thei...
...,...,...,...,...,...,...,...,...,...,...,...
3405,British airways customer review,J Smith,United States,1.0,10.0,2014-12-18,,Business Class,,no,I cannot believe that BA calls their plane fro...
3406,British airways customer review,Boone Pieter,Netherlands,7.0,10.0,2014-12-18,,Business Class,,yes,AMS-LHR-JNB and CPT-LHR-AMS in club. 767 betwe...
3407,British airways customer review,R King,United Kingdom,8.0,10.0,2014-12-18,,First Class,,yes,Travelled JFK - LHR late November. The Concord...
3408,British airways customer review,Tom Gleinser,United States,2.0,10.0,2014-12-18,,Business Class,,no,"We flew ""Business/1st class"" from London to Du..."


In [21]:
#splits the Content column into two new columns, Verification_Status and Comment, based on the delimiter "|".
# Define a function to handle the conditional splitting
def split_content(content):
    # Check if "|" exists in the content
    if "|" in content:
        # Split into Verification_Status and Comment
        return content.split("|", 1)
    else:
        # If "|" is not found, set Verification_Status to None and Comment to the full content
        return [None, content]

# Apply the function to the 'Content' column and expand into two new columns
review_df[["Verification_Status", "Comment"]] = review_df["Content"].apply(split_content).apply(pandas.Series)

# Display the DataFrame to confirm changes
review_df

Unnamed: 0,Title,Author,Country,Rating,Overall_Rating,Date_Published,Type_of_Traveller,Seat_Type,Route,Recommended,Content,Verification_Status,Comment
0,Never fly with them again,Erika Greyling,United Kingdom,1.0,10.0,2024-11-03,Couple Leisure,Economy Class,Munich to London Heathrow,no,✅ Trip Verified | I recently travelled from ...,✅ Trip Verified,I recently travelled from Munich to London ...
1,Still have not heard any updates,S Wozniak,United States,3.0,10.0,2024-11-03,Couple Leisure,Premium Economy,Heathrow to Boston,no,Not Verified | I paid for seats 80 A and B on...,Not Verified,I paid for seats 80 A and B on my flight fro...
2,Cabin crew were nice,Barnaby Emmerson,United Kingdom,7.0,10.0,2024-11-03,Family Leisure,Economy Class,Los Angeles to London Heathrow,yes,"Not Verified | The flight wasn’t that bad, alt...",Not Verified,"The flight wasn’t that bad, although the Infl..."
3,Support staff wash their hands of you,Charlotte Parsons,United Kingdom,1.0,10.0,2024-11-02,Family Leisure,Premium Economy,Vancouver to London,no,✅ Trip Verified | I decided to treat myself a...,✅ Trip Verified,I decided to treat myself and my teenage dau...
4,"No fuss, no bother experience",R. Wrightman,Canada,9.0,10.0,2024-11-02,Solo Leisure,Economy Class,Vancouver to Gatwick,yes,Not Verified | I was very impressed with thei...,Not Verified,I was very impressed with their efficient bo...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3405,British airways customer review,J Smith,United States,1.0,10.0,2014-12-18,,Business Class,,no,I cannot believe that BA calls their plane fro...,,I cannot believe that BA calls their plane fro...
3406,British airways customer review,Boone Pieter,Netherlands,7.0,10.0,2014-12-18,,Business Class,,yes,AMS-LHR-JNB and CPT-LHR-AMS in club. 767 betwe...,,AMS-LHR-JNB and CPT-LHR-AMS in club. 767 betwe...
3407,British airways customer review,R King,United Kingdom,8.0,10.0,2014-12-18,,First Class,,yes,Travelled JFK - LHR late November. The Concord...,,Travelled JFK - LHR late November. The Concord...
3408,British airways customer review,Tom Gleinser,United States,2.0,10.0,2014-12-18,,Business Class,,no,"We flew ""Business/1st class"" from London to Du...",,"We flew ""Business/1st class"" from London to Du..."


In [23]:
#drops duplicates from all columns
review_df = review_df.drop_duplicates()
review_df

Unnamed: 0,Title,Author,Country,Rating,Overall_Rating,Date_Published,Type_of_Traveller,Seat_Type,Route,Recommended,Content,Verification_Status,Comment
0,Never fly with them again,Erika Greyling,United Kingdom,1.0,10.0,2024-11-03,Couple Leisure,Economy Class,Munich to London Heathrow,no,✅ Trip Verified | I recently travelled from ...,✅ Trip Verified,I recently travelled from Munich to London ...
1,Still have not heard any updates,S Wozniak,United States,3.0,10.0,2024-11-03,Couple Leisure,Premium Economy,Heathrow to Boston,no,Not Verified | I paid for seats 80 A and B on...,Not Verified,I paid for seats 80 A and B on my flight fro...
2,Cabin crew were nice,Barnaby Emmerson,United Kingdom,7.0,10.0,2024-11-03,Family Leisure,Economy Class,Los Angeles to London Heathrow,yes,"Not Verified | The flight wasn’t that bad, alt...",Not Verified,"The flight wasn’t that bad, although the Infl..."
3,Support staff wash their hands of you,Charlotte Parsons,United Kingdom,1.0,10.0,2024-11-02,Family Leisure,Premium Economy,Vancouver to London,no,✅ Trip Verified | I decided to treat myself a...,✅ Trip Verified,I decided to treat myself and my teenage dau...
4,"No fuss, no bother experience",R. Wrightman,Canada,9.0,10.0,2024-11-02,Solo Leisure,Economy Class,Vancouver to Gatwick,yes,Not Verified | I was very impressed with thei...,Not Verified,I was very impressed with their efficient bo...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3405,British airways customer review,J Smith,United States,1.0,10.0,2014-12-18,,Business Class,,no,I cannot believe that BA calls their plane fro...,,I cannot believe that BA calls their plane fro...
3406,British airways customer review,Boone Pieter,Netherlands,7.0,10.0,2014-12-18,,Business Class,,yes,AMS-LHR-JNB and CPT-LHR-AMS in club. 767 betwe...,,AMS-LHR-JNB and CPT-LHR-AMS in club. 767 betwe...
3407,British airways customer review,R King,United Kingdom,8.0,10.0,2014-12-18,,First Class,,yes,Travelled JFK - LHR late November. The Concord...,,Travelled JFK - LHR late November. The Concord...
3408,British airways customer review,Tom Gleinser,United States,2.0,10.0,2014-12-18,,Business Class,,no,"We flew ""Business/1st class"" from London to Du...",,"We flew ""Business/1st class"" from London to Du..."


In [25]:
# Where route is N/A, extract the route from the comment column (Not working as expected, ask for assistance)
# Adjusted get_route_from_comment function
def get_route_from_comment(comment):
    # Regex pattern to capture country pairs like "Country1 to Country2" or "Country1 - Country2"
    match = re.search(r'(\b\w+\b)\s*(?:to|-)\s*(\b\w+\b)', comment)
    if match:
        # Extract the matched groups
        country1, country2 = match.groups()
        
        # Return the formatted route string
        return f"{country1} to {country2}"
    return None  # Return None if no match is found

# Define a function to update Route based on Comment
def update_route(row):
    # If the Route is 'N/A', we want to attempt to extract a route from Comment
    if row['Route'] == 'N/A':
        route = get_route_from_comment(row['Comment'])
        # If a route is extracted, return it; otherwise, return 'N/A'
        return route if route else 'N/A'
    return row['Route']

# Apply the function to the DataFrame using .loc to avoid SettingWithCopyWarning
review_df.loc[:, 'Route'] = review_df.apply(update_route, axis=1)

# Check the updated DataFrame
review_df

Unnamed: 0,Title,Author,Country,Rating,Overall_Rating,Date_Published,Type_of_Traveller,Seat_Type,Route,Recommended,Content,Verification_Status,Comment
0,Never fly with them again,Erika Greyling,United Kingdom,1.0,10.0,2024-11-03,Couple Leisure,Economy Class,Munich to London Heathrow,no,✅ Trip Verified | I recently travelled from ...,✅ Trip Verified,I recently travelled from Munich to London ...
1,Still have not heard any updates,S Wozniak,United States,3.0,10.0,2024-11-03,Couple Leisure,Premium Economy,Heathrow to Boston,no,Not Verified | I paid for seats 80 A and B on...,Not Verified,I paid for seats 80 A and B on my flight fro...
2,Cabin crew were nice,Barnaby Emmerson,United Kingdom,7.0,10.0,2024-11-03,Family Leisure,Economy Class,Los Angeles to London Heathrow,yes,"Not Verified | The flight wasn’t that bad, alt...",Not Verified,"The flight wasn’t that bad, although the Infl..."
3,Support staff wash their hands of you,Charlotte Parsons,United Kingdom,1.0,10.0,2024-11-02,Family Leisure,Premium Economy,Vancouver to London,no,✅ Trip Verified | I decided to treat myself a...,✅ Trip Verified,I decided to treat myself and my teenage dau...
4,"No fuss, no bother experience",R. Wrightman,Canada,9.0,10.0,2024-11-02,Solo Leisure,Economy Class,Vancouver to Gatwick,yes,Not Verified | I was very impressed with thei...,Not Verified,I was very impressed with their efficient bo...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3405,British airways customer review,J Smith,United States,1.0,10.0,2014-12-18,,Business Class,,no,I cannot believe that BA calls their plane fro...,,I cannot believe that BA calls their plane fro...
3406,British airways customer review,Boone Pieter,Netherlands,7.0,10.0,2014-12-18,,Business Class,,yes,AMS-LHR-JNB and CPT-LHR-AMS in club. 767 betwe...,,AMS-LHR-JNB and CPT-LHR-AMS in club. 767 betwe...
3407,British airways customer review,R King,United Kingdom,8.0,10.0,2014-12-18,,First Class,,yes,Travelled JFK - LHR late November. The Concord...,,Travelled JFK - LHR late November. The Concord...
3408,British airways customer review,Tom Gleinser,United States,2.0,10.0,2014-12-18,,Business Class,,no,"We flew ""Business/1st class"" from London to Du...",,"We flew ""Business/1st class"" from London to Du..."


In [27]:
# add a month and year column in the original dataset
#.loc explicitly specifies where the assignment should happen. This avoids ambiguity between modifying a slice and the parent DataFrame.
review_df.loc[:, 'Month_Posted'] = review_df['Date_Published'].dt.month
review_df.loc[:, 'Year_Posted'] = review_df['Date_Published'].dt.year
review_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  review_df.loc[:, 'Month_Posted'] = review_df['Date_Published'].dt.month
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  review_df.loc[:, 'Year_Posted'] = review_df['Date_Published'].dt.year


Unnamed: 0,Title,Author,Country,Rating,Overall_Rating,Date_Published,Type_of_Traveller,Seat_Type,Route,Recommended,Content,Verification_Status,Comment,Month_Posted,Year_Posted
0,Never fly with them again,Erika Greyling,United Kingdom,1.0,10.0,2024-11-03,Couple Leisure,Economy Class,Munich to London Heathrow,no,✅ Trip Verified | I recently travelled from ...,✅ Trip Verified,I recently travelled from Munich to London ...,11,2024
1,Still have not heard any updates,S Wozniak,United States,3.0,10.0,2024-11-03,Couple Leisure,Premium Economy,Heathrow to Boston,no,Not Verified | I paid for seats 80 A and B on...,Not Verified,I paid for seats 80 A and B on my flight fro...,11,2024
2,Cabin crew were nice,Barnaby Emmerson,United Kingdom,7.0,10.0,2024-11-03,Family Leisure,Economy Class,Los Angeles to London Heathrow,yes,"Not Verified | The flight wasn’t that bad, alt...",Not Verified,"The flight wasn’t that bad, although the Infl...",11,2024
3,Support staff wash their hands of you,Charlotte Parsons,United Kingdom,1.0,10.0,2024-11-02,Family Leisure,Premium Economy,Vancouver to London,no,✅ Trip Verified | I decided to treat myself a...,✅ Trip Verified,I decided to treat myself and my teenage dau...,11,2024
4,"No fuss, no bother experience",R. Wrightman,Canada,9.0,10.0,2024-11-02,Solo Leisure,Economy Class,Vancouver to Gatwick,yes,Not Verified | I was very impressed with thei...,Not Verified,I was very impressed with their efficient bo...,11,2024


In [29]:
# Save to CSV without the index
review_df.to_csv("BA_clean_dataset.csv", index=False)

print("Dataset saved as BA_clean_dataset.csv")

Dataset saved as BA_clean_dataset.csv


In [31]:
review_df.head()

Unnamed: 0,Title,Author,Country,Rating,Overall_Rating,Date_Published,Type_of_Traveller,Seat_Type,Route,Recommended,Content,Verification_Status,Comment,Month_Posted,Year_Posted
0,Never fly with them again,Erika Greyling,United Kingdom,1.0,10.0,2024-11-03,Couple Leisure,Economy Class,Munich to London Heathrow,no,✅ Trip Verified | I recently travelled from ...,✅ Trip Verified,I recently travelled from Munich to London ...,11,2024
1,Still have not heard any updates,S Wozniak,United States,3.0,10.0,2024-11-03,Couple Leisure,Premium Economy,Heathrow to Boston,no,Not Verified | I paid for seats 80 A and B on...,Not Verified,I paid for seats 80 A and B on my flight fro...,11,2024
2,Cabin crew were nice,Barnaby Emmerson,United Kingdom,7.0,10.0,2024-11-03,Family Leisure,Economy Class,Los Angeles to London Heathrow,yes,"Not Verified | The flight wasn’t that bad, alt...",Not Verified,"The flight wasn’t that bad, although the Infl...",11,2024
3,Support staff wash their hands of you,Charlotte Parsons,United Kingdom,1.0,10.0,2024-11-02,Family Leisure,Premium Economy,Vancouver to London,no,✅ Trip Verified | I decided to treat myself a...,✅ Trip Verified,I decided to treat myself and my teenage dau...,11,2024
4,"No fuss, no bother experience",R. Wrightman,Canada,9.0,10.0,2024-11-02,Solo Leisure,Economy Class,Vancouver to Gatwick,yes,Not Verified | I was very impressed with thei...,Not Verified,I was very impressed with their efficient bo...,11,2024
