# Air France Reviews Dataset

In [None]:
## importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
## loading the data
# Location of the TripAdvisor review export inside the Kaggle input directory.
reviews_csv_path = "/kaggle/input/air-france-reviews-dataset/airfrance_tripadvisor_reviews.csv"
reviews_data = pd.read_csv(reviews_csv_path)

# Preview the first rows of the freshly loaded frame.
reviews_data.head()

In [None]:
## renaming columns
# Rebind the name to the renamed frame instead of mutating with
# `inplace=True`: the cell then derives its output purely from its input and
# the call stays chainable. Only "publishedDate" is renamed; every other
# column keeps its name.
reviews_data = reviews_data.rename(columns={"publishedDate": "date"})
reviews_data.head()

In [None]:
## ordering columns
# Keep only the four columns used by the analysis, in display order.
# The explicit `.copy()` makes the result an independent frame, so assigning
# to one of its columns afterwards (e.g. the datetime conversion of 'date')
# does not trigger pandas' SettingWithCopyWarning.
reviews_data = reviews_data[['date', 'title', 'text', 'rating']].copy()

In [None]:
## final view: preview the first rows of the renamed and reordered frame
reviews_data.head()

In [None]:
## checking dtypes: print a per-column summary (non-null counts and dtypes)
reviews_data.info()

In [None]:
## changing datetime
# Parse the 'date' column with pandas' datetime parser, then store the parsed
# values back under the same column name.
parsed_dates = pd.to_datetime(reviews_data["date"])
reviews_data["date"] = parsed_dates

In [None]:
## checking the dtypes again, now that 'date' has gone through pd.to_datetime
reviews_data.info()

## EDA and Visualization

In [None]:
# Pie chart of how the reviews are spread across the rating values
rating_counts = reviews_data["rating"].value_counts()

pie_ax = rating_counts.plot.pie(
    autopct='%1.1f%%',  # print each slice's share as a percentage
    colors=plt.cm.Paired.colors,  # colors taken from the "Paired" colormap
    title='Variation of Ratings',
    ylabel='',  # no label on the y-axis
    legend=True,  # draw the legend
    figsize=(6, 6),  # figure size
)

pie_ax.set_ylabel('')  # make sure no y-axis label is left on the axes
plt.show()

In [None]:
## Day-wise average rating - is there a trend worth following up?

# Mean rating per distinct 'date' value, rounded to two decimals
daywise_ratings = reviews_data.groupby('date')['rating'].mean().round(2)

# Draw the series on an explicitly created figure/axes pair
fig, ax = plt.subplots(figsize=(10, 6))
daywise_ratings.plot(ax=ax, marker='o', linestyle='-', color='b', title='Daywise Average Ratings')
ax.set_xlabel('Date')
ax.set_ylabel('Average Rating')
ax.grid(True)
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

The day-wise view is not very informative, so we move on to `monthwise ratings`.

In [None]:
## Month-wise average rating - is there a trend worth following up?

# Bucket every review into its calendar month, then average the ratings
review_month = reviews_data['date'].dt.to_period('M')
monthwise_ratings = reviews_data.groupby(review_month)['rating'].mean().round(2)

# Draw the monthly series on an explicitly created figure/axes pair
fig, ax = plt.subplots(figsize=(10, 6))
monthwise_ratings.plot(ax=ax, linestyle='-', color='g', title='Monthwise Average Ratings')
ax.set_xlabel('Date')
ax.set_ylabel('Average Rating')
ax.grid(True)
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

<p> -----
    It is clearly visible that ratings have gone down post-pandemic.
    -----
</p>

In [None]:
## yearly reviews count
# Count reviews per (calendar year, rating) pair. The yearly period alias 'Y'
# is used because the older 'A' alias is deprecated in recent pandas releases
# and emits a FutureWarning; both denote the same yearly period.
yearly_reviews = (
    reviews_data
    .groupby([reviews_data['date'].dt.to_period('Y'), 'rating'])['date']
    .size()
    .reset_index(name='count')
)

## yearly total reviews
# One row per year holding the number of reviews across all ratings.
yearly_total_reviews = yearly_reviews.groupby(['date'])['count'].sum().reset_index(name = 'total_reviews')

## merge total reviews back to the actual data
# After the merge every (year, rating) row also carries its year's total.
yearly_reviews = yearly_reviews.merge(yearly_total_reviews, on = 'date')

## percentage of reviews yearwise
# Share of the year's reviews that received this rating, in percent.
yearly_reviews['review_pct'] = (100.0 * yearly_reviews['count']/yearly_reviews['total_reviews']).round(2)

# Pivot to a year x rating table of percentages; (year, rating) pairs with
# no reviews are filled with 0.
yearly_reviews_pivot = yearly_reviews.pivot(index='date', columns='rating', values='review_pct').fillna(0)

In [None]:
# Display the year x rating table built from the `review_pct` values
yearly_reviews_pivot

In [None]:
# Stacked bar chart of the yearly rating mix (percent of each year's reviews)
# One figure/axes pair is created and handed to pandas via `ax`. Calling
# plt.figure() first and then DataFrame.plot() without `ax` leaves an extra,
# empty figure behind, because DataFrame.plot() opens a figure of its own.
fig, ax = plt.subplots(figsize=(12, 8))
yearly_reviews_pivot.plot(
    kind='bar',
    stacked=True,
    colormap='viridis',
    ax=ax,
    title='Yearly Share of Reviews by Rating',
)
ax.set_xlabel('Year')
# The pivot holds `review_pct` values, i.e. percentages rather than raw counts.
ax.set_ylabel('Percentage of Reviews (%)')
ax.legend(title='Rating')
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Show 10 randomly chosen reviews; the fixed seed keeps the displayed rows
# the same on every Restart & Run All.
reviews_data.sample(10, random_state=42)

## Time Period Segmentation

*Divide the data into meaningful periods*
  1. #### Pre-pandemic (2016-2018)
  2. #### Pandemic (2019-2020)
  3. #### Post-pandemic (2021-2024)

In [None]:
## creating segments of the reviews

# Pre-pandemic segment: rows whose 'date' is not later than 2018-12-31
is_prepandemic = reviews_data['date'] <= '2018-12-31'
prepandemic_df = reviews_data[is_prepandemic]

# Show the latest and the earliest date that ended up in the segment
prepandemic_df['date'].max(), prepandemic_df['date'].min()