In [2]:
import pandas as pd
# Load the reviews CSV; the file's first column becomes the row index.
df = pd.read_csv(
    '../DATA/BA_reviews.csv',
    index_col=0,
)

In [None]:
# Preview the first 20 rows of the loaded frame.
df.head(n=20)


In [None]:
!pip install matplotlib

In [None]:
import matplotlib.pyplot as plt


# Number of reviews per value of the 'Recommended' column.
# value_counts() orders its result by frequency (most common first), so the
# label and colour of each wedge are looked up from the value the wedge
# represents instead of being assumed from a fixed position.
sizes = df['Recommended'].value_counts()

recommendation_labels = {'yes': 'Recommended (Yes)', 'no': 'Not Recommended (No)'}
recommendation_colors = {'yes': 'lightgreen', 'no': 'lightcoral'}

# Any unexpected value keeps its raw text and gets a neutral colour rather
# than silently borrowing another category's label. Building both lists from
# the index also keeps their length equal to the number of wedges, even when
# only one category is present.
labels = [recommendation_labels.get(value, str(value)) for value in sizes.index]
colors = [recommendation_colors.get(value, 'lightgray') for value in sizes.index]

plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors)
plt.title('Proportion of Recommended vs. Not Recommended Reviews')
plt.savefig('../Visualization/recommendation_pie_chart.jpg', bbox_inches='tight', dpi=720)
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Number of reviews per value of the 'Sentiment' column.
# value_counts() orders its result by frequency (most common first), so the
# wedge labels and colours are derived from the index instead of a fixed
# positional list that is only right for one particular ordering.
sizes = df['Sentiment'].value_counts()

sentiment_colors = {'Positive': 'green', 'Negative': 'red'}

# Each wedge is labelled with the sentiment value it actually represents.
# Values other than Positive/Negative get a neutral colour.
sentiment_labels = [str(value) for value in sizes.index]
colors = [sentiment_colors.get(value, 'lightgray') for value in sizes.index]

plt.pie(sizes, labels=sentiment_labels, autopct='%1.1f%%', startangle=90, colors=colors)
plt.title('Positive Reviews vs. Negative Reviews')
plt.savefig('../Visualization/positive_vs_negative_pie_chart.jpg', bbox_inches='tight', dpi=720)
plt.show()


In [None]:
# Show how many reviews fall into each seat type, then into each
# recommendation value, separated by a rule for readability.
seat_type_counts = df['Seat Type'].value_counts()
print(seat_type_counts)
print('-------------------------------------------------')
recommended_counts = df['Recommended'].value_counts()
print(recommended_counts)

In [None]:
import seaborn as sns
# Keep only rows whose seat type is not the string '0' (presumably a
# placeholder for a missing value -- confirm against the data source).
# NOTE: this rebinds `df`, so every later cell works on the filtered frame.
df = df.loc[df['Seat Type'].ne('0')]

# Review counts per seat type and recommendation, with a 'Total' row and column.
contingency_table = pd.crosstab(
    index=df['Seat Type'],
    columns=df['Recommended'],
    margins=True,
    margins_name='Total',
)

# Express every row as a percentage of that row's total.
row_totals = contingency_table['Total']
contingency_table_percentage = contingency_table.div(row_totals, axis=0).mul(100)

# Heatmap of the 'no' / 'yes' percentages (the 'Total' column is left out).
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(
    contingency_table_percentage[['no', 'yes']],
    annot=True,
    cmap="YlGnBu",
    fmt=".1f",
    linewidths=.5,
    cbar_kws={'label': 'Percentage'},
    ax=ax,
)
ax.set_title('Percentage of Yes and No Recommendations for Each Seat Type')
ax.set_xlabel('Recommended')
ax.set_ylabel('Seat Type')
fig.savefig('../Visualization/seattype_VS_recommendation.jpg', bbox_inches='tight', dpi=720)
plt.show()

In [None]:

# Mean score of each rating column across all rows of `df`.
average_ratings = df[['Seat Comfort Rating', 'Cabin Staff Service Rating', 'Food Beverages Rating',
                      'Inflight Entertainment Rating', 'Ground Service Rating', 'Value for Money Rating']].mean()

# One bar per rating category; the y-axis is fixed to 0-5 so the bars are
# shown against the same range in every run.
plt.figure(figsize=(18, 8))
sns.barplot(x=average_ratings.index, y=average_ratings.values)
plt.title('Average Ratings for Different Categories', fontsize=16)
plt.ylim(0, 5)
plt.ylabel('Average Rating', fontsize=14)
# fontsize was 1, which rendered the x-axis label unreadably small; use the
# same size as the y-axis label.
plt.xlabel('Rating Category', fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.savefig('../Visualization/MeanRating.jpg', bbox_inches='tight', dpi=720)
plt.show()


In [None]:
rating_columns = ['Seat Comfort Rating', 'Cabin Staff Service Rating', 'Food Beverages Rating',
                   'Inflight Entertainment Rating', 'Ground Service Rating', 'Value for Money Rating']

# For every rating column, compute the share of 'yes' recommendations at each
# rating value. The per-column results are collected first and combined in a
# single DataFrame call so that the row index is the union of all rating
# values; assigning columns one by one to an empty frame would align every
# later column to the first column's index and silently drop rating values
# that the first category does not contain.
yes_share_by_category = {}
for col in rating_columns:
    # fill_value=0: a rating value where nobody answered 'yes' (or 'no') has
    # a share of 0, not a missing value.
    shares = df.groupby(col)['Recommended'].value_counts(normalize=True).unstack(fill_value=0)
    # reindex guarantees a 'yes' column even if no review recommends at all.
    yes_share = shares.reindex(columns=['yes'], fill_value=0)['yes']
    yes_share_by_category[col] = yes_share * 100

percentage_df = pd.DataFrame(yes_share_by_category)

# Heatmap: after the transpose, rows are rating categories and columns are
# rating values; cells left blank are rating values a category never received.
plt.figure(figsize=(12, 8))
sns.heatmap(percentage_df.T, annot=True, cmap="YlGnBu", fmt=".1f", linewidths=.5, cbar_kws={'label': 'Percentage'})
plt.title('Percentage of Yes Recommendations for Each Rating Category', fontsize=16)
# Axis labels describe what each axis actually holds; the percentage itself is
# the cell colour and is already labelled on the colour bar.
plt.xlabel('Rating Value', fontsize=14)
plt.ylabel('Rating Category', fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.savefig('../Visualization/Rating_Vs_Yes-recommendations.jpg', bbox_inches='tight', dpi=720)
plt.show()