In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style="whitegrid")
import plotly.express as px 
import plotly.graph_objects as go 
import plotly as py 
from plotly import tools
import os
import calendar

In [None]:
# Location of the preprocessed review CSV. Set the AIRWAY_DATA_PATH environment
# variable to run the notebook on another machine; the original local path is
# kept as the fallback so existing setups behave exactly as before.
airway_path = os.environ.get(
    "AIRWAY_DATA_PATH",
    "/Users/jaideepsai/Desktop/DATA-ANALYTICS/Data Source/extracted-data/British Airways/preprocessed_airline_dataset.csv",
)
airway_data = pd.read_csv(airway_path, delimiter=',')

In [None]:
# Preview the first five rows of the loaded frame.
airway_data.head(n=5)

In [None]:
# Print pandas' built-in summary of the loaded frame.
airway_data.info()

In [None]:
# Count missing values per column (isna is the canonical name for isnull).
airway_data.isna().sum()

In [None]:
# Keep only reviews that have a traveller_type. The explicit .copy() makes the
# result an independent frame, so the column assignments in later cells
# (parsed "date", derived "year") cannot trigger SettingWithCopyWarning.
airway_data = airway_data.dropna(subset=["traveller_type"]).copy()

In [None]:
# Re-check missing values per column after the traveller_type filter.
airway_data.isna().sum()

In [None]:
# Convert the day-month-year text in "date" to datetimes, in place of the
# original column.
airway_data["date"] = airway_data["date"].pipe(pd.to_datetime, format="%d-%m-%Y")

In [None]:
# Median rating for every seat_type x traveller_type combination.
# The 0.5 quantile is computed with NumPy, exactly as a plain median would be.
airway_data.pivot_table(
    values="rating",
    index="seat_type",
    columns="traveller_type",
    aggfunc=lambda ratings: np.quantile(ratings, q=0.5),
)

In [None]:
# Mean rating for every seat_type x traveller_type combination.
airway_data.pivot_table(
    values="rating",
    index="seat_type",
    columns="traveller_type",
    aggfunc="mean",
)

In [None]:
# Derive the calendar year from the (already parsed) "date" column.
airway_data["year"] = airway_data["date"].dt.year

In [None]:
# Average of the overall rating and each sub-score, per year.
(
    airway_data
    .groupby("year")[
        [
            "rating",
            "seat_comfort",
            "cabin_staff_service",
            "food_beverages",
            "ground_service",
            "value_for_money",
        ]
    ]
    .mean()
)

In [None]:
# Number of reviews per seat type.
sns.countplot(data=airway_data, x="seat_type")
plt.show()


In [None]:
# Number of reviews per seat type, split by rating value.
sns.countplot(data=airway_data, x="seat_type", hue="rating")
plt.show()

In [None]:
# Number of reviews per seat type, split by the "recommended" flag.
sns.countplot(x="seat_type", hue="recommended", data=airway_data)
# Render the figure explicitly, like the other plotting cells, so the cell
# output is the chart rather than the Axes repr.
plt.show()

In [None]:
# Distribution of rating values across all reviews.
sns.countplot(data=airway_data, x="rating")
plt.show()

In [None]:
# Distribution of rating values, split by the "trip_verified" flag.
sns.countplot(x="rating", hue="trip_verified", data=airway_data)
# Render the figure explicitly, like the other plotting cells, so the cell
# output is the chart rather than the Axes repr.
plt.show()

In [None]:
# Share of each trip_verified value, expressed as a percentage.
airway_data["trip_verified"].value_counts(normalize=True).mul(100)

In [None]:
# Count reviews for every (date_flown, aircraft) pair.
date_aircraft_df = airway_data.loc[:, ["date_flown", "aircraft"]]
date_aircraft_count = (
    date_aircraft_df
    .groupby(["date_flown", "aircraft"])
    .size()
    .reset_index(name="count")
)

# Row of the single most frequent (date_flown, aircraft) pair.
# NOTE(review): this is the top pair, not necessarily the date with the most
# reviews overall -- confirm that is the intended meaning of "most flown date".
most_flown_date = date_aircraft_count.loc[
    date_aircraft_count["count"].idxmax()
]

In [None]:
# Bar chart of per-aircraft counts on the date stored in most_flown_date.
plt.figure(figsize=(16, 8))
sns.barplot(
    data=date_aircraft_count.loc[
        date_aircraft_count["date_flown"].eq(most_flown_date["date_flown"])
    ],
    x="aircraft",
    y="count",
).set(
    title=f'Most Flown Date: {most_flown_date["date_flown"]} - Aircraft Distribution',
    xlabel="Aircraft",
    ylabel="Count",
)
# Tilt the aircraft labels so long names stay readable.
plt.xticks(rotation=45, ha="right")
plt.tight_layout()
plt.show()

In [None]:
# Only reviews rated strictly above this value are considered below.
rating_threshold = 8

# Aircraft and rating of every review above the threshold.
filtered_aircraft_rating_df = airway_data.loc[
    airway_data["rating"] > rating_threshold,
    ["aircraft", "rating"],
]

# Mean of those above-threshold ratings per aircraft (reviews at or below the
# threshold do not contribute to the average).
average_rating_per_aircraft = (
    filtered_aircraft_rating_df
    .groupby("aircraft")["rating"]
    .mean()
    .reset_index(name="average_rating")
)

# Row of the aircraft with the highest such average.
top_rated_aircraft = average_rating_per_aircraft.loc[
    average_rating_per_aircraft["average_rating"].idxmax()
]


In [None]:
# Bar chart of the per-aircraft average computed from above-threshold reviews.
plt.figure(figsize=(12, 6))
sns.barplot(
    data=average_rating_per_aircraft,
    x="aircraft",
    y="average_rating",
    color="skyblue",
).set(
    title=f'Highly Rated Aircraft (Above {rating_threshold} Rating)',
    xlabel="Aircraft",
    ylabel="Average Rating",
)
# Vertical labels: there can be many aircraft names on the x axis.
plt.xticks(rotation=90, ha="right")
plt.tight_layout()
plt.show()


In [None]:
# Number of reviews per place, drawn as horizontal bars on a log-scaled count
# axis so that rarely reviewed places remain visible next to the frequent ones.
plt.figure(figsize=(25, 12))
plt.xscale("log")
sns.countplot(y="place", data=airway_data)
# Render the figure explicitly, like the other plotting cells, so the cell
# output is the chart rather than the Axes repr.
plt.show()

In [None]:
# Mean rating for every place x seat_type combination.
airway_data.pivot_table(
    values="rating",
    index="place",
    columns="seat_type",
    aggfunc="mean",
)

In [None]:
# Pairwise correlation between the overall rating and each sub-score.
res = airway_data.loc[
    :,
    [
        "rating",
        "seat_comfort",
        "cabin_staff_service",
        "food_beverages",
        "ground_service",
        "value_for_money",
    ],
].corr()

In [None]:
# Annotated heatmap of the correlation matrix held in `res`.
sns.heatmap(res, annot=True, cmap="viridis")
# Render the figure explicitly, like the other plotting cells, so the cell
# output is the chart rather than the Axes repr.
plt.show()

In [None]:
# Columns needed for the aircraft suggestion lookup.
columns_to_keep = ["place", "aircraft", "rating"]
# Restrict to those columns and discard any row missing one of them.
df_cleaned = airway_data.loc[:, columns_to_keep].dropna(how="any")

In [None]:
def suggest_aircraft(place, rating, data=None):
    """Return the reviews for ``place`` whose rating is strictly above ``rating``.

    Parameters
    ----------
    place
        Value compared with ``==`` against the ``place`` column.
    rating
        Exclusive lower bound for the ``rating`` column.
    data : pandas.DataFrame, optional
        Frame with ``place``, ``aircraft`` and ``rating`` columns. Defaults to
        the notebook-level ``df_cleaned`` so existing two-argument calls keep
        working.

    Returns
    -------
    pandas.DataFrame
        The ``aircraft`` and ``rating`` columns of the matching rows, sorted by
        rating from highest to lowest. When nothing matches, the frame is
        empty (same two columns) rather than a message string, so callers can
        always test ``result.empty`` without hitting an AttributeError.
    """
    reviews = df_cleaned if data is None else data

    is_match = (reviews["place"] == place) & (reviews["rating"] > rating)
    matches = reviews.loc[is_match, ["aircraft", "rating"]]

    # Sorting an empty selection is a no-op, so no special case is needed.
    return matches.sort_values(by="rating", ascending=False)

# Distinct place values present in the cleaned data.
available_places = df_cleaned.loc[:, "place"].unique()

In [None]:
# Example place and rating values (replace with your desired values)
selected_place = "India"
selected_rating = 4.0

if selected_place not in available_places:
    print("Invalid place. Please choose from the available options.")
    # map(str, ...) keeps the join working even if a place label is not a string.
    print("Available Places:", ", ".join(map(str, available_places)))
else:
    suggestion = suggest_aircraft(selected_place, selected_rating)

    # Check the type before reading `.empty`: a non-DataFrame result (such as a
    # plain message string) has no `.empty` attribute and would raise
    # AttributeError here instead of reporting that nothing matched.
    has_matches = isinstance(suggestion, pd.DataFrame) and not suggestion.empty

    if has_matches:
        print("Suggested Aircrafts:")
        print(suggestion)
    else:
        print(f"No suggested aircrafts found for {selected_place} with a rating higher than {selected_rating}")