### Which flights are the most and least used for each airline, and which are the most and least used flights by destination and class?

In [3]:
import numpy as np
import pandas as pd
import chart_studio.plotly as py
import cufflinks as cf
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots

from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
# Enable plotly's notebook rendering, loading plotly.js from the CDN.
init_notebook_mode(connected=True)
# go_offline has to be *called*: the bare attribute reference `cf.go_offline`
# evaluated the function object and discarded it, so it never took effect.
cf.go_offline()

import warnings
# Suppress every warning for the rest of the notebook to keep cell output clean.
warnings.filterwarnings('ignore')

In [4]:
# Load the flight records, append a combined "<source>_<destination>" route
# label, and discard the first column of the CSV. Built as a single chain so
# the frame is produced in one step without in-place mutation.
df = (
    pd.read_csv('clean_dataset.csv')
    .assign(destinations=lambda frame: frame['source_city'] + '_' + frame['destination_city'])
    .pipe(lambda frame: frame.drop(columns=frame.columns[0]))
)

df

Unnamed: 0,airline,flight,source_city,departure_time,stops,arrival_time,destination_city,class,duration,days_left,price,destinations
0,SpiceJet,SG-8709,Delhi,Evening,zero,Night,Mumbai,Economy,2.17,1,5953,Delhi_Mumbai
1,SpiceJet,SG-8157,Delhi,Early_Morning,zero,Morning,Mumbai,Economy,2.33,1,5953,Delhi_Mumbai
2,AirAsia,I5-764,Delhi,Early_Morning,zero,Early_Morning,Mumbai,Economy,2.17,1,5956,Delhi_Mumbai
3,Vistara,UK-995,Delhi,Morning,zero,Afternoon,Mumbai,Economy,2.25,1,5955,Delhi_Mumbai
4,Vistara,UK-963,Delhi,Morning,zero,Morning,Mumbai,Economy,2.33,1,5955,Delhi_Mumbai
...,...,...,...,...,...,...,...,...,...,...,...,...
300148,Vistara,UK-822,Chennai,Morning,one,Evening,Hyderabad,Business,10.08,49,69265,Chennai_Hyderabad
300149,Vistara,UK-826,Chennai,Afternoon,one,Night,Hyderabad,Business,10.42,49,77105,Chennai_Hyderabad
300150,Vistara,UK-832,Chennai,Early_Morning,one,Night,Hyderabad,Business,13.83,49,79099,Chennai_Hyderabad
300151,Vistara,UK-828,Chennai,Early_Morning,one,Evening,Hyderabad,Business,10.00,49,81585,Chennai_Hyderabad


In [6]:
# For every airline, find the (flight, source city, destination city)
# combination with the most rows, i.e. the busiest flight on a specific route.

max_used_flights_destinations = (
    df.groupby(['airline', 'flight', 'source_city', 'destination_city'])
    .size()                            # rows per airline/flight/route
    .groupby('airline')
    .idxmax()                          # index key of the largest count per airline
    .apply(lambda key: key[1:])        # drop the airline; it is already the index
)
max_used_flights_destinations

airline
AirAsia        (I5-1529, Delhi, Kolkata)
Air_India      (AI-440, Chennai, Mumbai)
GO_FIRST       (G8-227, Kolkata, Mumbai)
Indigo       (6E-6205, Delhi, Bangalore)
SpiceJet        (SG-612, Chennai, Delhi)
Vistara        (UK-706, Kolkata, Mumbai)
dtype: object

In [7]:
# For every airline, find the (flight, source city, destination city)
# combination with the fewest rows, i.e. the least used flight on a specific route.
min_used_flights_destinations = (
    df.groupby(['airline', 'flight', 'source_city', 'destination_city'])
    .size()                            # rows per airline/flight/route
    .groupby('airline')
    .idxmin()                          # index key of the smallest count per airline
    .apply(lambda key: key[1:])        # drop the airline; it is already the index
)
min_used_flights_destinations

airline
AirAsia       (I5-2461, Bangalore, Delhi)
Air_India      (AI-406, Delhi, Bangalore)
GO_FIRST          (G8-107, Delhi, Mumbai)
Indigo       (6E-132, Hyderabad, Kolkata)
SpiceJet        (SG-1058, Kolkata, Delhi)
Vistara       (UK-655, Mumbai, Bangalore)
dtype: object

In [14]:
# Flight numbers of each airline's busiest (flight, route) combination.
# Read from `max_used_flights_destinations` (computed in an earlier cell, whose
# values are (flight, source_city, destination_city) tuples) instead of a
# hand-copied list, so the chart cannot drift out of sync with the data.
max_flight_counts = [key[0] for key in max_used_flights_destinations]

airlines = df['airline'].unique()

# Filter to the selected flights once instead of re-scanning the whole frame
# for every (airline, flight) and (class, flight) pair.
max_flight_rows = df[df['flight'].isin(max_flight_counts)]

# Non-null `price` counts per flight x airline. Combinations that never occur
# are filled with 0 so that every bar still gets a value.
max_airline_table = (
    max_flight_rows.groupby(['flight', 'airline'])['price'].count()
    .unstack(fill_value=0)
    .reindex(index=max_flight_counts, columns=airlines, fill_value=0)
)

# Same counts per flight x class, restricted to the two plotted classes.
max_class_table = (
    max_flight_rows.groupby(['flight', 'class'])['price'].count()
    .unstack(fill_value=0)
    .reindex(index=max_flight_counts, columns=['Economy', 'Business'], fill_value=0)
)

fig = go.Figure()

# One bar series per airline, in order of first appearance in the data.
for airline in airlines:
    fig.add_trace(go.Bar(name=f'{airline} - Flight Counts', x=max_flight_counts,
                         y=max_airline_table[airline].tolist()))

economy_counts = tuple(max_class_table['Economy'].tolist())
business_counts = tuple(max_class_table['Business'].tolist())
class_counts = list(zip(economy_counts, business_counts))

fig.add_trace(go.Bar(name='Economy Class', x=max_flight_counts, y=economy_counts))
fig.add_trace(go.Bar(name='Business Class', x=max_flight_counts, y=business_counts))

fig.update_layout(barmode='group', title='Maximum Flight Counts by Airline, Flight Number and Class', xaxis_title='Flight Number', yaxis_title='Count')

fig.show()

In [15]:
# Flight numbers of each airline's least used (flight, route) combination.
# Read from `min_used_flights_destinations` (computed in an earlier cell, whose
# values are (flight, source_city, destination_city) tuples) instead of a
# hand-copied list, so the chart cannot drift out of sync with the data.
min_flight_counts = [key[0] for key in min_used_flights_destinations]

airlines = df['airline'].unique()

# Filter to the selected flights once instead of re-scanning the whole frame
# for every (airline, flight) and (class, flight) pair.
min_flight_rows = df[df['flight'].isin(min_flight_counts)]

# Non-null `price` counts per flight x airline. Combinations that never occur
# are filled with 0 so that every bar still gets a value.
min_airline_table = (
    min_flight_rows.groupby(['flight', 'airline'])['price'].count()
    .unstack(fill_value=0)
    .reindex(index=min_flight_counts, columns=airlines, fill_value=0)
)

# Same counts per flight x class, restricted to the two plotted classes.
min_class_table = (
    min_flight_rows.groupby(['flight', 'class'])['price'].count()
    .unstack(fill_value=0)
    .reindex(index=min_flight_counts, columns=['Economy', 'Business'], fill_value=0)
)

fig = go.Figure()

# One bar series per airline, in order of first appearance in the data.
for airline in airlines:
    fig.add_trace(go.Bar(name=f'{airline} - Flight Counts', x=min_flight_counts,
                         y=min_airline_table[airline].tolist()))

economy_counts = tuple(min_class_table['Economy'].tolist())
business_counts = tuple(min_class_table['Business'].tolist())
class_counts = list(zip(economy_counts, business_counts))

fig.add_trace(go.Bar(name='Economy Class', x=min_flight_counts, y=economy_counts))
fig.add_trace(go.Bar(name='Business Class', x=min_flight_counts, y=business_counts))

fig.update_layout(barmode='group', title='Minimum Flight Counts by Airline, Flight Number and Class', xaxis_title='Flight Number', yaxis_title='Count')

fig.show()

In [12]:
# Most used flight number per airline, ranked on row counts for the
# (airline, flight) pair alone; source and destination cities are not part of
# the grouping.

flights_per_airline = df.groupby(['airline', 'flight']).size()
max_used_flights = flights_per_airline.groupby('airline').idxmax().values

# One filtered sub-frame per winning (airline, flight) pair, reused by the lists below.
most_used_rows = [
    df[(df['airline'] == carrier) & (df['flight'] == number)]
    for carrier, number in max_used_flights
]

flight_list = [number for _, number in max_used_flights]
airline_list = [carrier for carrier, _ in max_used_flights]
count_list = [rows.shape[0] for rows in most_used_rows]
# The hover text shows the cities of the first matching row only.
source_city_list = [rows['source_city'].iloc[0] for rows in most_used_rows]
destination_city_list = [rows['destination_city'].iloc[0] for rows in most_used_rows]

most_used_bar = go.Bar(
    x=airline_list,
    y=count_list,
    text=flight_list,
    textposition='auto',
    hovertemplate='Airline: %{x}<br>Flight: %{text}<br>Count: %{y}<br>Source City: %{customdata[0]}<br>Destination City: %{customdata[1]}',
    customdata=list(zip(source_city_list, destination_city_list)),
)
fig = go.Figure([most_used_bar])

fig.update_layout(
    title='Most Used Flights for Each Airline',
    xaxis_title='Airline',
    yaxis_title='Count',
    template='plotly_dark',
)
fig.show()

In [13]:
# Least used flight number per airline, ranked on row counts for the
# (airline, flight) pair alone; source and destination cities are not part of
# the grouping.

flights_per_airline = df.groupby(['airline', 'flight']).size()
min_used_flights = flights_per_airline.groupby('airline').idxmin().values

# One filtered sub-frame per selected (airline, flight) pair, reused by the lists below.
least_used_rows = [
    df[(df['airline'] == carrier) & (df['flight'] == number)]
    for carrier, number in min_used_flights
]

flight_list = [number for _, number in min_used_flights]
airline_list = [carrier for carrier, _ in min_used_flights]
count_list = [rows.shape[0] for rows in least_used_rows]
# The hover text shows the cities of the first matching row only.
source_city_list = [rows['source_city'].iloc[0] for rows in least_used_rows]
destination_city_list = [rows['destination_city'].iloc[0] for rows in least_used_rows]

least_used_bar = go.Bar(
    x=airline_list,
    y=count_list,
    text=flight_list,
    textposition='auto',
    hovertemplate='Airline: %{x}<br>Flight: %{text}<br>Count: %{y}<br>Source City: %{customdata[0]}<br>Destination City: %{customdata[1]}',
    customdata=list(zip(source_city_list, destination_city_list)),
)
fig = go.Figure([least_used_bar])

fig.update_layout(
    title='Less Used Flights for Each Airline',
    xaxis_title='Airline',
    yaxis_title='Count',
    template='plotly_dark',
)
fig.show()

### In this study, we visualized each airline's most and least used flights (identified by flight number), the number of these flights, their class distribution, and their source and destination cities.