In [1]:
### How does the price change with the source and destination cities?

In [2]:
import numpy as np
import pandas as pd
import chart_studio.plotly as py
import cufflinks as cf
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots


%matplotlib inline
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
init_notebook_mode(connected=True)
cf.go_offline

import warnings
warnings.filterwarnings('ignore')

In [3]:
# Read the flight dataset, add a "<source>_<destination>" route label and
# discard the first column of the file (selected by position, not by name).
df = pd.read_csv('clean_dataset.csv')

df = (
    df.assign(destinations=df['source_city'] + '_' + df['destination_city'])
      .drop(columns=df.columns[0])
)

# Last expression of the cell: rendered as the cell output.
df

Unnamed: 0,airline,flight,source_city,departure_time,stops,arrival_time,destination_city,class,duration,days_left,price,destinations
0,SpiceJet,SG-8709,Delhi,Evening,zero,Night,Mumbai,Economy,2.17,1,5953,Delhi_Mumbai
1,SpiceJet,SG-8157,Delhi,Early_Morning,zero,Morning,Mumbai,Economy,2.33,1,5953,Delhi_Mumbai
2,AirAsia,I5-764,Delhi,Early_Morning,zero,Early_Morning,Mumbai,Economy,2.17,1,5956,Delhi_Mumbai
3,Vistara,UK-995,Delhi,Morning,zero,Afternoon,Mumbai,Economy,2.25,1,5955,Delhi_Mumbai
4,Vistara,UK-963,Delhi,Morning,zero,Morning,Mumbai,Economy,2.33,1,5955,Delhi_Mumbai
...,...,...,...,...,...,...,...,...,...,...,...,...
300148,Vistara,UK-822,Chennai,Morning,one,Evening,Hyderabad,Business,10.08,49,69265,Chennai_Hyderabad
300149,Vistara,UK-826,Chennai,Afternoon,one,Night,Hyderabad,Business,10.42,49,77105,Chennai_Hyderabad
300150,Vistara,UK-832,Chennai,Early_Morning,one,Night,Hyderabad,Business,13.83,49,79099,Chennai_Hyderabad
300151,Vistara,UK-828,Chennai,Early_Morning,one,Evening,Hyderabad,Business,10.00,49,81585,Chennai_Hyderabad


In [20]:
# Economy-class subset of the flights table.
economy_flights = df[df['class'] == 'Economy']

# Bubble chart of routes (source city vs. destination city); marker size
# encodes the ticket price. The economy subset is used (not the full `df`)
# so that the plotted data matches the "Economy Class" title.
fig = px.scatter(economy_flights, x="source_city", y="destination_city", size="price",
                 hover_name="airline", title="Price by Source and Destination for Economy Class")

# Same routes with the price mapped to marker colour instead of size.
# Only referenced by the disabled add_trace call below.
fig_economy = px.scatter(economy_flights, x="source_city", y="destination_city", color="price",
                         hover_data=["class"])

# Rendering is currently disabled in this cell.
# fig.add_trace(fig_economy.data[0])

# # fig.show()

In [22]:
# Business-class subset of the flights table.
business_flights = df[df['class'] == 'Business']

# Bubble chart of routes (source city vs. destination city); marker size
# encodes the ticket price. The business subset is used (not the full `df`)
# so that the plotted data matches the "Business Class" title.
fig = px.scatter(business_flights, x="source_city", y="destination_city", size="price",
                 hover_name="airline", title="Price by Source and Destination for Business Class")

# Same routes with the price mapped to marker colour instead of size.
# Only referenced by the disabled add_trace call below.
fig_business = px.scatter(business_flights, x="source_city", y="destination_city", color="price",
                         hover_data=["class"])

# Rendering is currently disabled in this cell.
# fig.add_trace(fig_business.data[0])

# fig.show()

In [6]:
# Flights departing from Delhi, and flights arriving in Delhi.
delhi = df[df['source_city'] == 'Delhi']
delhi_rtn = df[df['destination_city'] == 'Delhi']

# Left panel: prices out of Delhi. Right panel: prices into Delhi.
fig = make_subplots(rows=1, cols=2, subplot_titles=('Flight Prices from Delhi', 'Flight Prices to Delhi'))

# Min / mean / max ticket price per destination city for departures.
agg_df_dest = delhi.groupby('destination_city')['price'].agg(['min', 'mean', 'max']).reset_index()
# `customdata` holds one (min, max) row per plotted point, so the hover box
# shows the range of the city under the cursor. Passing `text=[mins, maxes]`
# does not work: `text` is per-point, and a two-element list only labels the
# first two points (with whole Series rather than a single value).
scatter_dest = go.Scatter(x=agg_df_dest['destination_city'], y=agg_df_dest['mean'], mode='markers+lines',
                          name='Price', hovertemplate='Mean: %{y:.0f}<br>Min: %{customdata[0]:.0f}<br>Max: %{customdata[1]:.0f}',
                          customdata=agg_df_dest[['min', 'max']].to_numpy())
fig.add_trace(scatter_dest, row=1, col=1)

# Min / mean / max ticket price per source city for arrivals.
agg_df_src = delhi_rtn.groupby('source_city')['price'].agg(['min', 'mean', 'max']).reset_index()
scatter_src = go.Scatter(x=agg_df_src['source_city'], y=agg_df_src['mean'], mode='markers+lines',
                         name='Price', hovertemplate='Mean: %{y:.0f}<br>Min: %{customdata[0]:.0f}<br>Max: %{customdata[1]:.0f}',
                         customdata=agg_df_src[['min', 'max']].to_numpy())
fig.add_trace(scatter_src, row=1, col=2)

# Flight counts per class, drawn on the same panels as the price lines.
# NOTE(review): counts and prices share one y-axis here — confirm intended.
hist_dest = go.Histogram(x=delhi['class'], name='Class', nbinsx=5)
fig.add_trace(hist_dest, row=1, col=1)

hist_src = go.Histogram(x=delhi_rtn['class'], name='Class', nbinsx=5)
fig.add_trace(hist_src, row=1, col=2)

fig.update_layout(title='Flight Prices According To Delhi With Classes', showlegend=True, legend_title_text='Price Range', template='plotly_dark')
# fig.show()

In [18]:
# Flights departing from Mumbai, and flights arriving in Mumbai.
mumbai = df[df['source_city'] == 'Mumbai']
mumbai_rtn = df[df['destination_city'] == 'Mumbai']

# Left panel: prices out of Mumbai. Right panel: prices into Mumbai.
fig = make_subplots(rows=1, cols=2, subplot_titles=('Flight Prices from Mumbai', 'Flight Prices to Mumbai'))

# Min / mean / max ticket price per destination city for departures.
agg_df_dest = mumbai.groupby('destination_city')['price'].agg(['min', 'mean', 'max']).reset_index()
# `customdata` holds one (min, max) row per plotted point, so the hover box
# shows the range of the city under the cursor. Passing `text=[mins, maxes]`
# does not work: `text` is per-point, and a two-element list only labels the
# first two points (with whole Series rather than a single value).
scatter_dest = go.Scatter(x=agg_df_dest['destination_city'], y=agg_df_dest['mean'], mode='markers+lines',
                          name='Price', hovertemplate='Mean: %{y:.0f}<br>Min: %{customdata[0]:.0f}<br>Max: %{customdata[1]:.0f}',
                          customdata=agg_df_dest[['min', 'max']].to_numpy())
fig.add_trace(scatter_dest, row=1, col=1)

# Min / mean / max ticket price per source city for arrivals.
# The trace is built but only used by the disabled add_trace call below.
agg_df_src = mumbai_rtn.groupby('source_city')['price'].agg(['min', 'mean', 'max']).reset_index()
scatter_src = go.Scatter(x=agg_df_src['source_city'], y=agg_df_src['mean'], mode='markers+lines',
                         name='Price', hovertemplate='Mean: %{y:.0f}<br>Min: %{customdata[0]:.0f}<br>Max: %{customdata[1]:.0f}',
                         customdata=agg_df_src[['min', 'max']].to_numpy())
# fig.add_trace(scatter_src, row=1, col=2)

# hist_dest = go.Histogram(x=mumbai['class'], name='Class', nbinsx=5)
# fig.add_trace(hist_dest, row=1, col=1)

# hist_src = go.Histogram(x=mumbai_rtn['class'], name='Class', nbinsx=5)
# fig.add_trace(hist_src, row=1, col=2)

# fig.update_layout(title='Flight Prices According To Mumbai With Classes', showlegend=True, legend_title_text='Price Range', template='plotly_dark')
# # fig.show()

In [14]:
# Flights departing from Bangalore, and flights arriving in Bangalore.
# NOTE(review): `bangolore_rtn` is misspelled; the name is kept because other
# cells may reference it.
bangalore = df[df['source_city'] == 'Bangalore']
bangolore_rtn = df[df['destination_city'] == 'Bangalore']

# Left panel: prices out of Bangalore. Right panel: prices into Bangalore.
fig = make_subplots(rows=1, cols=2, subplot_titles=('Flight Prices from Bangalore', 'Flight Prices to Bangalore'))

# Min / mean / max ticket price per destination city for departures.
agg_df_dest = bangalore.groupby('destination_city')['price'].agg(['min', 'mean', 'max']).reset_index()
# `customdata` holds one (min, max) row per plotted point, so the hover box
# shows the range of the city under the cursor. Passing `text=[mins, maxes]`
# does not work: `text` is per-point, and a two-element list only labels the
# first two points (with whole Series rather than a single value).
scatter_dest = go.Scatter(x=agg_df_dest['destination_city'], y=agg_df_dest['mean'], mode='markers+lines',
                          name='Price', hovertemplate='Mean: %{y:.0f}<br>Min: %{customdata[0]:.0f}<br>Max: %{customdata[1]:.0f}',
                          customdata=agg_df_dest[['min', 'max']].to_numpy())
fig.add_trace(scatter_dest, row=1, col=1)

# Min / mean / max ticket price per source city for arrivals.
# The trace is built but only used by the disabled add_trace call below.
agg_df_src = bangolore_rtn.groupby('source_city')['price'].agg(['min', 'mean', 'max']).reset_index()
scatter_src = go.Scatter(x=agg_df_src['source_city'], y=agg_df_src['mean'], mode='markers+lines',
                         name='Price', hovertemplate='Mean: %{y:.0f}<br>Min: %{customdata[0]:.0f}<br>Max: %{customdata[1]:.0f}',
                         customdata=agg_df_src[['min', 'max']].to_numpy())
# fig.add_trace(scatter_src, row=1, col=2)

# hist_dest = go.Histogram(x=bangalore['class'], name='Class', nbinsx=5)
# fig.add_trace(hist_dest, row=1, col=1)

# hist_src = go.Histogram(x=bangolore_rtn['class'], name='Class', nbinsx=5)
# fig.add_trace(hist_src, row=1, col=2)

# # fig.update_layout(title='Flight Prices According To Bangalore With Classes', showlegend=True, legend_title_text='Price Range', template='plotly_dark')
# # # fig.show()

In [15]:
# Flights departing from Kolkata, and flights arriving in Kolkata.
# NOTE(review): the `kolkota` spelling is kept for the variable names because
# other cells may reference them.
kolkota = df[df['source_city'] == 'Kolkata']
kolkota_rtn = df[df['destination_city'] == 'Kolkata']

# Left panel: prices out of Kolkata. Right panel: prices into Kolkata.
fig = make_subplots(rows=1, cols=2, subplot_titles=('Flight Prices from Kolkata', 'Flight Prices to Kolkata'))

# Min / mean / max ticket price per destination city for departures.
agg_df_dest = kolkota.groupby('destination_city')['price'].agg(['min', 'mean', 'max']).reset_index()
# `customdata` holds one (min, max) row per plotted point, so the hover box
# shows the range of the city under the cursor. Passing `text=[mins, maxes]`
# does not work: `text` is per-point, and a two-element list only labels the
# first two points (with whole Series rather than a single value).
scatter_dest = go.Scatter(x=agg_df_dest['destination_city'], y=agg_df_dest['mean'], mode='markers+lines',
                          name='Price', hovertemplate='Mean: %{y:.0f}<br>Min: %{customdata[0]:.0f}<br>Max: %{customdata[1]:.0f}',
                          customdata=agg_df_dest[['min', 'max']].to_numpy())
fig.add_trace(scatter_dest, row=1, col=1)

# Min / mean / max ticket price per source city for arrivals.
# The trace is built but only used by the disabled add_trace call below.
agg_df_src = kolkota_rtn.groupby('source_city')['price'].agg(['min', 'mean', 'max']).reset_index()
scatter_src = go.Scatter(x=agg_df_src['source_city'], y=agg_df_src['mean'], mode='markers+lines',
                         name='Price', hovertemplate='Mean: %{y:.0f}<br>Min: %{customdata[0]:.0f}<br>Max: %{customdata[1]:.0f}',
                         customdata=agg_df_src[['min', 'max']].to_numpy())
# fig.add_trace(scatter_src, row=1, col=2)

# hist_dest = go.Histogram(x=kolkota['class'], name='Class', nbinsx=5)
# fig.add_trace(hist_dest, row=1, col=1)

# hist_src = go.Histogram(x=kolkota_rtn['class'], name='Class', nbinsx=5)
# fig.add_trace(hist_src, row=1, col=2)

# fig.update_layout(title='Flight Prices According To Kolkata With Classes', showlegend=True, legend_title_text='Price Range', template='plotly_dark')
# # fig.show()

In [16]:
# Flights departing from Chennai, and flights arriving in Chennai.
chennai = df[df['source_city'] == 'Chennai']
chennai_rtn = df[df['destination_city'] == 'Chennai']

# Left panel: prices out of Chennai. Right panel: prices into Chennai.
fig = make_subplots(rows=1, cols=2, subplot_titles=('Flight Prices from Chennai', 'Flight Prices to Chennai'))

# Min / mean / max ticket price per destination city for departures.
agg_df_dest = chennai.groupby('destination_city')['price'].agg(['min', 'mean', 'max']).reset_index()
# `customdata` holds one (min, max) row per plotted point, so the hover box
# shows the range of the city under the cursor. Passing `text=[mins, maxes]`
# does not work: `text` is per-point, and a two-element list only labels the
# first two points (with whole Series rather than a single value).
scatter_dest = go.Scatter(x=agg_df_dest['destination_city'], y=agg_df_dest['mean'], mode='markers+lines',
                          name='Price', hovertemplate='Mean: %{y:.0f}<br>Min: %{customdata[0]:.0f}<br>Max: %{customdata[1]:.0f}',
                          customdata=agg_df_dest[['min', 'max']].to_numpy())
fig.add_trace(scatter_dest, row=1, col=1)

# Min / mean / max ticket price per source city for arrivals.
# The trace is built but only used by the disabled add_trace call below.
agg_df_src = chennai_rtn.groupby('source_city')['price'].agg(['min', 'mean', 'max']).reset_index()
scatter_src = go.Scatter(x=agg_df_src['source_city'], y=agg_df_src['mean'], mode='markers+lines',
                         name='Price', hovertemplate='Mean: %{y:.0f}<br>Min: %{customdata[0]:.0f}<br>Max: %{customdata[1]:.0f}',
                         customdata=agg_df_src[['min', 'max']].to_numpy())
# fig.add_trace(scatter_src, row=1, col=2)

# hist_dest = go.Histogram(x=chennai['class'], name='Class', nbinsx=5)
# fig.add_trace(hist_dest, row=1, col=1)

# hist_src = go.Histogram(x=chennai_rtn['class'], name='Class', nbinsx=5)
# fig.add_trace(hist_src, row=1, col=2)

# fig.update_layout(title='Flight Prices According To Chennai With Classes', showlegend=True, legend_title_text='Price Range', template='plotly_dark')
# # fig.show()

In [17]:
# Flights departing from Hyderabad, and flights arriving in Hyderabad.
hyderabad = df[df['source_city'] == 'Hyderabad']
hyderabad_rtn = df[df['destination_city'] == 'Hyderabad']

# Left panel: prices out of Hyderabad. Right panel: prices into Hyderabad.
fig = make_subplots(rows=1, cols=2, subplot_titles=('Flight Prices from Hyderabad', 'Flight Prices to Hyderabad'))

# Min / mean / max ticket price per destination city for departures.
agg_df_dest = hyderabad.groupby('destination_city')['price'].agg(['min', 'mean', 'max']).reset_index()
# `customdata` holds one (min, max) row per plotted point, so the hover box
# shows the range of the city under the cursor. Passing `text=[mins, maxes]`
# does not work: `text` is per-point, and a two-element list only labels the
# first two points (with whole Series rather than a single value).
scatter_dest = go.Scatter(x=agg_df_dest['destination_city'], y=agg_df_dest['mean'], mode='markers+lines',
                          name='Price', hovertemplate='Mean: %{y:.0f}<br>Min: %{customdata[0]:.0f}<br>Max: %{customdata[1]:.0f}',
                          customdata=agg_df_dest[['min', 'max']].to_numpy())
fig.add_trace(scatter_dest, row=1, col=1)

# Min / mean / max ticket price per source city for arrivals.
# The trace is built but only used by the disabled add_trace call below.
agg_df_src = hyderabad_rtn.groupby('source_city')['price'].agg(['min', 'mean', 'max']).reset_index()
scatter_src = go.Scatter(x=agg_df_src['source_city'], y=agg_df_src['mean'], mode='markers+lines',
                         name='Price', hovertemplate='Mean: %{y:.0f}<br>Min: %{customdata[0]:.0f}<br>Max: %{customdata[1]:.0f}',
                         customdata=agg_df_src[['min', 'max']].to_numpy())
# fig.add_trace(scatter_src, row=1, col=2)

# hist_dest = go.Histogram(x=hyderabad['class'], name='Class', nbinsx=5)
# fig.add_trace(hist_dest, row=1, col=1)

# hist_src = go.Histogram(x=hyderabad_rtn['class'], name='Class', nbinsx=5)
# fig.add_trace(hist_src, row=1, col=2)

# fig.update_layout(title='Flight Prices According To Hyderabad With Classes', showlegend=True, legend_title_text='Price Range', template='plotly_dark')
# # fig.show()

### Across every city we examined, economy flights appear to be roughly twice as numerous as business flights. In addition, the highest economy fare reaches about 40 thousand rupees, whereas the highest business fare is around 120 thousand rupees.