In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import warnings
# Suppress every warning category for the rest of the session so cell outputs stay uncluttered.
# NOTE(review): this also hides deprecation notices from the imported libraries — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Key Questions

1. Which airline has the highest average price?

2. How does the price vary with the class of travel?

3. Is there a correlation between flight duration and price?

4. Which routes are the most popular?

5. How does the price vary across different source and destination cities?

6. How are flights distributed across departure and arrival times?

7. How does the number of stops vary by destination city?

8. What is the average flight duration for each airline?

In [None]:
# Read the airline fare CSV into the working DataFrame used by every cell below.
# NOTE(review): the absolute path looks like a Colab-mounted Drive folder — confirm before running elsewhere.
df = pd.read_csv(filepath_or_buffer='/content/drive/MyDrive/Data/indian airline.csv')

In [None]:
# Bare expression: the notebook renders a truncated preview (first and last rows) of the frame.
df

Unnamed: 0.1,Unnamed: 0,airline,flight,source_city,departure_time,stops,arrival_time,destination_city,class,duration,days_left,price
0,0,SpiceJet,SG-8709,Delhi,Evening,zero,Night,Mumbai,Economy,2.17,1,5953
1,1,SpiceJet,SG-8157,Delhi,Early_Morning,zero,Morning,Mumbai,Economy,2.33,1,5953
2,2,AirAsia,I5-764,Delhi,Early_Morning,zero,Early_Morning,Mumbai,Economy,2.17,1,5956
3,3,Vistara,UK-995,Delhi,Morning,zero,Afternoon,Mumbai,Economy,2.25,1,5955
4,4,Vistara,UK-963,Delhi,Morning,zero,Morning,Mumbai,Economy,2.33,1,5955
...,...,...,...,...,...,...,...,...,...,...,...,...
300148,300148,Vistara,UK-822,Chennai,Morning,one,Evening,Hyderabad,Business,10.08,49,69265
300149,300149,Vistara,UK-826,Chennai,Afternoon,one,Night,Hyderabad,Business,10.42,49,77105
300150,300150,Vistara,UK-832,Chennai,Early_Morning,one,Night,Hyderabad,Business,13.83,49,79099
300151,300151,Vistara,UK-828,Chennai,Early_Morning,one,Evening,Hyderabad,Business,10.00,49,81585


In [None]:
# Peek at the first five records.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,airline,flight,source_city,departure_time,stops,arrival_time,destination_city,class,duration,days_left,price
0,0,SpiceJet,SG-8709,Delhi,Evening,zero,Night,Mumbai,Economy,2.17,1,5953
1,1,SpiceJet,SG-8157,Delhi,Early_Morning,zero,Morning,Mumbai,Economy,2.33,1,5953
2,2,AirAsia,I5-764,Delhi,Early_Morning,zero,Early_Morning,Mumbai,Economy,2.17,1,5956
3,3,Vistara,UK-995,Delhi,Morning,zero,Afternoon,Mumbai,Economy,2.25,1,5955
4,4,Vistara,UK-963,Delhi,Morning,zero,Morning,Mumbai,Economy,2.33,1,5955


In [None]:
# Print column names, non-null counts, dtypes and the memory footprint.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300153 entries, 0 to 300152
Data columns (total 12 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   Unnamed: 0        300153 non-null  int64  
 1   airline           300153 non-null  object 
 2   flight            300153 non-null  object 
 3   source_city       300153 non-null  object 
 4   departure_time    300153 non-null  object 
 5   stops             300153 non-null  object 
 6   arrival_time      300153 non-null  object 
 7   destination_city  300153 non-null  object 
 8   class             300153 non-null  object 
 9   duration          300153 non-null  float64
 10  days_left         300153 non-null  int64  
 11  price             300153 non-null  int64  
dtypes: float64(1), int64(3), object(8)
memory usage: 27.5+ MB


In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()

Unnamed: 0.1,Unnamed: 0,duration,days_left,price
count,300153.0,300153.0,300153.0,300153.0
mean,150076.0,12.221021,26.004751,20889.660523
std,86646.852011,7.191997,13.561004,22697.767366
min,0.0,0.83,1.0,1105.0
25%,75038.0,6.83,15.0,4783.0
50%,150076.0,11.25,26.0,7425.0
75%,225114.0,16.17,38.0,42521.0
max,300152.0,49.83,49.0,123071.0


In [None]:
# (rows, columns) of the loaded frame.
df.shape

(300153, 12)

In [None]:
# Per-column tally of missing values (`isna` is the same check as `isnull`).
df.isna().sum()

Unnamed: 0,0
Unnamed: 0,0
airline,0
flight,0
source_city,0
departure_time,0
stops,0
arrival_time,0
destination_city,0
class,0
duration,0


In [None]:
# List the column labels of the loaded frame.
df.columns

Index(['Unnamed: 0', 'airline', 'flight', 'source_city', 'departure_time',
       'stops', 'arrival_time', 'destination_city', 'class', 'duration',
       'days_left', 'price'],
      dtype='object')

# 1. Which airline has the highest average price?

In [None]:
# Mean ticket price per airline, returned as a flat two-column frame (airline, price).
airline_avg_price = df.groupby('airline', as_index=False)['price'].mean()

In [None]:
# Order the airlines from most to least expensive on average.
sorted_airlines = airline_avg_price.sort_values(by='price', ascending=False)

In [None]:
# Bar chart of the mean fare per airline, using the descending order computed above.
fig = px.bar(
    data_frame=sorted_airlines,
    x='airline',
    y='price',
    labels={'airline': 'Airline', 'price': 'Average Price'},
    title='Average Price by Airline',
)
fig.show()

# 2. How does the price vary with the class of travel?

In [None]:
# Mean ticket price for each travel class, as a flat (class, price) frame.
class_avg_price = df.groupby('class', as_index=False)['price'].mean()

In [None]:
# Palette for the bars; only as many colours as there are classes are used.
colors = ['skyblue', 'coral', 'lightgreen']

# Average fares are not shares of a whole, so a pie chart's slice percentages
# would be meaningless here. Compare the per-class means with bars instead.
fig = go.Figure(data=[go.Bar(x=class_avg_price['class'],
                             y=class_avg_price['price'],
                             marker_color=colors[:len(class_avg_price)])])

fig.update_layout(title_text="Average Price by Travel Class",
                  xaxis_title="Class",
                  yaxis_title="Average Price")
fig.show()

# 3. Is there a correlation between flight duration and price?

In [None]:
# Pearson correlation coefficient between flight duration and ticket price.
correlation = df['duration'].corr(other=df['price'], method='pearson')
print(f"Correlation between flight duration and price: {correlation}")

Correlation between flight duration and price: 0.20422236784542702


In [None]:
# Scatter every row's duration against its price and overlay an ordinary-least-squares fit.
# NOTE(review): this plots the full frame; the saved output was too large to embed — consider sampling.
fig = px.scatter(
    data_frame=df,
    x='duration',
    y='price',
    trendline="ols",
    title='Correlation between Flight Duration and Price',
)
fig.show()

Output hidden; open in https://colab.research.google.com to view.

# 4. Which routes are the most popular?

In [None]:
# Count rows per (source, destination) pair and rank the pairs from busiest to quietest.
route_counts = (
    df.groupby(['source_city', 'destination_city'])
    .size()
    .reset_index(name='count')
    .sort_values('count', ascending=False)
)
# Keep the ten busiest routes for plotting.
top_routes = route_counts.head(10)

In [None]:
# Build a "Source-Destination" label for each of the top routes.
route_labels = (top_routes['source_city'] + '-' + top_routes['destination_city']).tolist()

# Pie chart of how the top routes' row counts split among themselves.
fig = px.pie(
    data_frame=top_routes,
    names=route_labels,
    values='count',
    color_discrete_sequence=px.colors.qualitative.Pastel,
    title='Most Popular Routes',
)
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.show()

# 5. How does the price vary across different source and destination cities?

In [None]:
# Mean ticket price for every (source city, destination city) pair.
city_avg_price = df.groupby(['source_city', 'destination_city'], as_index=False)['price'].mean()

In [None]:
# Grouped bars: one cluster per source city, one coloured bar per destination city.
fig = px.bar(city_avg_price, x='source_city', y='price', color='destination_city',
             title='Average Price by Source and Destination Cities',
             labels={'source_city': 'Source City', 'price': 'Average Price', 'destination_city': 'Destination City'},
             barmode='group')  # Use 'group' for grouped bars
# Rotate the x-axis tick labels so the city names do not overlap.
fig.update_layout(xaxis_tickangle=-45)
# Render explicitly, like the other charts in this notebook, instead of relying on
# the notebook echoing the figure returned by update_layout().
fig.show()

# 6. Departure and Arrival Time Counts

In [None]:
# Frequency of each departure time slot, as a (departure_time, count) frame.
departure_counts = (
    df['departure_time']
    .value_counts()
    .rename_axis('departure_time')
    .reset_index(name='count')
)

# Frequency of each arrival time slot, as an (arrival_time, count) frame.
arrival_counts = (
    df['arrival_time']
    .value_counts()
    .rename_axis('arrival_time')
    .reset_index(name='count')
)

In [None]:
# Two bar series on one figure: departure-slot counts followed by arrival-slot counts.
fig = go.Figure(data=[
    go.Bar(name='Departure Times',
           x=departure_counts['departure_time'],
           y=departure_counts['count']),
    go.Bar(name='Arrival Times',
           x=arrival_counts['arrival_time'],
           y=arrival_counts['count']),
])

fig.update_layout(
    barmode='group',  # place the two series side by side
    title='Departure and Arrival Time Counts',
    xaxis_title='Time',
    yaxis_title='Count')

fig.show()

In [None]:
# Show the first five rows again.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,airline,flight,source_city,departure_time,stops,arrival_time,destination_city,class,duration,days_left,price
0,0,SpiceJet,SG-8709,Delhi,Evening,zero,Night,Mumbai,Economy,2.17,1,5953
1,1,SpiceJet,SG-8157,Delhi,Early_Morning,zero,Morning,Mumbai,Economy,2.33,1,5953
2,2,AirAsia,I5-764,Delhi,Early_Morning,zero,Early_Morning,Mumbai,Economy,2.17,1,5956
3,3,Vistara,UK-995,Delhi,Morning,zero,Afternoon,Mumbai,Economy,2.25,1,5955
4,4,Vistara,UK-963,Delhi,Morning,zero,Morning,Mumbai,Economy,2.33,1,5955


# 7. Number of Stops by Destination City

In [None]:
# Number of rows for each (destination city, stops category) combination.
stops_by_destination = (
    df.groupby(['destination_city', 'stops'])
    .size()
    .reset_index(name='count')
)

In [None]:
# Bars per destination city, split by stops category.
# The y value is the number of flight rows in each (city, stops) group, not a
# count of stops, so the axis is labelled as flights.
fig = px.bar(stops_by_destination,
             x='destination_city',
             y='count',
             color='stops',
             title='Number of Stops by Destination City',
             labels={'destination_city': 'Destination City', 'count': 'Number of Flights', 'stops': 'Stops'})
fig.show()

# 8. Average Flight Duration by Airline

In [None]:
# Mean value of the duration column for each airline, as a flat (airline, duration) frame.
avg_duration_by_airline = df.groupby('airline', as_index=False)['duration'].mean()

In [None]:
# Bar chart of the mean flight duration per airline.
# The duration column is in hours, not minutes: the Delhi-Mumbai rows in the data
# preview show about 2.2 and the column mean is about 12, so the axis says hours.
fig = px.bar(avg_duration_by_airline, x='airline', y='duration',
             title='Average Flight Duration by Airline',
             labels={'airline': 'Airline', 'duration': 'Average Duration (hours)'})
fig.show()

# Conclusion

The Indian airline industry is characterized by a dynamic mix of low-cost carriers and full-service airlines, each catering to diverse consumer segments. The analysis reveals key insights into pricing strategies, the influence of travel class, and the impact of flight duration and city pairs on fares. Popular routes between major metropolitan cities remain competitive with frequent flights and lower fares, while routes to tier-2 and tier-3 cities often command higher prices.

Challenges such as rising fuel costs, infrastructure limitations, and regulatory hurdles continue to impact the industry, but the growing demand for air travel, particularly from regional markets, presents opportunities for expansion. To sustain growth, airlines must focus on optimizing operational efficiency, leveraging technological advancements, and adapting to evolving passenger preferences. Strategic recommendations from the analysis aim to help airlines, policymakers, and investors navigate the complexities of the Indian aviation market and foster long-term sustainability.







