In [0]:
# import the required Python libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [0]:
# Load the flights CSV into the `data` DataFrame used by every later cell.
# NOTE(review): absolute path; presumably environment-specific — confirm before sharing.

FLIGHTS_CSV = "/Volumes/de_by_bbytes/flight_data_analysis/fda_raw_dataset/airlines_flights_data.csv"
data = pd.read_csv(FLIGHTS_CSV)

data

In [0]:
### Cleaning the data

# Remove the 'index' column.
# drop() returns a new frame instead of mutating in place, and errors='ignore'
# skips the column when it is already absent, so this cell can be re-run
# without raising KeyError (the previous in-place drop failed on a second run).

data = data.drop(columns='index', errors='ignore')

data

# Get some Info about the dataset

data.info()

# Get Statistical summary about the dataset

data.describe()

# Look up the rows matching specific duration / price values.
# NOTE(review): these literals presumably are the max/min printed by
# describe() above — confirm against the summary output.

data[ data['duration'] == 49.830000 ]

data[ data['duration'] == 0.830000 ]

data [ data['price'] == 123071.000000 ]

data [ data['price'] == 1105.000000 ]

data.isnull().sum()            # check for the missing values in any column

In [0]:
## Q.1. `What are the airlines in the dataset, accompanied by their frequencies?`

data.head()

# Number of distinct airlines in the dataset

data['airline'].nunique()

# Names of the airlines in the dataset

data['airline'].unique()

# Number of rows (flights) recorded for each airline

data['airline'].value_counts()

# Horizontal bar chart of flights per airline, sorted ascending;
# the two colours alternate across the bars.

ax = data['airline'].value_counts(ascending=True).plot.barh(
    color=['lightgreen', 'lightblue']
)
ax.set_title("Airlines with frequencies")
ax.set_xlabel(" Number of flights")
ax.set_ylabel(" Airlines")

plt.show()

In [0]:
## Q.2. `Show Bar Graphs representing the Departure Time & Arrival Time`

data.head()

# Frequency of each departure time slot

data['departure_time'].value_counts()

# Frequency of each arrival time slot

data['arrival_time'].value_counts()

# Side-by-side bar charts of the departure and arrival time counts.
# Each panel spec is (column, title, x label, y label, bar colours).

fig, axes = plt.subplots(1, 2, figsize=(16, 4))

panel_specs = [
    ('departure_time', "Departure Time", "D. Time", "D. Freq", ['r', 'b']),
    ('arrival_time', "Arrival Time", "A. Time", "A. Freq", ['g', 'y']),
]

for ax, (column, title, x_label, y_label, bar_colors) in zip(axes, panel_specs):
    # Count once per column and reuse the result for both bar positions and heights
    counts = data[column].value_counts()
    ax.bar(counts.index, counts.values, color=bar_colors)
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

plt.show()

In [0]:
## Q.3. `Show Bar Graphs representing the Source City & Destination City`

data.head()

# Number of flights leaving from each source city

source_counts = data['source_city'].value_counts()
source_counts

# Number of flights arriving at each destination city

destination_counts = data['destination_city'].value_counts()
destination_counts

# Horizontal bar charts of both counts, drawn next to each other

fig, (source_ax, destination_ax) = plt.subplots(1, 2, figsize=(16, 4))

source_ax.barh(source_counts.index, source_counts.values, color=['m', 'g'])
source_ax.set_title("Source Cities with No. of flights")
source_ax.set_ylabel("Cities")
source_ax.set_xlabel("No. of flights")

destination_ax.barh(destination_counts.index, destination_counts.values, color=['y', 'r'])
destination_ax.set_title("Destination Cities with No. of flights")
destination_ax.set_ylabel("Cities")
destination_ax.set_xlabel("No. of flights")

plt.show()


In [0]:

## Q.4. `Does price vary with airlines?`

data.head()

# Mean ticket price per airline

data.groupby('airline')['price'].mean()

# Bar-type categorical plot of price per airline, with one bar per
# travel class (hue) inside each airline group

sns.catplot(
    data=data,
    x='airline',
    y='price',
    hue='class',
    kind='bar',
    palette='rocket',
)

plt.show()

In [0]:
## Q.5. `Does ticket price change based on the departure time and arrival time?`

data.head()

# Mean ticket price for each departure time slot

data.groupby('departure_time')['price'].mean()

# Mean ticket price for each arrival time slot

data.groupby('arrival_time')['price'].mean()

# One bar-type categorical plot of price per time slot:
# first for departure, then for arrival

for time_column in ('departure_time', 'arrival_time'):
    sns.catplot(data=data, x=time_column, y='price', kind='bar')
    plt.show()

# Price against arrival time as a line plot, one panel (column) per departure time

sns.relplot(
    data=data,
    x='arrival_time',
    y='price',
    col='departure_time',
    kind='line',
)

plt.show()

In [0]:
## Q.6. `How does the price change with a change in Source and Destination?`

data.head()

# Mean ticket price grouped by source city

data.groupby('source_city')['price'].mean()

# Mean ticket price grouped by destination city

data.groupby('destination_city')['price'].mean()

# Price against destination city as a line plot, one panel (column) per source city

sns.relplot(
    data=data,
    x='destination_city',
    y='price',
    col="source_city",
    kind='line',
)

plt.show()

In [0]:

### Q.7. `How is the price affected when tickets are bought just 1 or 2 days before departure?`

data.head()

# How many distinct days_left values exist, and which ones

data['days_left'].nunique()

data['days_left'].unique()

# Mean ticket price for each days_left value

data.groupby('days_left')['price'].mean()

# Line plot of price against days_left

sns.relplot(data=data, x='days_left', y='price', kind='line')

plt.show()

In [0]:
## Q.8. `How does the ticket price vary between Economy and Business class?`

data.head()

data['class'].unique()

# Keep only the rows whose class is 'Economy'

x = data.loc[data['class'].eq('Economy')]

x

# Mean price over the Economy rows

x['price'].mean()

# Keep only the rows whose class is 'Business'

y = data.loc[data['class'].eq('Business')]

y

# Mean price over the Business rows

y['price'].mean()

In [0]:


## Q.9. `What will be the Average Price of Vistara airline for a flight from Delhi to Hyderabad in Business Class ?`

data.head(1)

# Keep the rows that satisfy all four conditions at once:
# airline, source city, destination city and travel class

new_data = data.loc[
    data['airline'].eq('Vistara')
    & data['source_city'].eq('Delhi')
    & data['destination_city'].eq('Hyderabad')
    & data['class'].eq('Business')
]

new_data

# Mean price over the selected rows

new_data['price'].mean()

