Prior to loading this data into jupyter notebook first we created a SQLite database and inserted the 3 tables (airlines, airports & flights) tables into with the following steps in SQL command prompt:

To check for existing dbs: > .databases
To open or create db > .open DATABASE_NAME.db
To import csv > .mode csv
 .import CSV_NAME.csv TABLENAME
To check tables in db > .tables
To view table content > . mode columns
 .header on
SELECT * FROM TABLENAME;
To save be db > .backup DATABASE_NAME.db

# Setup

In [1]:
#import dependencies
import pandas as pd
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, inspect, func

In [2]:
#connect to the database
database_path = "flights.db"
engine = create_engine(f"sqlite:///{database_path}")
conn = engine.connect()

In [3]:
#Double check it worked
inspector = inspect(engine)
inspector.get_table_names()

['airlines', 'airports', 'flights']

In [4]:
# Get a list of column names and types
columns = inspector.get_columns('airlines')
for c in columns:
    print(c['name'], c["type"])
# columns

IATA_CODE TEXT
AIRLINE TEXT


In [5]:
# Get a list of column names and types
columns = inspector.get_columns('airports')
for c in columns:
    print(c['name'], c["type"])
# columns

IATA_CODE TEXT
AIRPORT TEXT
CITY TEXT
STATE TEXT
COUNTRY TEXT
LATITUDE TEXT
LONGITUDE TEXT


In [6]:
# Get a list of column names and types
columns = inspector.get_columns('flights')
for c in columns:
    print(c['name'], c["type"])
# columns

YEAR TEXT
MONTH TEXT
DAY TEXT
DAY_OF_WEEK TEXT
AIRLINE TEXT
FLIGHT_NUMBER TEXT
TAIL_NUMBER TEXT
ORIGIN_AIRPORT TEXT
DESTINATION_AIRPORT TEXT
SCHEDULED_DEPARTURE TEXT
DEPARTURE_TIME TEXT
DEPARTURE_DELAY TEXT
TAXI_OUT TEXT
WHEELS_OFF TEXT
SCHEDULED_TIME TEXT
ELAPSED_TIME TEXT
AIR_TIME TEXT
DISTANCE TEXT
WHEELS_ON TEXT
TAXI_IN TEXT
SCHEDULED_ARRIVAL TEXT
ARRIVAL_TIME TEXT
ARRIVAL_DELAY TEXT
DIVERTED TEXT
CANCELLED TEXT
CANCELLATION_REASON TEXT
AIR_SYSTEM_DELAY TEXT
SECURITY_DELAY TEXT
AIRLINE_DELAY TEXT
LATE_AIRCRAFT_DELAY TEXT
WEATHER_DELAY TEXT


# Cancellations by Airline

In [7]:
#find the number of flights cancelled for which the airline was responsible (i.e. not weather or security related etc.)
cancelled_count_by_airline = pd.read_sql_query('SELECT a.AIRLINE as `Airline Name`, a.IATA_CODE as `Airline Code`,\
                                        SUM(f.CANCELLED) as `SUM_CANCELLED`\
                                        FROM airlines as a\
                                        INNER JOIN flights as f ON a.IATA_CODE = f.AIRLINE\
                                        WHERE CANCELLATION_REASON = "A"\
                                        GROUP BY f.AIRLINE\
                                        ORDER BY(SUM_CANCELLED) desc;', con = engine)
cancelled_count_by_airline

Unnamed: 0,Airline Name,Airline Code,SUM_CANCELLED
0,Southwest Airlines Co.,WN,6122
1,Atlantic Southeast Airlines,EV,3604
2,Skywest Airlines Inc.,OO,3205
3,American Airlines Inc.,AA,2879
4,United Air Lines Inc.,UA,2870
5,American Eagle Airlines Inc.,MQ,2475
6,US Airways Inc.,US,1007
7,JetBlue Airways,B6,883
8,Spirit Air Lines,NK,654
9,Delta Air Lines Inc.,DL,594


In [8]:
#insert this table into the sqlite database to perform further queries
cancelled_count_by_airline.to_sql(name='cancelled_count_by_airline', con=engine, if_exists='replace', index=False)
#check it worked
engine.table_names()

['airlines', 'airports', 'cancelled_count_by_airline', 'flights']

In [9]:
#combine the avg delay to show the actual airline as opposed to the shortcode only 
cancelled_count = pd.read_sql_query('SELECT airlines.AIRLINE, airlines.IATA_CODE, flights.CANCELLED as total_cancelled\
                                     FROM airlines \
                                     JOIN flights \
                                     ON airlines.IATA_code = flights.AIRLINE\
                                     WHERE flights.CANCELLED = 1;', con = engine)

#insert this table into the sqlite database to perform further queries
cancelled_count.to_sql(name='cancelled_count', con=engine, if_exists='replace', index=False)
#check it worked
engine.table_names()

['airlines',
 'airports',
 'cancelled_count',
 'cancelled_count_by_airline',
 'flights']

In [10]:
cancelled_count.head()

Unnamed: 0,AIRLINE,IATA_CODE,total_cancelled
0,Alaska Airlines Inc.,AS,1
1,American Airlines Inc.,AA,1
2,Skywest Airlines Inc.,OO,1
3,American Eagle Airlines Inc.,MQ,1
4,Skywest Airlines Inc.,OO,1


In [11]:
#Get an idea on which airlines cancel the most flights
total_cancelled = pd.read_sql_query('SELECT AIRLINE, IATA_CODE, SUM(total_cancelled) as cancelled_flights\
                                     FROM cancelled_count \
                                     GROUP BY AIRLINE\
                                     ORDER BY SUM(total_cancelled) desc;', con = engine)
total_cancelled

Unnamed: 0,AIRLINE,IATA_CODE,cancelled_flights
0,Southwest Airlines Co.,WN,16043
1,Atlantic Southeast Airlines,EV,15231
2,American Eagle Airlines Inc.,MQ,15025
3,American Airlines Inc.,AA,10919
4,Skywest Airlines Inc.,OO,9960
5,United Air Lines Inc.,UA,6573
6,JetBlue Airways,B6,4276
7,US Airways Inc.,US,4067
8,Delta Air Lines Inc.,DL,3824
9,Spirit Air Lines,NK,2004


In [12]:
#insert this table into the sqlite database to perform further queries
total_cancelled.to_sql(name='total_cancelled', con=engine, if_exists='replace', index=False)
#drop the previous table to keep the database clean
engine.execute("DROP TABLE cancelled_count")
#check it worked
engine.table_names()

['airlines',
 'airports',
 'cancelled_count_by_airline',
 'flights',
 'total_cancelled']

In [13]:
#get the total count of flights to get a ratio of total cancelled
#Get an idea on which airlines cancel the most flights
total_flights = pd.read_sql_query("SELECT AIRLINE, COUNT(AIRLINE) as flight_totals\
                                     FROM flights \
                                     GROUP BY AIRLINE", con = engine)
#insert this table into the sqlite database to perform further queries
total_flights.to_sql(name='total_flights', con=engine, if_exists='replace', index=False)
#check it worked
engine.table_names()

['airlines',
 'airports',
 'cancelled_count_by_airline',
 'flights',
 'total_cancelled',
 'total_flights']

In [14]:
total_flights

Unnamed: 0,AIRLINE,flight_totals
0,AA,725984
1,AS,172521
2,B6,267048
3,DL,875881
4,EV,571977
5,F9,90836
6,HA,76272
7,MQ,294632
8,NK,117379
9,OO,588353


In [15]:
#merge the total cancellation and total number of flights to get the cancellation ratio
#combine the avg delay to show the actual airline as opposed to the shortcode only 
cancelled_and_count = pd.read_sql_query("SELECT total_cancelled.AIRLINE, total_cancelled.IATA_CODE,\
                                        total_cancelled.cancelled_flights,total_flights.flight_totals \
                                        FROM total_cancelled JOIN total_flights \
                                        ON total_cancelled.IATA_CODE = total_flights.AIRLINE", con = engine)
cancelled_and_count

Unnamed: 0,AIRLINE,IATA_CODE,cancelled_flights,flight_totals
0,Southwest Airlines Co.,WN,16043,1261855
1,Atlantic Southeast Airlines,EV,15231,571977
2,American Eagle Airlines Inc.,MQ,15025,294632
3,American Airlines Inc.,AA,10919,725984
4,Skywest Airlines Inc.,OO,9960,588353
5,United Air Lines Inc.,UA,6573,515723
6,JetBlue Airways,B6,4276,267048
7,US Airways Inc.,US,4067,198715
8,Delta Air Lines Inc.,DL,3824,875881
9,Spirit Air Lines,NK,2004,117379


In [16]:
#clean up in pandas for ratio
cancelled_and_count['cancellation ratio'] = round(cancelled_and_count['cancelled_flights'] / cancelled_and_count['flight_totals'] *100,2)
cancelled_and_count = cancelled_and_count.sort_values(['cancellation ratio'])
cancelled_and_count

Unnamed: 0,AIRLINE,IATA_CODE,cancelled_flights,flight_totals,cancellation ratio
13,Hawaiian Airlines Inc.,HA,171,76272,0.22
10,Alaska Airlines Inc.,AS,669,172521,0.39
8,Delta Air Lines Inc.,DL,3824,875881,0.44
11,Frontier Airlines Inc.,F9,588,90836,0.65
12,Virgin America,VX,534,61903,0.86
0,Southwest Airlines Co.,WN,16043,1261855,1.27
5,United Air Lines Inc.,UA,6573,515723,1.27
3,American Airlines Inc.,AA,10919,725984,1.5
6,JetBlue Airways,B6,4276,267048,1.6
4,Skywest Airlines Inc.,OO,9960,588353,1.69


In [17]:
#insert this table into the sqlite database to perform further queries
cancelled_and_count.to_sql(name='cancelled_and_count', con=engine, if_exists='replace', index=False)
#check it worked
engine.table_names()

['airlines',
 'airports',
 'cancelled_and_count',
 'cancelled_count_by_airline',
 'flights',
 'total_cancelled',
 'total_flights']

In [18]:
#remove dbs which have double information we used in transformation for a tidy database
engine.execute("DROP TABLE total_cancelled")

<sqlalchemy.engine.result.ResultProxy at 0x17efe4cf128>

In [19]:
#check it worked
engine.table_names()

['airlines',
 'airports',
 'cancelled_and_count',
 'cancelled_count_by_airline',
 'flights',
 'total_flights']

# Average Delay by Airline

In [20]:
#check out the average delay on airline to see what airlines have the longest delays so we can avoid them
avg_airline_delays = pd.read_sql_query('SELECT a.AIRLINE as `Airline Name`, a.IATA_CODE as `Airline Code`,\
                  COUNT(f.AIRLINE) as `Total Flights not cancelled`,\
                  AVG(f.DEPARTURE_DELAY) as `Avg DEPARTURE_DELAY`,\
                  AVG(f.ARRIVAL_DELAY) as `Avg ARRIVAL_DELAY`\
                  FROM airlines as a\
                  INNER JOIN flights as f ON a.IATA_CODE = f.AIRLINE\
                  WHERE f.CANCELLED = 0\
                  GROUP BY f.AIRLINE\
                  ORDER BY(`AVG DEPARTURE_DELAY`) desc;', con = engine)

In [21]:
#double check the table
avg_airline_delays

Unnamed: 0,Airline Name,Airline Code,Total Flights not cancelled,Avg DEPARTURE_DELAY,Avg ARRIVAL_DELAY
0,Spirit Air Lines,NK,115375,15.911281,14.448971
1,United Air Lines Inc.,UA,509150,14.379658,5.416787
2,Frontier Airlines Inc.,F9,90248,13.31747,12.482814
3,JetBlue Airways,B6,262772,11.499049,6.659309
4,Southwest Airlines Co.,WN,1245812,10.571461,4.362992
5,American Eagle Airlines Inc.,MQ,279607,10.01491,6.439027
6,Virgin America,VX,61369,9.006274,4.728364
7,American Airlines Inc.,AA,715065,8.864029,3.441091
8,Atlantic Southeast Airlines,EV,556746,8.66331,6.561793
9,Skywest Airlines Inc.,OO,578393,7.768222,5.829694


In [22]:
#insert this table into the sqlite database to perform further queries
#Drop the table if it already exists and update it with new data
avg_airline_delays.to_sql(name='avg_airline_delays', con=engine, if_exists='replace', index=False)
#check it worked
engine.table_names()

['airlines',
 'airports',
 'avg_airline_delays',
 'cancelled_and_count',
 'cancelled_count_by_airline',
 'flights',
 'total_flights']

In [23]:
#Check which airlines experience the most departure delays, do not include any delays 0 or under as those may have experienced some type of delay
#but were on time in the end
airline_departure_delay_counts = pd.read_sql_query('SELECT a.AIRLINE as `Airline_Name`, a.IATA_CODE as `Airline_Code`,\
                       COUNT(f.DEPARTURE_DELAY) as `Count_DEPARTURE_DELAY`\
                       FROM airlines as a\
                       INNER JOIN flights as f ON a.IATA_CODE = f.AIRLINE\
                       WHERE f.DEPARTURE_DELAY <> 0\
                       GROUP BY f.AIRLINE\
                       ORDER BY(`Count_DEPARTURE_DELAY`) desc;', con = engine)
airline_departure_delay_counts

Unnamed: 0,Airline_Name,Airline_Code,Count_DEPARTURE_DELAY
0,Southwest Airlines Co.,WN,1170316
1,Delta Air Lines Inc.,DL,808748
2,American Airlines Inc.,AA,692329
3,Skywest Airlines Inc.,OO,561209
4,Atlantic Southeast Airlines,EV,550173
5,United Air Lines Inc.,UA,487593
6,American Eagle Airlines Inc.,MQ,279588
7,JetBlue Airways,B6,253426
8,US Airways Inc.,US,189487
9,Alaska Airlines Inc.,AS,165430


In [24]:
#insert this table into the sqlite database to perform further queries
#Drop the table if it already exists and update it with new data
airline_departure_delay_counts.to_sql(name='airline_departure_delay_counts', con=engine, if_exists='replace', index=False)
#check it worked
engine.table_names()

['airline_departure_delay_counts',
 'airlines',
 'airports',
 'avg_airline_delays',
 'cancelled_and_count',
 'cancelled_count_by_airline',
 'flights',
 'total_flights']

In [25]:
#add the total number of flights to the table to get a delay ratio
airline_departure_delay_ratio = pd.read_sql_query("SELECT airline_departure_delay_counts.Airline_Name, airline_departure_delay_counts.Airline_Code,\
                                        airline_departure_delay_counts.Count_DEPARTURE_DELAY, total_flights.flight_totals\
                                        FROM airline_departure_delay_counts JOIN total_flights \
                                        ON airline_departure_delay_counts.Airline_Code = total_flights.AIRLINE", con = engine)
airline_departure_delay_ratio

Unnamed: 0,Airline_Name,Airline_Code,Count_DEPARTURE_DELAY,flight_totals
0,Southwest Airlines Co.,WN,1170316,1261855
1,Delta Air Lines Inc.,DL,808748,875881
2,American Airlines Inc.,AA,692329,725984
3,Skywest Airlines Inc.,OO,561209,588353
4,Atlantic Southeast Airlines,EV,550173,571977
5,United Air Lines Inc.,UA,487593,515723
6,American Eagle Airlines Inc.,MQ,279588,294632
7,JetBlue Airways,B6,253426,267048
8,US Airways Inc.,US,189487,198715
9,Alaska Airlines Inc.,AS,165430,172521


In [26]:
#clean up in pandas for ratio
airline_departure_delay_ratio['departure_delay_ratio'] = round(airline_departure_delay_ratio['Count_DEPARTURE_DELAY'] / airline_departure_delay_ratio['flight_totals'] *100,2)
airline_departure_delay_ratio = airline_departure_delay_ratio.sort_values(['departure_delay_ratio'])
airline_departure_delay_ratio

Unnamed: 0,Airline_Name,Airline_Code,Count_DEPARTURE_DELAY,flight_totals,departure_delay_ratio
1,Delta Air Lines Inc.,DL,808748,875881,92.34
0,Southwest Airlines Co.,WN,1170316,1261855,92.75
13,Virgin America,VX,58020,61903,93.73
5,United Air Lines Inc.,UA,487593,515723,94.55
6,American Eagle Airlines Inc.,MQ,279588,294632,94.89
7,JetBlue Airways,B6,253426,267048,94.9
2,American Airlines Inc.,AA,692329,725984,95.36
8,US Airways Inc.,US,189487,198715,95.36
3,Skywest Airlines Inc.,OO,561209,588353,95.39
9,Alaska Airlines Inc.,AS,165430,172521,95.89


In [27]:
#insert this table into the sqlite database to perform further queries
#Drop the table if it already exists and update it with new data
airline_departure_delay_ratio.to_sql(name='airline_departure_delay_ratio', con=engine, if_exists='replace', index=False)
#check it worked
engine.table_names()

['airline_departure_delay_counts',
 'airline_departure_delay_ratio',
 'airlines',
 'airports',
 'avg_airline_delays',
 'cancelled_and_count',
 'cancelled_count_by_airline',
 'flights',
 'total_flights']

In [28]:
#Check which airlines experience the most arrival delays, do not include any delays 0 or under as those may have experienced some type of delay
#but were on time in the end
airline_arrival_delay_counts = pd.read_sql_query('SELECT a.AIRLINE as `Airline_Name`, a.IATA_CODE as `Airline_Code`,COUNT(f.ARRIVAL_DELAY) as `Count_ARRIVAL_DELAY`\
                    FROM airlines as a\
                    INNER JOIN flights as f ON a.IATA_CODE = f.AIRLINE\
                    WHERE f.ARRIVAL_DELAY <> 0\
                    GROUP BY f.AIRLINE\
                    ORDER BY(`Count_ARRIVAL_DELAY`) desc;', con = engine)

airline_arrival_delay_counts

Unnamed: 0,Airline_Name,Airline_Code,Count_ARRIVAL_DELAY
0,Southwest Airlines Co.,WN,1232551
1,Delta Air Lines Inc.,DL,858305
2,American Airlines Inc.,AA,711895
3,Skywest Airlines Inc.,OO,573941
4,Atlantic Southeast Airlines,EV,558743
5,United Air Lines Inc.,UA,506347
6,American Eagle Airlines Inc.,MQ,289448
7,JetBlue Airways,B6,262174
8,US Airways Inc.,US,194176
9,Alaska Airlines Inc.,AS,168197


In [29]:
#insert this table into the sqlite database to perform further queries
#Drop the table if it already exists and update it with new data
airline_arrival_delay_counts.to_sql(name='airline_arrival_delay_counts', con=engine, if_exists='replace', index=False)
#check it worked
engine.table_names()

['airline_arrival_delay_counts',
 'airline_departure_delay_counts',
 'airline_departure_delay_ratio',
 'airlines',
 'airports',
 'avg_airline_delays',
 'cancelled_and_count',
 'cancelled_count_by_airline',
 'flights',
 'total_flights']

In [30]:
airline_arrival_delay_ratio = pd.read_sql_query("SELECT airline_arrival_delay_counts.Airline_Name, airline_arrival_delay_counts.Airline_Code,\
                                        airline_arrival_delay_counts.Count_ARRIVAL_DELAY, total_flights.flight_totals\
                                        FROM airline_arrival_delay_counts JOIN total_flights \
                                        ON airline_arrival_delay_counts.Airline_Code = total_flights.AIRLINE", con = engine)
airline_arrival_delay_ratio

Unnamed: 0,Airline_Name,Airline_Code,Count_ARRIVAL_DELAY,flight_totals
0,Southwest Airlines Co.,WN,1232551,1261855
1,Delta Air Lines Inc.,DL,858305,875881
2,American Airlines Inc.,AA,711895,725984
3,Skywest Airlines Inc.,OO,573941,588353
4,Atlantic Southeast Airlines,EV,558743,571977
5,United Air Lines Inc.,UA,506347,515723
6,American Eagle Airlines Inc.,MQ,289448,294632
7,JetBlue Airways,B6,262174,267048
8,US Airways Inc.,US,194176,198715
9,Alaska Airlines Inc.,AS,168197,172521


In [31]:
#clean up in pandas for ratio
airline_arrival_delay_ratio['arrival_delay_ratio'] = round(airline_arrival_delay_ratio['Count_ARRIVAL_DELAY'] / airline_arrival_delay_ratio['flight_totals'] *100,2)
airline_arrival_delay_ratio = airline_arrival_delay_ratio.sort_values(['arrival_delay_ratio'])
airline_arrival_delay_ratio

Unnamed: 0,Airline_Name,Airline_Code,Count_ARRIVAL_DELAY,flight_totals,arrival_delay_ratio
12,Hawaiian Airlines Inc.,HA,72955,76272,95.65
13,Virgin America,VX,60317,61903,97.44
9,Alaska Airlines Inc.,AS,168197,172521,97.49
3,Skywest Airlines Inc.,OO,573941,588353,97.55
0,Southwest Airlines Co.,WN,1232551,1261855,97.68
4,Atlantic Southeast Airlines,EV,558743,571977,97.69
8,US Airways Inc.,US,194176,198715,97.72
11,Frontier Airlines Inc.,F9,88851,90836,97.81
10,Spirit Air Lines,NK,114966,117379,97.94
1,Delta Air Lines Inc.,DL,858305,875881,97.99


In [32]:
#insert this table into the sqlite database to perform further queries
#Drop the table if it already exists and update it with new data
airline_arrival_delay_ratio.to_sql(name='airline_arrival_delay_ratio', con=engine, if_exists='replace', index=False)
#check it worked
engine.table_names()

['airline_arrival_delay_counts',
 'airline_arrival_delay_ratio',
 'airline_departure_delay_counts',
 'airline_departure_delay_ratio',
 'airlines',
 'airports',
 'avg_airline_delays',
 'cancelled_and_count',
 'cancelled_count_by_airline',
 'flights',
 'total_flights']

In [33]:
airline_avg_DeptAndArr_TotalFlights = pd.read_sql_query('SELECT a.AIRLINE as `Airline Name`, a.IATA_CODE as `Airline Code`,\
                                                 COUNT(f.AIRLINE) as `Total Flights`,\
                                                 AVG(f.DEPARTURE_DELAY) as `Avg DEPARTURE_DELAY`,\
                                                 AVG(f.ARRIVAL_DELAY) as `Avg ARRIVAL_DELAY`\
                                                 FROM airlines as a\
                                                 INNER JOIN flights as f ON a.IATA_CODE = f.AIRLINE\
                                                 WHERE f.CANCELLED = 0 AND AIRLINE_DELAY <> 0\
                                                 GROUP BY f.AIRLINE\
                                                 ORDER BY(`Total Flights`) desc;', con = engine)
airline_avg_DeptAndArr_TotalFlights

Unnamed: 0,Airline Name,Airline Code,Total Flights,Avg DEPARTURE_DELAY,Avg ARRIVAL_DELAY
0,Southwest Airlines Co.,WN,1153710,7.506802,0.489224
1,Delta Air Lines Inc.,DL,817162,4.871584,-3.544768
2,American Airlines Inc.,AA,653442,5.770684,-1.40131
3,Skywest Airlines Inc.,OO,509725,2.385777,-0.828021
4,Atlantic Southeast Airlines,EV,501505,4.420486,1.406365
5,United Air Lines Inc.,UA,470427,10.791878,0.41155
6,American Eagle Airlines Inc.,MQ,246296,5.061938,-0.934376
7,JetBlue Airways,B6,242262,8.240459,1.944692
8,US Airways Inc.,US,177816,3.878684,-0.428044
9,Alaska Airlines Inc.,AS,157863,-1.025408,-4.937959


In [34]:
#insert this table into the sqlite database to perform further queries
#Drop the table if it already exists and update it with new data
airline_avg_DeptAndArr_TotalFlights.to_sql(name='airline_avg_DeptAndArr_TotalFlights', con=engine, if_exists='replace', index=False)
#check it worked
engine.table_names()

['airline_arrival_delay_counts',
 'airline_arrival_delay_ratio',
 'airline_avg_DeptAndArr_TotalFlights',
 'airline_departure_delay_counts',
 'airline_departure_delay_ratio',
 'airlines',
 'airports',
 'avg_airline_delays',
 'cancelled_and_count',
 'cancelled_count_by_airline',
 'flights',
 'total_flights']

In [35]:
#count the number of departure delay incidents by airline
count_departure_delay = pd.read_sql_query('SELECT a.AIRLINE as `Airline Name`, a.IATA_CODE as `Airline Code`,COUNT(f.DEPARTURE_DELAY) as `Count DEPARTURE_DELAY`\
                                        FROM airlines as a\
                                        INNER JOIN flights as f ON a.IATA_CODE = f.AIRLINE\
                                        WHERE f.DEPARTURE_DELAY <> 0 AND AIRLINE_DELAY <> 0\
                                        GROUP BY f.AIRLINE\
                                        ORDER BY(`Count DEPARTURE_DELAY`) desc;', con = engine)
count_departure_delay

Unnamed: 0,Airline Name,Airline Code,Count DEPARTURE_DELAY
0,Southwest Airlines Co.,WN,1080653
1,Delta Air Lines Inc.,DL,756467
2,American Airlines Inc.,AA,632995
3,Atlantic Southeast Airlines,EV,496694
4,Skywest Airlines Inc.,OO,494568
5,United Air Lines Inc.,UA,450084
6,American Eagle Airlines Inc.,MQ,247495
7,JetBlue Airways,B6,233921
8,US Airways Inc.,US,173514
9,Alaska Airlines Inc.,AS,151904


In [36]:
#insert this table into the sqlite database to perform further queries
#Drop the table if it already exists and update it with new data
count_departure_delay.to_sql(name='count_departure_delay', con=engine, if_exists='replace', index=False)
#check it worked
engine.table_names()

['airline_arrival_delay_counts',
 'airline_arrival_delay_ratio',
 'airline_avg_DeptAndArr_TotalFlights',
 'airline_departure_delay_counts',
 'airline_departure_delay_ratio',
 'airlines',
 'airports',
 'avg_airline_delays',
 'cancelled_and_count',
 'cancelled_count_by_airline',
 'count_departure_delay',
 'flights',
 'total_flights']

In [37]:
#count the number of departure delay incidents by airline
count_arrival_delay = pd.read_sql_query('SELECT a.AIRLINE as `Airline Name`, a.IATA_CODE as `Airline Code`,\
                                            COUNT(f.ARRIVAL_DELAY) as `Count ARRIVAL_DELAY`\
                                            FROM airlines as a\
                                            INNER JOIN flights as f ON a.IATA_CODE = f.AIRLINE\
                                            WHERE f.ARRIVAL_DELAY <> 0 AND AIRLINE_DELAY <> 0\
                                            GROUP BY f.AIRLINE\
                                            ORDER BY(`Count ARRIVAL_DELAY`) desc;', con = engine)
count_arrival_delay

Unnamed: 0,Airline Name,Airline Code,Count ARRIVAL_DELAY
0,Southwest Airlines Co.,WN,1140449
1,Delta Air Lines Inc.,DL,803410
2,American Airlines Inc.,AA,650272
3,Skywest Airlines Inc.,OO,505273
4,Atlantic Southeast Airlines,EV,503502
5,United Air Lines Inc.,UA,467624
6,American Eagle Airlines Inc.,MQ,256137
7,JetBlue Airways,B6,241664
8,US Airways Inc.,US,177344
9,Alaska Airlines Inc.,AS,154208


In [38]:
#insert this table into the sqlite database to perform further queries
#Drop the table if it already exists and update it with new data
count_arrival_delay.to_sql(name='count_arrival_delay', con=engine, if_exists='replace', index=False)
#check it worked
engine.table_names()

['airline_arrival_delay_counts',
 'airline_arrival_delay_ratio',
 'airline_avg_DeptAndArr_TotalFlights',
 'airline_departure_delay_counts',
 'airline_departure_delay_ratio',
 'airlines',
 'airports',
 'avg_airline_delays',
 'cancelled_and_count',
 'cancelled_count_by_airline',
 'count_arrival_delay',
 'count_departure_delay',
 'flights',
 'total_flights']