In [37]:
import sqlite3
import pandas as pd
from aws_utils import *

In [38]:
client, resource = aws_connect()

In [39]:
# client.download_file('flights-db', 'db-file', 'airline2.db')
os.chdir('..')

In [40]:
conn = sqlite3.connect("airline2.db")
cur = conn.cursor()

### Which of the following airplanes has the lowest associated average departure delay (excluding cancelled and diverted flights)?

In [45]:
q1 = """
    SELECT
        model AS model,
        AVG(on_time.departure_delay) AS avg_delay
    FROM planes
    JOIN on_time USING(tail_number)
    WHERE on_time.cancelled = 0
    AND on_time.diverted = 0
    AND on_time.departure_delay > 0
    GROUP BY model
    ORDER BY avg_delay
    LIMIT 1
"""

In [46]:
cur.execute(q1)
result = cur.fetchone()
result

('737-230', 12.956403269754768)

### Which of the following cities has the highest number of inbound flights (excluding cancelled flights)?

In [51]:
q2 = """
    SELECT
        airports.city AS city,
        COUNT(*) AS total
    FROM airports
    JOIN on_time
    ON on_time.destination = airports.iata
    WHERE on_time.cancelled = 0
    GROUP BY airports.city
    ORDER BY total DESC
    LIMIT 1;
"""

In [52]:
cur.execute(q2)
result = cur.fetchone()
result

[('Chicago', 1051325),
 ('Atlanta', 937349),
 ('Dallas-Fort Worth', 671818),
 ('Houston', 609759),
 ('New York', 547465),
 ('Denver', 541437),
 ('Los Angeles', 537060),
 ('Phoenix', 488776),
 ('Las Vegas', 424243),
 ('Detroit', 353871),
 ('Newark', 348671),
 ('Salt Lake City', 334946),
 ('Minneapolis', 321827),
 ('San Francisco', 309281),
 ('Orlando', 292005),
 ('Boston', 290495),
 ('Charlotte', 281249),
 ('Covington', 263710),
 ('Baltimore', 248436),
 ('Seattle', 247895),
 ('Philadelphia', 240470),
 ('San Diego', 219304),
 ('Chantilly', 206970),
 ('Arlington', 206725),
 ('Cleveland', 183645),
 ('Tampa', 182489),
 ('Oakland', 170011),
 ('Ft. Lauderdale', 160938),
 ('Memphis', 156615),
 ('Portland', 152008),
 ('Miami', 150943),
 ('Honolulu', 147122),
 ('St Louis', 147078),
 ('Raleigh', 142492),
 ('San Jose', 140590),
 ('Kansas City', 140519),
 ('Nashville', 138980),
 ('Sacramento', 129216),
 ('Dallas', 119681),
 ('Santa Ana', 119301),
 ('Austin', 111849),
 ('Pittsburgh', 109288),
 ('San

### Which of the following companies has the highest number of cancelled flights, relative to their number of total flights?

In [47]:
q3 = """
    SELECT
        q1.carrier AS carrier,
        (CAST(q1.numerator AS FLOAT)/ CAST(q2.denominator AS FLOAT)) AS ratio
    FROM (
        SELECT
            carriers.description AS carrier,
            COUNT(*) AS numerator
        FROM carriers
        JOIN on_time
        ON on_time.unique_carrier = carriers.code
        WHERE on_time.cancelled = 1
        AND carriers.description IN ('United Air Lines Inc.', 'American Airlines Inc.', 'Pinnacle Airlines Inc.', 'Delta Air Lines Inc.')
        GROUP BY carriers.description) AS q1
    JOIN (
        SELECT
            carriers.description AS carrier,
            COUNT(*) AS denominator
        FROM carriers
        JOIN on_time
        ON on_time.unique_carrier = carriers.code
        WHERE carriers.description IN ('United Air Lines Inc.', 'American Airlines Inc.', 'Pinnacle Airlines Inc.', 'Delta Air Lines Inc.')
        GROUP BY carriers.description) AS q2 USING(carrier)
    ORDER BY ratio DESC
    LIMIT 1;
"""

In [48]:
cur.execute(q3)
result = cur.fetchone()
result

[('Pinnacle Airlines Inc.', 0.034664900204487197),
 ('American Airlines Inc.', 0.026204948541537897),
 ('United Air Lines Inc.', 0.02341081287083364),
 ('Delta Air Lines Inc.', 0.015235135662146506)]

### Which of the following companies has the highest number of cancelled flights?

In [53]:
q4 = """
    SELECT
        a.description AS carrier,
        SUM(o.cancelled) AS 'cancelled flights'
    FROM on_time o
    JOIN carriers c
    ON o.unique_carrier = c.code
    WHERE c.description IN ('United Air Lines Inc.', 'American Airlines Inc.', 'Pinnacle Airlines Inc.', 'Delta Air Lines Inc.')
    GROUP BY carrier
    ORDER BY SUM(o.cancelled) DESC
    LIMIT 1;
"""

In [54]:
cur.execute(q4)
result = cur.fetchone()
result

('AA', 38835)

In [None]:
conn.close()