Write a script to analyze the flight data from PS2.
- The following are required elements of the final script(s) that are turned in:
    - You must have a database with at least 2 tables
    - Each table should have a primary key 
    - There should be at least one foreign key
    - You can either set up the tables in sqlite or from your Python script.
- Your Python script should use SQLAlchemy to
    - Add data to the database
    - Query data in the database using each of these commands:
        - select, where, join
    - Preferably use the SQL Expression Language, but raw SQL is acceptable.
    
# Data is in the subfolder data/ (relative to this notebook)

# Create database with 2 tables

In [1]:
from sqlalchemy import create_engine
from sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKey
from sqlalchemy import DateTime, Boolean
from sqlalchemy import exists
from sqlalchemy import sql, select, join, desc

# Imported here so this cell stays self-contained: it is the exception that
# reflection (autoload) raises when the requested table does not exist.
from sqlalchemy.exc import NoSuchTableError

# Open (or create on first use) the SQLite database file under data/.
engine = create_engine('sqlite:///data/flights.project3.sqlite')

metadata = MetaData(engine)

# Try to load Airports info from database, if not there, define it.
# Only "table does not exist" is treated as "define it now"; any other
# failure (unreadable file, bad path, ...) is allowed to surface instead of
# being silently swallowed by a bare except.
try:
    Airports = Table('Airports', metadata, autoload=True)
except NoSuchTableError:
    Airports = Table('Airports', metadata,
                     # NOTE(review): autoincrement only takes effect on integer
                     # primary-key columns; since Code is the primary key here,
                     # ID is presumably left NULL -- confirm it is needed.
                     Column('ID', Integer, autoincrement=True),
                     Column('Code', String, primary_key=True),
                     Column('City', String),
                     Column('State', String),
                     Column('Name', String),
                     )

# Same for Flights table.
try:
    Flights = Table('Flights', metadata, autoload=True)
except NoSuchTableError:
    Flights = Table('Flights', metadata,
                    Column('Fl_date', DateTime),
                    Column('Airline_ID', String),
                    # Surrogate primary key.  It is an Integer so that SQLite
                    # assigns it automatically: the code that loads flights
                    # never supplies a value for Code, and a String primary
                    # key (implicitly NOT NULL) would reject those inserts.
                    Column('Code', Integer, primary_key=True),
                    # Both endpoints reference the Airports primary key.
                    Column('Origin', String, ForeignKey("Airports.Code")),
                    Column('Destination', String, ForeignKey("Airports.Code")),
                    Column('Dep_Time', String),
                    Column('Dep_Delay_New', Integer),
                    Column('Arr_Time', String),
                    Column('Arr_Delay_New', Integer),
                    Column('Cancelled', Boolean),
                    Column('Cancellation_Code', String),
                    Column('Diverted', Boolean),
                    Column('Air_Time', String),
                    Column('Flights', Integer),
                    Column('Distance', Integer),
                    Column('Carrier_Delay', Integer),
                    Column('Weather_Delay', Integer),
                    Column('NAS_Delay', Integer),
                    Column('Security_Delay', Integer),
                    Column('Late_Aircraft_Delay', Integer)
                    )


# Emit CREATE TABLE for any table defined above that is not yet in the file;
# tables that already exist are left untouched.
metadata.create_all(engine)

# Add data to sqlite database

In [None]:
import csv

# Maps airport code -> [city name, state abbreviation].
Airport_dict={}

# Read through the file and make a dictionary for airport codes.
# This gets a unique list of airport codes.
# The with-block closes the CSV as soon as the scan is done (and also if the
# scan raises); newline='' is the mode the csv module asks for so that quoted
# fields containing line breaks are parsed correctly.
with open("data/flights.May2017-Apr2018.csv", newline='') as flights:
    reader = csv.DictReader(flights)
    for row in reader:
        # setdefault keeps the first city/state seen for a code, exactly like
        # an explicit "if code not in Airport_dict" check.
        Airport_dict.setdefault(
            row['ORIGIN'],
            [row['ORIGIN_CITY_NAME'], row['ORIGIN_STATE_ABR']])
        Airport_dict.setdefault(
            row['DEST'],
            [row['DEST_CITY_NAME'], row['DEST_STATE_ABR']])

# Add the Airport_dict codes to the Airports table

# Connection used for the statements executed below.
conn = engine.connect()

def insert_airport(code,city,state):
    """Insert a single row into the Airports table.

    The three arguments are written to the Code, City and State columns
    respectively.  The statement is executed on the module-level ``conn``;
    nothing is returned.
    """
    column_values = {'Code': code, 'City': city, 'State': state}
    conn.execute(Airports.insert().values(**column_values))

# Write one Airports row for every distinct code collected in Airport_dict.
for airport_code, place in Airport_dict.items():
    city_name, state_abbr = place[0], place[1]
    insert_airport(airport_code, city_name, state_abbr)


# Done with the CSV handle used to collect the airport codes.
flights.close()

In [None]:
import datetime
import pandas as pd

def to_date(dates, lookup=False, **args):
    """Convert ``dates`` to pandas datetimes with ``pd.to_datetime``.

    dates:  value(s) to convert.  When ``lookup`` is true it must offer
            ``.unique()`` and ``.map()`` (e.g. a pandas Series).
    lookup: when true, each distinct value is parsed only once and the
            parsed values are mapped back onto ``dates``.
    args:   extra keyword arguments forwarded to ``pd.to_datetime``.

    Returns whatever ``pd.to_datetime`` (or ``dates.map``) returns.
    """
    if not lookup:
        return pd.to_datetime(dates, **args)

    # Parse every distinct value once, then translate the whole input.
    parsed_by_value = {}
    for raw in dates.unique():
        parsed_by_value[raw] = pd.to_datetime(raw, **args)
    return dates.map(parsed_by_value)

# Second pass over the CSV: insert one Flights row per CSV row.
# - The with-block closes the file when the load finishes or fails
#   (the handle was previously never closed).
# - conn.begin() wraps the whole load in one transaction, so the rows are
#   committed together at the end and rolled back together on an error,
#   rather than each insert being committed on its own.
with open("data/flights.May2017-Apr2018.csv", newline='') as flights:
    reader = csv.DictReader(flights)
    with conn.begin():
        for Line in reader:
            ins = Flights.insert().values(
                Fl_date=to_date(Line['FL_DATE']),
                Airline_ID=Line['AIRLINE_ID'],
                Origin=Line['ORIGIN'],
                Destination=Line['DEST'],
                Dep_Time=Line['DEP_TIME'],
                Dep_Delay_New=Line['DEP_DELAY_NEW'],
                Arr_Time=Line['ARR_TIME'],
                Arr_Delay_New=Line['ARR_DELAY_NEW'],
                # The flag is parsed via float() first so that a value
                # written with a decimal point (e.g. "1.00") is accepted.
                Cancelled=int(float(Line['CANCELLED'])),
                Cancellation_Code=Line['CANCELLATION_CODE'],
                Diverted=int(float(Line['DIVERTED'])),
                Air_Time=Line['AIR_TIME'],
                Flights=Line['FLIGHTS'],
                Distance=Line['DISTANCE'],
                Carrier_Delay=Line['CARRIER_DELAY'],
                Weather_Delay=Line['WEATHER_DELAY'],
                NAS_Delay=Line['NAS_DELAY'],
                Security_Delay=Line['SECURITY_DELAY'],
                Late_Aircraft_Delay=Line['LATE_AIRCRAFT_DELAY'],
            )
            conn.execute(ins)

    

# Select
Selecting all Origin and Destination pairs

In [None]:
from sqlalchemy import select

# SELECT only the two airport-code columns of Flights.
route_columns = [Flights.c.Origin, Flights.c.Destination]
query = select(route_columns)

# Run the query and print every (Origin, Destination) row it returns.
result = conn.execute(query)
for origin_destination in result:
    print(origin_destination)

# Where
Select the origin and destination of the flight with the maximum air time.

In [None]:
from sqlalchemy import select
from sqlalchemy import func

# Imported here so this cell stays self-contained.
from sqlalchemy import cast, Float

# Air_Time is declared as a String column, so MAX() applied to it directly
# would compare text rather than numbers; cast it to a number first.
longest_air_time = func.max(cast(Flights.c.Air_Time, Float))

# One aggregate, no GROUP BY: the query returns a single row.
# NOTE(review): Origin and Destination are "bare" columns next to an
# aggregate. SQLite fills them from the row that holds the maximum; other
# databases reject this form -- confirm SQLite is the only target.
query = select([Flights.c.Origin, Flights.c.Destination, longest_air_time])

result = conn.execute(query)
for row in result:
    print(row)

In [None]:
# Join