## Importing Dependencies

In [1]:
import pandas as pd

from pathlib import Path
from sqlalchemy import create_engine
from sqlalchemy.types import (Date, DateTime, Float, Integer,
                              String, Interval, Time)

## Connecting to SQLite DB

In [2]:
# Location of the SQLite database file and the engine bound to it
db_path = Path('Resources/Flights_DB.sqlite')
db_url = f'sqlite:///{db_path}'
engine = create_engine(db_url)

# Open a connection on the engine (closed in the last cell of the notebook)
conn = engine.connect()

## Creating the Flights table

In [3]:
# Load the Flights CSV into a DataFrame; the first CSV column becomes the index
flights_path = Path('Resources/DB_Flights_Table.csv')
flights_df = pd.read_csv(filepath_or_buffer=flights_path, index_col=0)

# Preview the first ten rows
flights_df.head(n=10)

Unnamed: 0,Airline_Flight_Number,Operating_Airline,Origin_Airport,Destination_Airport,Flight_Date,Departure_Time,Arrival_Time,Delayed,Cancelled
0,1581,DL,FLL,LGA,2022-01-06,,,0,1.0
1,1582,DL,ATL,FLL,2022-01-06,16:27,18:20,0,0.0
2,1582,DL,FLL,ATL,2022-01-06,19:29,21:15,0,0.0
3,1583,DL,FLL,RDU,2022-01-06,10:19,12:12,0,0.0
4,1584,DL,ATL,JAN,2022-01-06,11:13,11:31,0,0.0
5,1584,DL,JAN,ATL,2022-01-06,12:30,14:35,0,0.0
6,1585,DL,RIC,ATL,2022-01-06,08:57,10:38,0,0.0
7,1586,DL,MSP,RSW,2022-01-06,10:18,14:35,1,0.0
8,1587,DL,ATL,RDU,2022-01-06,14:22,15:31,1,0.0
9,1587,DL,RDU,ATL,2022-01-06,16:40,18:03,0,0.0


In [4]:
# SQL column types for the Flights table; date and time values are kept as text
flights_sql_types = {
    'Airline_Flight_Number': Integer(),
    'Operating_Airline': String(),
    'Origin_Airport': String(),
    'Destination_Airport': String(),
    'Flight_Date': String(),
    'Departure_Time': String(),
    'Arrival_Time': String(),
    'Delayed': Integer(),
    'Cancelled': Integer(),
}

# Write the DataFrame to the 'Flights' table, replacing any existing table.
# The DataFrame index is written as the 'ID' column.
flights_df.to_sql(
    name='Flights',
    con=engine,
    if_exists='replace',
    index=True,
    index_label='ID',
    dtype=flights_sql_types,
)

4078318

In [5]:
# Check if successful: fetch the first five rows of the Flights table.
# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0, so the
# query runs on a short-lived connection that is closed when the block exits.
with engine.connect() as check_conn:
    flights_rows = check_conn.exec_driver_sql('SELECT * FROM Flights LIMIT 5').fetchall()

flights_rows

  engine.execute('SELECT * FROM Flights LIMIT 5').fetchall()


[(0, 1581, 'DL', 'FLL', 'LGA', '2022-01-06', None, None, 0, 1),
 (1, 1582, 'DL', 'ATL', 'FLL', '2022-01-06', '16:27', '18:20', 0, 0),
 (2, 1582, 'DL', 'FLL', 'ATL', '2022-01-06', '19:29', '21:15', 0, 0),
 (3, 1583, 'DL', 'FLL', 'RDU', '2022-01-06', '10:19', '12:12', 0, 0),
 (4, 1584, 'DL', 'ATL', 'JAN', '2022-01-06', '11:13', '11:31', 0, 0)]

In [7]:
# Map each Flights column name to its pandas dtype so that read_sql_query can
# restore the same dtypes when the table is loaded back
flights_schema = {
    column: dtype
    for column, dtype in zip(flights_df.columns, flights_df.dtypes)
}

In [8]:
# Round trip: load the whole Flights table back from SQLite, using 'ID' as the
# index and applying the dtypes recorded in flights_schema
flights_query = 'SELECT * FROM Flights'
pd.read_sql_query(
    sql=flights_query,
    con=engine,
    index_col='ID',
    dtype=flights_schema,
)

Unnamed: 0_level_0,Airline_Flight_Number,Operating_Airline,Origin_Airport,Destination_Airport,Flight_Date,Departure_Time,Arrival_Time,Delayed,Cancelled
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,1581,DL,FLL,LGA,2022-01-06,,,0,1.0
1,1582,DL,ATL,FLL,2022-01-06,16:27,18:20,0,0.0
2,1582,DL,FLL,ATL,2022-01-06,19:29,21:15,0,0.0
3,1583,DL,FLL,RDU,2022-01-06,10:19,12:12,0,0.0
4,1584,DL,ATL,JAN,2022-01-06,11:13,11:31,0,0.0
...,...,...,...,...,...,...,...,...,...
4078313,3406,YX,ALB,EWR,2022-07-01,15:16,16:30,1,0.0
4078314,3405,YX,AVL,EWR,2022-07-01,12:36,14:28,0,0.0
4078315,3403,YX,ALB,EWR,2022-07-01,11:53,13:33,1,0.0
4078316,3401,YX,BNA,EWR,2022-07-01,14:58,18:43,1,0.0


## Creating the Delayed Flights table

In [9]:
# Load the Delayed Flights CSV into a DataFrame; the first CSV column becomes the index
delayed_flights_path = Path('Resources/DB_Delayed_Flights_Table.csv')
delayed_flights_df = pd.read_csv(filepath_or_buffer=delayed_flights_path, index_col=0)

# Preview the first ten rows
delayed_flights_df.head(n=10)

Unnamed: 0,Airline_Flight_Number,Arrival_Time,Arrival_Delayed_Minutes,Departure_Time,Departure_Delayed_Minutes,Carrier_Delay_Minutes,Weather_Delay_Minutes,NAS_Delay_Minutes,Security_Delay_Minutes,Late_Aircraft_Delay_Minutes
7,1586,14:35,0 days 00:02:00,10:18,0 days 00:18:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00
8,1587,15:31,0 days 00:00:00,14:22,0 days 00:08:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00
17,1593,10:22,0 days 00:08:00,06:56,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00
19,1595,12:14,0 days 00:40:00,10:46,0 days 00:51:00,0 days 00:02:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:38:00
21,1597,22:30,0 days 00:07:00,20:58,0 days 00:03:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00
22,1598,18:58,0 days 01:43:00,15:30,0 days 01:31:00,0 days 01:31:00,0 days 00:00:00,0 days 00:12:00,0 days 00:00:00,0 days 00:00:00
23,1599,08:57,0 days 00:08:00,06:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00
27,1602,06:10,0 days 00:00:00,23:24,0 days 00:24:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00
28,1603,08:51,0 days 00:43:00,06:02,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:43:00,0 days 00:00:00,0 days 00:00:00
29,1604,23:17,0 days 00:00:00,20:30,0 days 00:02:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00,0 days 00:00:00


In [None]:
# Every Delayed Flights column except the flight number is stored as text
delayed_text_columns = [
    'Arrival_Time',
    'Arrival_Delayed_Minutes',
    'Departure_Time',
    'Departure_Delayed_Minutes',
    'Carrier_Delay_Minutes',
    'Weather_Delay_Minutes',
    'NAS_Delay_Minutes',
    'Security_Delay_Minutes',
    'Late_Aircraft_Delay_Minutes',
]
delayed_flights_sql_types = {'Airline_Flight_Number': Integer()}
delayed_flights_sql_types.update(
    {column: String() for column in delayed_text_columns}
)

# Write the DataFrame to the 'Delayed_Flights' table, replacing any existing
# table. The DataFrame index is written as the 'ID' column.
delayed_flights_df.to_sql(
    name='Delayed_Flights',
    con=engine,
    if_exists='replace',
    index=True,
    index_label='ID',
    dtype=delayed_flights_sql_types,
)

In [None]:
# Check if successful: fetch the first five rows of the Delayed_Flights table.
# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0, so the
# query runs on a short-lived connection that is closed when the block exits.
with engine.connect() as check_conn:
    delayed_flights_rows = check_conn.exec_driver_sql(
        'SELECT * FROM Delayed_Flights LIMIT 5'
    ).fetchall()

delayed_flights_rows

In [None]:
# Map each Delayed Flights column name to its pandas dtype so that
# read_sql_query can restore the same dtypes when the table is loaded back
delayed_flights_schema = {
    column: dtype
    for column, dtype in zip(delayed_flights_df.columns, delayed_flights_df.dtypes)
}

In [None]:
# Round trip: load the first five Delayed_Flights rows back from SQLite, using
# 'ID' as the index and applying the dtypes recorded in delayed_flights_schema
delayed_flights_query = 'SELECT * FROM Delayed_Flights LIMIT 5'
pd.read_sql_query(
    sql=delayed_flights_query,
    con=engine,
    index_col='ID',
    dtype=delayed_flights_schema,
)

## Creating the Airport Codes table

In [None]:
# Load the Airport Codes CSV into a DataFrame; the first CSV column becomes the index
airports_path = Path('Resources/DB_Airport_Codes.csv')
airports_df = pd.read_csv(filepath_or_buffer=airports_path, index_col=0)

# Preview the first ten rows
airports_df.head(n=10)

In [None]:
# Convert DataFrame to SQLITE table, replacing any existing 'Airport_Codes' table.
# Airport_Code is declared as text rather than Integer: the airport columns of
# the Flights table hold alphabetic codes such as 'FLL' and 'ATL', and this
# column presumably holds the same kind of code (verify against the CSV).
# NOTE(review): index=False drops the DataFrame index, which read_csv built
# from the first CSV column -- confirm that column is only a row counter.
airports_df.to_sql('Airport_Codes',
                   con=engine,
                   index=False,
                   if_exists='replace',
                   dtype={
                       'Airport_Code': String(),
                       'Airport_City': String(),
                       'Airport_State': String()
                   })

In [None]:
# Check if successful: fetch the first five rows of the Airport_Codes table.
# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0, so the
# query runs on a short-lived connection that is closed when the block exits.
with engine.connect() as check_conn:
    airport_rows = check_conn.exec_driver_sql(
        'SELECT * FROM Airport_Codes LIMIT 5'
    ).fetchall()

airport_rows

In [None]:
# Round trip: load the first five Airport_Codes rows back from SQLite
airport_codes_query = 'SELECT * FROM Airport_Codes LIMIT 5'
pd.read_sql_query(sql=airport_codes_query, con=engine)

## Creating the Airlines table

In [None]:
# Load the Airlines CSV into a DataFrame; the first CSV column becomes the index
airlines_path = Path('Resources/DB_Airline_Table.csv')
airlines_df = pd.read_csv(filepath_or_buffer=airlines_path, index_col=0)

# Preview the first ten rows
airlines_df.head(n=10)

In [None]:
# Both Airline_Codes columns are stored as text
airlines_sql_types = {
    'Airline_Code': String(),
    'Airline_Name': String(),
}

# Write the DataFrame to the 'Airline_Codes' table, replacing any existing
# table. The DataFrame index is not written.
airlines_df.to_sql(
    name='Airline_Codes',
    con=engine,
    if_exists='replace',
    index=False,
    dtype=airlines_sql_types,
)

In [None]:
# Check if successful: fetch the first five rows of the Airline_Codes table.
# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0, so the
# query runs on a short-lived connection that is closed when the block exits.
with engine.connect() as check_conn:
    airline_rows = check_conn.exec_driver_sql(
        'SELECT * FROM Airline_Codes LIMIT 5'
    ).fetchall()

airline_rows

In [None]:
# Round trip: load the first five Airline_Codes rows back from SQLite
airline_codes_query = 'SELECT * FROM Airline_Codes LIMIT 5'
pd.read_sql_query(sql=airline_codes_query, con=engine)

## Creating the Cancelled Flights table

In [None]:
# Load the Cancelled Flights CSV into a DataFrame; the first CSV column becomes the index
cancelled_flights_path = Path('Resources/DB_Cancelled_Flights_Table.csv')
cancelled_flights_df = pd.read_csv(filepath_or_buffer=cancelled_flights_path, index_col=0)

# Preview the first ten rows
cancelled_flights_df.head(n=10)

In [None]:
# Convert DataFrame to SQLITE table, replacing any existing 'Cancelled_Flights'
# table. The DataFrame index is written as the 'ID' column.
# Airline_Flight_Number is declared as Integer so that it has the same type as
# the Airline_Flight_Number column of the Flights and Delayed_Flights tables.
cancelled_flights_df.to_sql('Cancelled_Flights',
                            con=engine,
                            index=True,
                            index_label='ID',
                            if_exists='replace',
                            dtype={
                                'Airline_Flight_Number': Integer(),
                                'Cancellation_Code': String()
                            })

In [None]:
# Check if successful: fetch the first five rows of the Cancelled_Flights table.
# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0, so the
# query runs on a short-lived connection that is closed when the block exits.
with engine.connect() as check_conn:
    cancelled_flights_rows = check_conn.exec_driver_sql(
        'SELECT * FROM Cancelled_Flights LIMIT 5'
    ).fetchall()

cancelled_flights_rows

In [None]:
# Round trip: load the first five Cancelled_Flights rows back from SQLite,
# using 'ID' as the index
cancelled_flights_query = 'SELECT * FROM Cancelled_Flights LIMIT 5'
pd.read_sql_query(
    sql=cancelled_flights_query,
    con=engine,
    index_col='ID',
)

In [None]:
# Close the explicitly opened connection, then dispose of the engine so that
# any connections it opened for the to_sql / read_sql_query calls are
# released as well
conn.close()
engine.dispose()