In [1]:
import datetime
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine, inspect

from Resources.config import sqlpass


ModuleNotFoundError: No module named 'Resources.config'

In [None]:
#locations of the two raw CSV inputs, relative to the notebook
#(the flights path is defined here and read in a later cell)
airport = "Resources/airports.csv"
flights = "Resources/flights.csv"

#load the airports file into a DataFrame
airport_df = pd.read_csv(filepath_or_buffer=airport)

#preview the first rows
airport_df.head()


In [None]:
#load the flights file (path defined alongside the airports path) into a DataFrame
flights_df = pd.read_csv(filepath_or_buffer=flights)

#preview the first rows
flights_df.head()


In [None]:
#restrict the airports to New York State (iso_region 'US-NY')
in_new_york = airport_df['iso_region'].eq('US-NY')
NY_airports = airport_df.loc[in_new_york]

#drop columns that are blank or duplicate information held elsewhere
cleaned = NY_airports.drop(
    columns=['continent', 'home_link', 'wikipedia_link', 'keywords']
)

#keep only rows that have an iata_code; this removes heliports, local hangars, etc.
airport_cleaned = cleaned.dropna(subset=['iata_code'])
airport_cleaned.head(20)


In [None]:
#drop the taxi and wheels-on/off columns, which are not needed downstream;
#drop() returns a new frame, so flights_df itself is left untouched
cleaned_flights = flights_df.drop(
    columns=[
        'TAXI_OUT',
        'WHEELS_OFF',
        'WHEELS_ON',
        'TAXI_IN',
    ]
)
cleaned_flights.head()

In [None]:
#keep only flights that depart from or arrive at one of the cleaned NY airports
#(matched on iata_code); a flight is kept when EITHER endpoint is a NY airport
ny_iata_codes = airport_cleaned['iata_code']
departs_from_ny = cleaned_flights['ORIGIN_AIRPORT'].isin(ny_iata_codes)
arrives_in_ny = cleaned_flights['DESTINATION_AIRPORT'].isin(ny_iata_codes)

#.copy() makes NY_flights an independent frame instead of a slice of cleaned_flights,
#so adding or changing columns on it later does not trigger SettingWithCopyWarning
NY_flights = cleaned_flights[departs_from_ny | arrives_in_ny].copy()
NY_flights.head()

In [None]:
#build the DATE column from the YEAR, MONTH and DAY columns
#pd.to_datetime can assemble datetimes from year/month/day columns in a single
#vectorized call, which avoids formatting a string for every row with apply()
date_parts = NY_flights[['YEAR', 'MONTH', 'DAY']].rename(columns=str.lower)

#add DATE as a new last column and drop the now-redundant YEAR, MONTH and DAY columns
#assign()/drop() return a new frame, so nothing is written into a possible slice
#of cleaned_flights
NY_flights = (
    NY_flights
    .assign(DATE=pd.to_datetime(date_parts))
    .drop(columns=['YEAR', 'MONTH', 'DAY'])
)


In [None]:
# final column order for the flights table
# NOTE: selecting by this list also drops any column of NY_flights not named here
flight_columns = [
    'DATE', 'DAY_OF_WEEK', 'AIRLINE', 'FLIGHT_NUMBER',
    'TAIL_NUMBER', 'ORIGIN_AIRPORT', 'DESTINATION_AIRPORT',
    'SCHEDULED_DEPARTURE', 'DEPARTURE_TIME', 'DEPARTURE_DELAY',
    'SCHEDULED_TIME', 'ARRIVAL_DELAY', 'DIVERTED', 'CANCELLED',
    'CANCELLATION_REASON', 'AIR_SYSTEM_DELAY', 'SECURITY_DELAY', 'AIRLINE_DELAY',
    'LATE_AIRCRAFT_DELAY', 'WEATHER_DELAY',
]
NY_flights = NY_flights[flight_columns]
NY_flights.head()

In [None]:
#create connection to engine
#percent-encode the password before placing it in the URL: characters such as
#'@', '/', ':' or '%' would otherwise be misread as URL delimiters
#safe='' makes quote() encode '/' as well, which it leaves alone by default
encoded_sqlpass = quote(sqlpass, safe='')
engine = create_engine(f'postgresql://postgres:{encoded_sqlpass}@localhost:5432/Airport_Delays')

#open a connection up front so bad credentials or an unreachable server fail here
connection = engine.connect()

In [None]:
#check for tables
#Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
#the Inspector API returns the same list of table names
inspect(engine).get_table_names()

In [None]:
#append the cleaned NY flights to the 'Flights' table
#index=False keeps the DataFrame index out of the written columns
#NOTE: if_exists='append' means re-running this cell inserts the same rows again
NY_flights.to_sql(
    name='Flights',
    con=engine,
    if_exists='append',
    index=False,
)


In [None]:
#append the cleaned NY airports to the 'Airports' table
#index=False keeps the DataFrame index out of the written columns
#NOTE: if_exists='append' means re-running this cell inserts the same rows again
airport_cleaned.to_sql(
    name='Airports',
    con=engine,
    if_exists='append',
    index=False,
)