# Imports

In [1]:
import pandas as pd
import psycopg2

# Import Airport Weather Data

In [2]:
airport_weather_df = pd.read_csv("Database/weather/weather_data/weather_2018_df.csv")
airport_weather_df.dtypes

location          object
date_time         object
precipMM         float64
visibility         int64
cloudcover         int64
windspeedKmph      int64
humidity           int64
dtype: object

In [3]:
# Create Date and Hour Column
airport_weather_df["date"] = airport_weather_df["date_time"].str[0:10]
airport_weather_df["date"] = "2021" + airport_weather_df["date"].str[4:10]
airport_weather_df["hour"] = airport_weather_df["date_time"].str[11:13].apply(int)

In [4]:
# Convert metric weather columns to US units
airport_weather_df["windspeed"] = airport_weather_df["windspeedKmph"]*(0.62137119)
airport_weather_df.head()

Unnamed: 0,location,date_time,precipMM,visibility,cloudcover,windspeedKmph,humidity,date,hour,windspeed
0,"islip,ny",2018-01-01 00:00:00,0.0,10,61,27,77,2021-01-01,0,16.777022
1,"islip,ny",2018-01-01 01:00:00,0.0,10,50,28,76,2021-01-01,1,17.398393
2,"islip,ny",2018-01-01 02:00:00,0.0,10,40,29,76,2021-01-01,2,18.019765
3,"islip,ny",2018-01-01 03:00:00,0.0,10,29,30,75,2021-01-01,3,18.641136
4,"islip,ny",2018-01-01 04:00:00,0.0,10,20,29,74,2021-01-01,4,18.019765


In [5]:
airport_weather_df.dtypes

location          object
date_time         object
precipMM         float64
visibility         int64
cloudcover         int64
windspeedKmph      int64
humidity           int64
date              object
hour               int64
windspeed        float64
dtype: object

In [6]:
airport_weather_df.describe()

Unnamed: 0,precipMM,visibility,cloudcover,windspeedKmph,humidity,hour,windspeed
count,252960.0,252960.0,252960.0,252960.0,252960.0,252960.0,252960.0
mean,0.034721,9.213824,43.878091,13.523929,70.751731,11.5,8.40338
std,0.222248,2.120176,37.572798,8.238974,20.570284,6.9222,5.119461
min,0.0,0.0,0.0,0.0,3.0,0.0,0.0
25%,0.0,10.0,7.0,7.0,57.0,5.75,4.349598
50%,0.0,10.0,35.0,12.0,76.0,11.5,7.456454
75%,0.0,10.0,83.0,18.0,87.0,17.25,11.184681
max,13.5,10.0,100.0,69.0,100.0,23.0,42.874612


In [7]:
# Round floating point values
airport_weather_df["windspeed"] = airport_weather_df["windspeed"].round(0)
airport_weather_df["precipMM"] = airport_weather_df["precipMM"].round(2)

In [8]:
# Drop columns that have been reformated
airport_weather_df.drop(["windspeedKmph", "date_time"], axis=1, inplace=True)

In [9]:
# Rename windspeed column to remove unit suffix
airport_weather_df.rename({"precipMM":"precipitation"}, inplace=True)

In [10]:
airport_weather_df.head()

Unnamed: 0,location,precipMM,visibility,cloudcover,humidity,date,hour,windspeed
0,"islip,ny",0.0,10,61,77,2021-01-01,0,17.0
1,"islip,ny",0.0,10,50,76,2021-01-01,1,17.0
2,"islip,ny",0.0,10,40,76,2021-01-01,2,18.0
3,"islip,ny",0.0,10,29,75,2021-01-01,3,19.0
4,"islip,ny",0.0,10,20,74,2021-01-01,4,18.0


# Get Airport Information

In [11]:
# Get airport information from database
sql="SELECT code, city, state FROM airports"
url="postgresql://postgres:faB17&rigeme@flightsdata.cxtoxxxge4vx.us-east-2.rds.amazonaws.com:5432/flightsdata"
airport_code_city_df = pd.read_sql(sql, url)

In [12]:
## Create airport location field to join with weather data on location field
airport_code_city_df["location"] = airport_code_city_df["city"].str.lower().str.replace(" ", "")

airport_code_city_df.loc[(airport_code_city_df["location"] == "orangecounty/santaana"),["location"]] = "santaana"
airport_code_city_df.loc[(airport_code_city_df["location"] == "washington-dulles"),["location"]] = "washington"
airport_code_city_df.loc[(airport_code_city_df["location"] == "washington-reagannational"),["location"]] = "washington"
airport_code_city_df.loc[(airport_code_city_df["location"] == "orangecounty/santaana"),["location"]] = "santaana"
airport_code_city_df.loc[(airport_code_city_df["location"] == "honolulu-islandofoahu"),["location"]] = "honolulu"
airport_code_city_df.loc[(airport_code_city_df["location"] == "chicago-midway"),["location"]] = "chicago"
airport_code_city_df.loc[(airport_code_city_df["location"] == "chicago-o'hare"),["location"]] = "chicago"
airport_code_city_df.loc[(airport_code_city_df["location"] == "raleigh/durham"),["location"]] = "durham"
airport_code_city_df.loc[(airport_code_city_df["location"] == "newyork-kennedy"),["location"]] = "newyork"
airport_code_city_df.loc[(airport_code_city_df["location"] == "newyork-laguardia"),["location"]] = "newyork"
airport_code_city_df.loc[(airport_code_city_df["location"] == "dallas/fortworth"),["location"]] = "dallas"
airport_code_city_df.loc[(airport_code_city_df["location"] == "houston-georgebushintercontinental"),["location"]] ="houston"
airport_code_city_df.loc[(airport_code_city_df["location"] == "houston-hobby"),["location"]] = "houston"
airport_code_city_df.loc[(airport_code_city_df["location"] == "seattle/tacoma"),["location"]] = "seattle"

airport_code_city_df["location"] = airport_code_city_df["location"] + "," + airport_code_city_df["state"].str.lower()
airport_code_city_df.head(50)

Unnamed: 0,code,city,state,location
0,PHX,Phoenix,AZ,"phoenix,az"
1,BUR,Burbank,CA,"burbank,ca"
2,LAX,Los Angeles,CA,"losangeles,ca"
3,OAK,Oakland,CA,"oakland,ca"
4,SNA,Orange County/Santa Ana,CA,"santaana,ca"
5,SMF,Sacramento,CA,"sacramento,ca"
6,SAN,San Diego,CA,"sandiego,ca"
7,SFO,San Francisco,CA,"sanfrancisco,ca"
8,SJC,San Jose,CA,"sanjose,ca"
9,DEN,Denver,CO,"denver,co"


In [13]:
# Drop city and state columns
airport_code_city_df.drop(["city", "state"], axis=1, inplace=True)

# Join Weather Data with Airport Data

In [14]:
## Join weather data with airport data
airport_weather_df = airport_weather_df.join(airport_code_city_df.set_index("location"), on="location", how="inner")
airport_weather_df.head(20)

Unnamed: 0,location,precipMM,visibility,cloudcover,humidity,date,hour,windspeed,code
744,"sanfrancisco,ca",0.0,10,24,80,2021-01-01,0,14.0,SFO
745,"sanfrancisco,ca",0.0,10,28,79,2021-01-01,1,12.0,SFO
746,"sanfrancisco,ca",0.0,10,31,79,2021-01-01,2,11.0,SFO
747,"sanfrancisco,ca",0.1,10,35,78,2021-01-01,3,10.0,SFO
748,"sanfrancisco,ca",0.0,10,40,77,2021-01-01,4,9.0,SFO
749,"sanfrancisco,ca",0.0,10,45,77,2021-01-01,5,7.0,SFO
750,"sanfrancisco,ca",0.0,10,50,76,2021-01-01,6,6.0,SFO
751,"sanfrancisco,ca",0.0,10,48,73,2021-01-01,7,6.0,SFO
752,"sanfrancisco,ca",0.1,9,46,69,2021-01-01,8,6.0,SFO
753,"sanfrancisco,ca",0.1,9,45,66,2021-01-01,9,5.0,SFO


In [16]:
airport_weather_df.drop(["location"], axis=1, inplace=True)
airport_weather_df = airport_weather_df[["code", "date", "hour", "visibility", "cloudcover", "windspeed", "humidity", "preciptation"]]

KeyError: "['location'] not found in axis"

In [None]:
airport_weather_df.head()

In [None]:
airport_weather_df