# Imports

In [1]:
import os

import pandas as pd
import psycopg2

# Import Airport Weather Data

In [2]:
# Load the 2018 weather export and reshape it in a single pipeline:
#   1. rename the two unit-suffixed columns (precipMM, windspeedKmph) to plain names,
#   2. derive a "date" column from the date_time text with the year replaced by 2021,
#   3. derive an integer "hour" column from characters 11-12 of the date_time text,
#   4. drop date_time now that date and hour carry its information.
airport_weather_df = (
    pd.read_csv("Database/weather/weather_data/weather_2018_df.csv")
    .rename(columns={"precipMM": "precipitation", "windspeedKmph": "windspeed"})
    .assign(
        # Characters 4-9 of the timestamp text follow the four-character year,
        # so prefixing "2021" swaps the year and keeps the rest of the date.
        date=lambda frame: "2021" + frame["date_time"].str[4:10],
        hour=lambda frame: frame["date_time"].str[11:13].map(int),
    )
    .drop(columns=["date_time"])
)

print(f"There are {len(airport_weather_df)} rows of weather data")

airport_weather_df.head()

There are 252960 rows of weather data


Unnamed: 0,location,precipitation,visibility,cloudcover,windspeed,humidity,date,hour
0,"islip,ny",0.0,10,61,27,77,2021-01-01,0
1,"islip,ny",0.0,10,50,28,76,2021-01-01,1
2,"islip,ny",0.0,10,40,29,76,2021-01-01,2
3,"islip,ny",0.0,10,29,30,75,2021-01-01,3
4,"islip,ny",0.0,10,20,29,74,2021-01-01,4


# Get Dashboard Airport Codes

In [3]:
# Get airport information from database.
#
# The connection URL (which embeds the database user and password) is read from the
# FLIGHTSDATA_DB_URL environment variable so that no credentials are stored in the
# notebook or its version history. Expected form:
#   postgresql://USER:PASSWORD@HOST:5432/flightsdata
# NOTE(review): the password that used to be written in this cell should be treated
# as exposed and rotated.
sql = "SELECT code FROM airports"
url = os.environ.get("FLIGHTSDATA_DB_URL")
if not url:
    # Fail early with an actionable message instead of an opaque driver error.
    raise RuntimeError(
        "FLIGHTSDATA_DB_URL is not set; export the flights database connection URL "
        "(postgresql://USER:PASSWORD@HOST:5432/flightsdata) before running this cell."
    )
airport_codes_df = pd.read_sql(sql, url)

# Get Weather City to Airport Code Reference Data

In [4]:
# Load the weather-city to airport-code lookup, give its two columns the names used
# by the rest of the notebook ("code", "location"), and drop the "Unnamed: 0" column.
airport_weather_city_code_df = (
    pd.read_csv("Flight_data_files/city_code_api.csv")
    .rename(columns={"Airport Code": "code", "City": "location"})
    .drop(columns=["Unnamed: 0"])
)
print(f"There weather data for {len(airport_weather_city_code_df)} cities")

There weather data for 347 cities


# Filter Weather City to Airport Code Reference Data

In [5]:
# Restrict the city/code lookup to airports that exist in the dashboard database:
# an inner join against the database codes keeps only matching rows, after which
# the row index is renumbered from zero.
airport_weather_city_code_df = (
    airport_weather_city_code_df
    .join(airport_codes_df.set_index("code"), on=["code"], how="inner")
    .reset_index(drop=True)
)
airport_weather_city_code_df.head(50)

Unnamed: 0,code,location
0,ATL,"atlanta,ga"
1,AUS,"austin,tx"
2,BWI,"baltimore,md"
3,BOS,"boston,ma"
4,BUR,"burbank,ca"
5,CLT,"charlotte,nc"
6,MDW,"chicago,il"
7,ORD,"chicago,il"
8,CVG,"cincinnati,oh"
9,CLE,"cleveland,oh"


# Filter Weather Data for the Cities with Dashboard Airport Codes

In [6]:
# Attach the airport code to every weather row by matching on "location".
# The inner join discards weather rows for cities that are not in the filtered lookup.
# A location listed under more than one code in the lookup (e.g. "chicago,il" for
# MDW and ORD) yields one weather row per code.
airport_weather_df = airport_weather_df.join(
    airport_weather_city_code_df.set_index("location"),
    how="inner",
    on="location",
)

In [7]:
# Keep only the columns written to the output file, in a fixed order.
# The selection already leaves "location" out, so no separate in-place drop is
# performed; this also lets the cell be re-run without a KeyError for a column
# that an earlier run had already removed.
airport_weather_df = airport_weather_df[["code", "date", "hour", "visibility", "cloudcover", "windspeed", "humidity", "precipitation"]]

# Report the number of distinct airport codes actually present in the data rather
# than a hard-coded figure, so the message stays correct if the airport list changes.
print(f"The weather data for the {airport_weather_df['code'].nunique()} dashboard cities contains {len(airport_weather_df)} rows")
airport_weather_df.head(20)

The weather data for the 50 dashboard cities contains 37200 rows


Unnamed: 0,code,date,hour,visibility,cloudcover,windspeed,humidity,precipitation
744,SFO,2021-01-01,0,10,24,22,80,0.0
745,SFO,2021-01-01,1,10,28,20,79,0.0
746,SFO,2021-01-01,2,10,31,18,79,0.0
747,SFO,2021-01-01,3,10,35,16,78,0.1
748,SFO,2021-01-01,4,10,40,14,77,0.0
749,SFO,2021-01-01,5,10,45,12,77,0.0
750,SFO,2021-01-01,6,10,50,10,76,0.0
751,SFO,2021-01-01,7,10,48,9,73,0.0
752,SFO,2021-01-01,8,9,46,9,69,0.1
753,SFO,2021-01-01,9,9,45,8,66,0.1


In [8]:
# Write the final weather table to CSV; index=False leaves the row index out of the file.
airport_weather_df.to_csv(path_or_buf="Flight_data_files/airport_weather.csv", index=False)