# World Weather API Notebook
- Pulls Weather Data
- Bins data into
    - sunny
    - cloudy
    - rainy
    - snowy
- Adds Weather Data to Flights Table

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import datetime as dt
import matplotlib.pyplot as plt

In [2]:
# Load the cleaned flights sample into a DataFrame and preview the first rows.
flights_csv = r'C:\Users\Ryan\Lighthouse\Bootcamp\w6\midterm\Data\cleaned\cleaned_flights_sample.csv'
flights = pd.read_csv(flights_csv, sep=',', low_memory=False)
flights.head()

Unnamed: 0,fl_date,mkt_unique_carrier,branded_code_share,mkt_carrier,mkt_carrier_fl_num,op_unique_carrier,tail_num,op_carrier_fl_num,origin_airport_id,origin,...,flights,distance,carrier_delay,weather_delay,nas_delay,security_delay,late_aircraft_delay,first_dep_time,total_add_gtime,longest_add_gtime
0,2018-10-26,UA,WN,WN,4052,EV,N8541W,2542,12954,LGB,...,1.0,528.0,,,,,,,,
1,2018-03-26,DL,UA,UA,2733,DL,N938FR,351,13930,ORD,...,1.0,404.0,,,18.0,0.0,0.0,,,
2,2019-02-06,WN,NK,NK,5531,OO,N367CA,6266,10868,CAE,...,1.0,93.0,40.0,0.0,,,0.0,,,
3,2019-11-11,DL,DL,DL,629,B6,N907WN,4607,14869,SLC,...,1.0,315.0,,,20.0,0.0,,,,
4,2019-03-25,WN,WN,WN,296,HA,N434YX,4535,14771,SFO,...,1.0,388.0,,,,,,,,


In [3]:
# Normalise fl_date to 'YYYY-MM-DD' strings (strings, not date objects), the
# same representation later given to weather['date_time'] before the merge.
import time
import datetime
parsed_fl_date = pd.to_datetime(flights['fl_date'])
flights['fl_date'] = parsed_fl_date.dt.strftime('%Y-%m-%d')

In [7]:
# DO NOT RUN THIS BLOCK - IT PULLS ALL THE WEATHER DATA - IT TAKES A LONG TIME
# I ALREADY PULLED THE CSVS!!!
# Get weather data from the World Weather Online API via wwo_hist.
import os

from wwo_hist import retrieve_hist_data

# The API key is read from the environment rather than hard-coded, so the
# credential is not stored in the notebook. The key that used to be written
# here should be treated as leaked and rotated.
api_key = os.environ.get('WWO_API_KEY')
if not api_key:
    raise RuntimeError(
        'Set the WWO_API_KEY environment variable before pulling weather data.'
    )

# NOTE(review): frequency is presumably the number of hours between records
# (24 = one record per day) - confirm against the wwo_hist documentation.
frequency = 24
start_date = '01-JAN-2018'
end_date = '31-DEC-2019'
location_list = ['ORD', 'ATL', 'DFW', 'DEN', 'LAX', 'CLT', 'SEA', 'SFO', 'PHX', 'LGA']

# NOTE(review): export_csv=True presumably writes the CSVs that are loaded
# later in this notebook, and store_df=True presumably returns the data as
# DataFrames - confirm against the wwo_hist documentation.
hist_weather_data = retrieve_hist_data(
    api_key,
    location_list,
    start_date,
    end_date,
    frequency,
    location_label=False,
    export_csv=True,
    store_df=True,
)

In [4]:
# Import weather data: read every CSV in the weather export folder and stack
# them into a single table.
# NOTE(review): the original comment describes these as the top 10 airports
# by weather-delay count - confirm the folder contents match that.
import glob

path = r'C:\Users\Ryan\Lighthouse\Bootcamp\w6\midterm\Weather\csv'
all_files = glob.glob(f"{path}/*.csv")

# One DataFrame per file, first line of each file used as the header.
li = [pd.read_csv(csv_file, index_col=None, header=0) for csv_file in all_files]

# ignore_index=True renumbers the rows 0..n-1 across the combined files.
weather = pd.concat(li, axis=0, ignore_index=True)

In [5]:
# Reduce date_time to a calendar date and keep only the columns used for the
# weather binning and for joining onto the flights table.
kept_columns = ['date_time', 'location', 'totalSnow_cm', 'sunHour', 'cloudcover', 'precipMM']
weather['date_time'] = pd.to_datetime(weather['date_time']).dt.date
weather = weather.filter(items=kept_columns)

In [6]:
# Re-format date_time as 'YYYY-MM-DD' strings, the same form used for
# flights['fl_date'], so the two columns compare equal as merge keys.
weather_dates = pd.to_datetime(weather['date_time'])
weather['date_time'] = weather_dates.dt.strftime('%Y-%m-%d')

In [7]:
# Add weather_type to table.
# Each row gets the first label whose condition holds, checked in priority
# order Snowy > Rainy > Cloudy. Rows matching none of them fall back to
# 'Sunny'; that includes rows with NaN measurements, because comparisons
# against NaN are False.
# np.select evaluates the conditions on whole columns at once instead of
# building a Series per row with weather.iloc[i].
weather_conditions = [
    weather['totalSnow_cm'] > 0,
    weather['precipMM'] > 0,
    weather['cloudcover'] > 50,
]
weather_labels = ['Snowy', 'Rainy', 'Cloudy']
weather_type = np.select(weather_conditions, weather_labels, default='Sunny')
weather['weather_type'] = weather_type

In [8]:
# Preview the first rows to spot-check the new weather_type column.
weather.head()

Unnamed: 0,date_time,location,totalSnow_cm,sunHour,cloudcover,precipMM,weather_type
0,2018-01-01,ABE,0.0,6.9,30,0.0,Sunny
1,2018-01-02,ABE,0.0,8.7,27,0.0,Sunny
2,2018-01-03,ABE,0.0,7.0,58,0.0,Cloudy
3,2018-01-04,ABE,0.2,3.5,99,4.7,Snowy
4,2018-01-05,ABE,0.0,5.2,71,0.0,Cloudy


In [9]:
# Left-join weather onto flights by flight date and destination airport code.
# how='left' keeps every flight row; flights with no matching weather record
# get NaN in the weather columns.
flights_weather = flights.merge(
    weather,
    how='left',
    left_on=['fl_date', 'dest'],
    right_on=['date_time', 'location'],
)

In [10]:
# Display the merged table to confirm the weather columns were appended.
flights_weather

Unnamed: 0,fl_date,mkt_unique_carrier,branded_code_share,mkt_carrier,mkt_carrier_fl_num,op_unique_carrier,tail_num,op_carrier_fl_num,origin_airport_id,origin,...,first_dep_time,total_add_gtime,longest_add_gtime,date_time,location,totalSnow_cm,sunHour,cloudcover,precipMM,weather_type
0,2018-10-26,UA,WN,WN,4052,EV,N8541W,2542,12954,LGB,...,,,,2018-10-26,MCO,0.0,8.7,35.0,0.0,Sunny
1,2018-03-26,DL,UA,UA,2733,DL,N938FR,351,13930,ORD,...,,,,2018-03-26,ASE,0.0,11.6,9.0,0.0,Sunny
2,2019-02-06,WN,NK,NK,5531,OO,N367CA,6266,10868,CAE,...,,,,2019-02-06,PHX,0.0,10.8,23.0,1.9,Rainy
3,2019-11-11,DL,DL,DL,629,B6,N907WN,4607,14869,SLC,...,,,,2019-11-11,TUL,0.2,4.0,87.0,5.5,Snowy
4,2019-03-25,WN,WN,WN,296,HA,N434YX,4535,14771,SFO,...,,,,2019-03-25,DCA,0.0,7.5,72.0,0.5,Rainy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
599995,2018-01-28,DL,DL,DL,1140,AA,N200NV,295,13930,ORD,...,,,,2018-01-28,ITO,0.0,11.5,30.0,3.1,Rainy
599996,2019-04-24,AA,AS,AS,3827,WN,N562JB,181,14893,SMF,...,,,,2019-04-24,DFW,0.0,8.5,88.0,67.8,Rainy
599997,2018-09-03,WN,AA_CODESHARE,AA,1570,AA,N7863A,4858,12278,ICT,...,,,,2018-09-03,IAH,0.0,12.4,52.0,7.9,Rainy
599998,2018-04-18,AS,UA,UA,5390,9E,N509JB,494,14100,PHL,...,,,,2018-04-18,FAI,0.0,14.5,6.0,0.0,Sunny


In [11]:
# Check how many merged flight rows carry each weather_type label.
# value_counts() skips NaN by default, so unmatched flights are not listed here.
flights_weather['weather_type'].value_counts()

Rainy     284204
Sunny     261744
Cloudy     28689
Snowy      25351
Name: weather_type, dtype: int64

In [12]:
# Check for NaN: count flight rows that did not get a weather_type from the merge.
# NICE!! We have pretty much all the data!! Only 12 NaN in the run shown below.
flights_weather['weather_type'].isna().sum()

12

In [13]:
# Export the weather table to CSV (relative path, so it lands in the current
# working directory).
# NOTE: index is left at its default, so the row index is written as the first column.
weather.to_csv("weather_table.csv", sep = ",")

In [14]:
# Export the flights table combined with the weather table to CSV (relative
# path, so it lands in the current working directory).
# NOTE: index is left at its default, so the row index is written as the first column.
flights_weather.to_csv("flights_weather_table.csv", sep = ",")