In [2]:
import numpy as np
import pandas as pd
import os
import json
import requests

# Show every column when a DataFrame is rendered (no column truncation).
pd.options.display.max_columns = None

In [3]:
# Load the flight table from the Feather file at data/v2_clean_flight
# (path is relative to the notebook's working directory).
flight_df = pd.read_feather('data/v2_clean_flight')

In [4]:
def lat_lon_api(city, timeout=10):
    """Forward-geocode a place name with the positionstack API.

    Parameters
    ----------
    city : str
        Free-text query sent as the ``query`` parameter
        (e.g. ``"Burlington, VT"``).
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Defaults to 10.

    Returns
    -------
    object
        The decoded JSON body of the response, returned unchanged. Only the
        first match is requested (``limit`` is 1).

    Raises
    ------
    KeyError
        If the ``position_stack_key`` environment variable is not set.
    requests.exceptions.Timeout
        If the server does not answer within ``timeout`` seconds.
    """
    # NOTE(review): the access key is sent over plain HTTP here; switch the
    # scheme to https if the positionstack plan in use supports it.
    url = 'http://api.positionstack.com/v1/forward'
    position_stack_key = os.environ['position_stack_key']
    params = {'query': city,
              'access_key': position_stack_key,
              'limit': 1}
    # Without an explicit timeout, requests waits forever on a stalled
    # connection, which would freeze the whole notebook run.
    response = requests.get(url, params=params, timeout=timeout).json()
    return response


In [5]:
# Work on an independent copy of the first 100 flights so later column
# assignments do not touch flight_df.
sample_weather_df = flight_df.iloc[:100].copy()

In [6]:
def _first_coordinates(response):
    """Return ``(latitude, longitude)`` of the first geocoding result, or ``(None, None)``."""
    results = response.get('data') if isinstance(response, dict) else None
    first = results[0] if results else None
    # An error payload has no 'data'; an unmatched query yields an empty list
    # (or an empty placeholder entry), so anything but a dict means "no hit".
    if not isinstance(first, dict):
        return None, None
    return first.get('latitude'), first.get('longitude')


def lat_grabber(df):
    """Geocode the origin city of every row in ``df``.

    Each distinct value of ``df['origin_city_name']`` is looked up once via
    ``lat_lon_api``; repeated cities reuse the stored result, so the number
    of API requests equals the number of distinct cities, not of rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide an ``'origin_city_name'`` column.

    Returns
    -------
    tuple of (list, list)
        ``(lat_list, lon_list)`` with one entry per row, in row order.
        An entry is ``None`` when the API response carries no coordinate for
        that city (no match, error payload, or missing key). A real ``None``
        is used rather than the string ``'None'`` so the resulting columns
        stay numeric and missing values are detectable with ``isna``.
    """
    coords_by_city = {}
    lat_list = []
    lon_list = []
    for city in df['origin_city_name']:
        if city not in coords_by_city:
            coords_by_city[city] = _first_coordinates(lat_lon_api(city))
        lat, lon = coords_by_city[city]
        lat_list.append(lat)
        lon_list.append(lon)
    return lat_list, lon_list

In [7]:
# Geocode the origin city of each sampled flight. lat_grabber issues
# geocoding API requests, so this cell needs network access and the
# position_stack_key environment variable.
lat_list, lon_list = lat_grabber(sample_weather_df)

In [8]:
# Attach the geocoded coordinates as the two right-most columns.
# weather_grabber reads latitude/longitude from the last two column
# positions, so these must be added before it is called.
sample_weather_df['latitude'] = lat_list
sample_weather_df['longitude'] = lon_list
# Bare last expression: render the frame as the cell output.
sample_weather_df

Unnamed: 0,fl_date,mkt_unique_carrier,branded_code_share,mkt_carrier,mkt_carrier_fl_num,op_unique_carrier,tail_num,op_carrier_fl_num,origin_airport_id,origin,origin_city_name,dest_airport_id,dest,dest_city_name,crs_dep_time,dep_delay,taxi_out,taxi_in,crs_arr_time,arr_delay,crs_elapsed_time,distance,month,flight_duration,hour,state/country,latitude,longitude
0,2019-10-28,AA,AA_CODESHARE,AA,4377,YX,N115HQ,4377,10785,BTV,"Burlington, VT",14100,PHL,"Philadelphia, PA",543,-4.0,10.0,8.0,713,-15.0,90.0,335.0,10,short,7,VT,44.481922,-73.227342
1,2018-06-04,DL,DL_CODESHARE,DL,5491,EV,N741EV,5491,12953,LGA,"New York, NY",11042,CLE,"Cleveland, OH",1415,-5.0,24.0,7.0,1603,-16.0,108.0,419.0,6,short,16,NY,40.682950,-73.970800
2,2018-07-31,WN,WN,WN,562,WN,N479WN,562,13198,MCI,"Kansas City, MO",13487,MSP,"Minneapolis, MN",940,-4.0,9.0,5.0,1055,-8.0,75.0,393.0,7,short,10,MO,39.051003,-94.541461
3,2019-10-05,WN,WN,WN,4755,WN,N452WN,4755,14107,PHX,"Phoenix, AZ",14570,RNO,"Reno, NV",1755,21.0,6.0,2.0,1935,13.0,100.0,601.0,10,short,19,AZ,33.605030,-112.070892
4,2018-05-11,WN,WN,WN,1002,WN,N706SW,1002,11884,GEG,"Spokane, WA",13796,OAK,"Oakland, CA",515,1.0,9.0,5.0,720,-14.0,125.0,723.0,5,medium,7,WA,47.660011,-117.405800
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2018-08-22,UA,UA_CODESHARE,UA,4891,C5,N16147,4891,11618,EWR,"Newark, NJ",13244,MEM,"Memphis, TN",1455,68.0,28.0,4.0,1644,78.0,169.0,946.0,8,medium,16,NJ,40.718707,-74.185043
96,2019-05-27,AA,AA,AA,1747,AA,N501AA,1747,11298,DFW,"Dallas/Fort Worth, TX",14730,SDF,"Louisville, KY",1240,8.0,19.0,5.0,1546,-3.0,126.0,733.0,5,medium,15,TX,32.729442,-97.331810
97,2019-05-16,DL,DL,DL,1367,DL,N964DL,1367,11986,GRR,"Grand Rapids, MI",10397,ATL,"Atlanta, GA",1109,-6.0,10.0,5.0,1305,-18.0,116.0,640.0,5,short,13,MI,42.966709,-85.661973
98,2019-10-13,AS,AS_CODESHARE,AS,2101,QX,N421QX,2101,14252,PSC,"Pasco/Kennewick/Richland, WA",14747,SEA,"Seattle, WA",550,0.0,13.0,5.0,655,-6.0,65.0,172.0,10,short,6,WA,46.295086,-119.295500


In [9]:
def weather_api(lat, lon, start, timezone='America/New_York', timeout=30):
    """Request one day of historical weather from the Open-Meteo archive API.

    Parameters
    ----------
    lat, lon : float
        Coordinates sent as ``latitude`` / ``longitude``.
    start : str
        Date sent as both ``start_date`` and ``end_date``, so exactly one day
        is requested.
        NOTE(review): presumably must be formatted ``YYYY-MM-DD`` -- confirm
        against the Open-Meteo docs.
    timezone : str, optional
        Value of the ``timezone`` query parameter. Defaults to
        ``'America/New_York'`` (the previously hard-coded value).
        NOTE(review): Open-Meteo documents ``'auto'`` for resolving the
        timezone from the coordinates -- confirm before relying on it.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Defaults to 30.

    Returns
    -------
    object
        The decoded JSON body, returned unchanged. The request asks for the
        hourly ``cloudcover`` series and the daily ``rain_sum`` and
        ``snowfall_sum`` values.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not answer within ``timeout`` seconds.
    """
    url = 'https://archive-api.open-meteo.com/v1/archive'
    params = {'latitude': lat,
              'longitude': lon,
              'start_date': start,
              'end_date': start,
              'hourly': 'cloudcover',
              'daily': ['rain_sum', 'snowfall_sum'],
              'timezone': timezone,
              }
    # Without an explicit timeout, requests waits forever on a stalled
    # connection, which would freeze the per-row download loop.
    response = requests.get(url, params=params, timeout=timeout).json()
    return response

In [10]:
def weather_grabber(data):
    """Download daily weather for every row of ``data``.

    For each row, ``weather_api`` is called with the row's latitude,
    longitude and date; the hourly cloud-cover readings are averaged and the
    daily rain and snowfall sums are collected.

    Parameters
    ----------
    data : pandas.DataFrame
        The date is read from the ``'fl_date'`` column and the coordinates
        from ``'latitude'`` / ``'longitude'`` when those columns exist. If a
        name is absent, the original positional layout is used instead
        (first column = date, second-to-last = latitude, last = longitude).
        Looking columns up by name keeps the function correct after further
        columns have been appended to the frame.

    Returns
    -------
    tuple of four lists
        ``(cloud_list, rain_list, snow_list, sunny_list)``, one entry per
        row. ``cloud`` is the mean of the non-null hourly cloud-cover values
        (NaN when every reading is null) and ``sunny`` is ``100 - cloud``.
    """
    columns = list(data.columns)

    def _position(name, fallback):
        # Prefer the named column; fall back to the legacy fixed position.
        return columns.index(name) if name in columns else fallback

    date_pos = _position('fl_date', 0)
    lat_pos = _position('latitude', -2)
    lon_pos = _position('longitude', -1)

    cloud_list = []
    rain_list = []
    snow_list = []
    sunny_list = []
    for info in data.values:
        response = weather_api(info[lat_pos], info[lon_pos], info[date_pos])
        # Null hourly readings would make np.mean raise TypeError, so they
        # are dropped before averaging.
        readings = [value for value in response['hourly']['cloudcover']
                    if value is not None]
        cloud = np.mean(readings) if readings else np.nan
        cloud_list.append(cloud)
        sunny_list.append(100 - cloud)
        rain_list.append(response['daily']['rain_sum'][0])
        snow_list.append(response['daily']['snowfall_sum'][0])
    return cloud_list, rain_list, snow_list, sunny_list

In [11]:
# Download weather for every sampled flight. weather_grabber returns its
# lists in the order (cloud, rain, snow, sunny), so the names must be
# unpacked in that same order. Requires network access.
cloud,rain,snow,sunny = weather_grabber(sample_weather_df)

In [13]:
# Append the weather results as new columns. After this cell, latitude and
# longitude are no longer the last two columns of sample_weather_df.
sample_weather_df['cloud(%)'] = cloud
sample_weather_df['sunny(%)'] = sunny
sample_weather_df['rain'] = rain
sample_weather_df['snow'] = snow
# Bare last expression: render the frame as the cell output.
sample_weather_df

Unnamed: 0,fl_date,mkt_unique_carrier,branded_code_share,mkt_carrier,mkt_carrier_fl_num,op_unique_carrier,tail_num,op_carrier_fl_num,origin_airport_id,origin,origin_city_name,dest_airport_id,dest,dest_city_name,crs_dep_time,dep_delay,taxi_out,taxi_in,crs_arr_time,arr_delay,crs_elapsed_time,distance,month,flight_duration,hour,state/country,latitude,longitude,cloud(%),sunny(%),rain,snow
0,2019-10-28,AA,AA_CODESHARE,AA,4377,YX,N115HQ,4377,10785,BTV,"Burlington, VT",14100,PHL,"Philadelphia, PA",543,-4.0,10.0,8.0,713,-15.0,90.0,335.0,10,short,7,VT,44.481922,-73.227342,73.250000,26.750000,0.5,0.0
1,2018-06-04,DL,DL_CODESHARE,DL,5491,EV,N741EV,5491,12953,LGA,"New York, NY",11042,CLE,"Cleveland, OH",1415,-5.0,24.0,7.0,1603,-16.0,108.0,419.0,6,short,16,NY,40.682950,-73.970800,79.083333,20.916667,7.4,0.0
2,2018-07-31,WN,WN,WN,562,WN,N479WN,562,13198,MCI,"Kansas City, MO",13487,MSP,"Minneapolis, MN",940,-4.0,9.0,5.0,1055,-8.0,75.0,393.0,7,short,10,MO,39.051003,-94.541461,21.833333,78.166667,0.0,0.0
3,2019-10-05,WN,WN,WN,4755,WN,N452WN,4755,14107,PHX,"Phoenix, AZ",14570,RNO,"Reno, NV",1755,21.0,6.0,2.0,1935,13.0,100.0,601.0,10,short,19,AZ,33.605030,-112.070892,0.000000,100.000000,0.0,0.0
4,2018-05-11,WN,WN,WN,1002,WN,N706SW,1002,11884,GEG,"Spokane, WA",13796,OAK,"Oakland, CA",515,1.0,9.0,5.0,720,-14.0,125.0,723.0,5,medium,7,WA,47.660011,-117.405800,53.250000,46.750000,0.8,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2018-08-22,UA,UA_CODESHARE,UA,4891,C5,N16147,4891,11618,EWR,"Newark, NJ",13244,MEM,"Memphis, TN",1455,68.0,28.0,4.0,1644,78.0,169.0,946.0,8,medium,16,NJ,40.718707,-74.185043,58.916667,41.083333,7.9,0.0
96,2019-05-27,AA,AA,AA,1747,AA,N501AA,1747,11298,DFW,"Dallas/Fort Worth, TX",14730,SDF,"Louisville, KY",1240,8.0,19.0,5.0,1546,-3.0,126.0,733.0,5,medium,15,TX,32.729442,-97.331810,29.375000,70.625000,0.0,0.0
97,2019-05-16,DL,DL,DL,1367,DL,N964DL,1367,11986,GRR,"Grand Rapids, MI",10397,ATL,"Atlanta, GA",1109,-6.0,10.0,5.0,1305,-18.0,116.0,640.0,5,short,13,MI,42.966709,-85.661973,43.125000,56.875000,5.5,0.0
98,2019-10-13,AS,AS_CODESHARE,AS,2101,QX,N421QX,2101,14252,PSC,"Pasco/Kennewick/Richland, WA",14747,SEA,"Seattle, WA",550,0.0,13.0,5.0,655,-6.0,65.0,172.0,10,short,6,WA,46.295086,-119.295500,50.833333,49.166667,0.4,0.0


In [14]:
# Persist the enriched 100-flight sample as a Feather file
# (path is relative to the notebook's working directory).
sample_weather_df.to_feather('data/flight_weather_100')