## Aquire weather data for test_flights

In [1]:
import pandas as pd
import numpy as np

In [2]:
import datetime as dt

In [3]:
import requests
import time

### Create date/city unique combos table

In [5]:
flight_unique = pd.read_csv('data/flights_test_uniques.csv', index_col=0)

In [6]:
flight_unique.head()

Unnamed: 0,fl_date,city,count,weather_categ
758,2020-01-03,"Chicago, IL",2591,empty
408,2020-01-02,"Chicago, IL",2587,empty
1463,2020-01-05,"Chicago, IL",2556,empty
1813,2020-01-06,"Chicago, IL",2556,empty
2166,2020-01-07,"Chicago, IL",2413,empty


### Import lat long table
table was cleaned in EDA_Task3 notebook

In [7]:
#api requires lat/long, found a csv of american cities and lat/longs
latlong_df = pd.read_csv('data/uscities_ll.csv')

In [8]:
latlong_df.shape

(30409, 6)

In [9]:
latlong_df

Unnamed: 0,city,state_id,lat,lng,timezone,city_state
0,New York,NY,40.6943,-73.9249,America/New_York,"New York, NY"
1,Los Angeles,CA,34.1141,-118.4068,America/Los_Angeles,"Los Angeles, CA"
2,Chicago,IL,41.8375,-87.6866,America/Chicago,"Chicago, IL"
3,Miami,FL,25.7840,-80.2101,America/New_York,"Miami, FL"
4,Dallas,TX,32.7935,-96.7667,America/Chicago,"Dallas, TX"
...,...,...,...,...,...,...
30404,Drummond,ID,43.9996,-111.3433,America/Boise,"Drummond, ID"
30405,Lost Springs,WY,42.7652,-104.9255,America/Denver,"Lost Springs, WY"
30406,Provo,SD,43.1937,-103.8329,America/Denver,"Provo, SD"
30407,Goldcreek,MT,46.5838,-112.9284,America/Denver,"Goldcreek, MT"


In [10]:
latlong_df['city_state'].nunique()

30351

In [11]:
latlong_df = latlong_df.drop_duplicates(subset='city_state', keep="first")

In [12]:
latlong_df

Unnamed: 0,city,state_id,lat,lng,timezone,city_state
0,New York,NY,40.6943,-73.9249,America/New_York,"New York, NY"
1,Los Angeles,CA,34.1141,-118.4068,America/Los_Angeles,"Los Angeles, CA"
2,Chicago,IL,41.8375,-87.6866,America/Chicago,"Chicago, IL"
3,Miami,FL,25.7840,-80.2101,America/New_York,"Miami, FL"
4,Dallas,TX,32.7935,-96.7667,America/Chicago,"Dallas, TX"
...,...,...,...,...,...,...
30404,Drummond,ID,43.9996,-111.3433,America/Boise,"Drummond, ID"
30405,Lost Springs,WY,42.7652,-104.9255,America/Denver,"Lost Springs, WY"
30406,Provo,SD,43.1937,-103.8329,America/Denver,"Provo, SD"
30407,Goldcreek,MT,46.5838,-112.9284,America/Denver,"Goldcreek, MT"


### API functions

In [13]:
#open weather api function
def open_weather_api(lat, long, date, timezone = "America%2FChicago"):
    """
    function returns individual api calls by location and date. 
    """
    #timezone = America%2FChicago #timezone format, opportunity to fine tune this - doesn't work for some reason, I think formatting
    
    url = f"https://archive-api.open-meteo.com/v1/era5?latitude={lat}&longitude={long}&start_date={date}&end_date={date}&hourly=precipitation,rain,snowfall,cloudcover&daily=precipitation_sum,rain_sum,snowfall_sum,precipitation_hours&timezone={timezone}"
    response = requests.get(url)
    return response.json()

In [14]:
test_api = open_weather_api("41.75","-87.75","2018-10-25", "America%2FNew_York")
test_api

{'latitude': 41.75,
 'longitude': -87.75,
 'generationtime_ms': 0.4220008850097656,
 'utc_offset_seconds': -14400,
 'timezone': 'America/New_York',
 'timezone_abbreviation': 'EDT',
 'elevation': 192.0,
 'hourly_units': {'time': 'iso8601',
  'precipitation': 'mm',
  'rain': 'mm',
  'snowfall': 'cm',
  'cloudcover': '%'},
 'hourly': {'time': ['2018-10-25T00:00',
   '2018-10-25T01:00',
   '2018-10-25T02:00',
   '2018-10-25T03:00',
   '2018-10-25T04:00',
   '2018-10-25T05:00',
   '2018-10-25T06:00',
   '2018-10-25T07:00',
   '2018-10-25T08:00',
   '2018-10-25T09:00',
   '2018-10-25T10:00',
   '2018-10-25T11:00',
   '2018-10-25T12:00',
   '2018-10-25T13:00',
   '2018-10-25T14:00',
   '2018-10-25T15:00',
   '2018-10-25T16:00',
   '2018-10-25T17:00',
   '2018-10-25T18:00',
   '2018-10-25T19:00',
   '2018-10-25T20:00',
   '2018-10-25T21:00',
   '2018-10-25T22:00',
   '2018-10-25T23:00'],
  'precipitation': [0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
  

In [16]:
def city_scrubber(city_str):
    """
    city name will be scrubbed to remove multiple names
    Dallas/Fort Something, TX -----> Dallas, TX
    """
    try:
        state = city_str.split(',')       #split city(s) and state
        cities = state[0].split('/')      #split the list of cities by /
        return cities[0] + ',' + state[1]
    except: 
        return "no state in your city string"

In [17]:
#test
city_scrubber("Dallas/Fort Something, TX")

'Dallas, TX'

In [18]:
city_scrubber("Austin, TX")

'Austin, TX'

In [19]:
city_scrubber("Austin")

'no state in your city string'

In [20]:
#function that returns the weather condition for a date/city
def weather_condition(city, date, lat_long_df):
    """
    returns the weather condition for a single date/city. 
    city format "city, ST"
    date format "2020-01-01"
    """
    #scrub city
    #print(city)
    city_scrub = city_scrubber(city)
    if city_scrub == 'no state in your city string':
        return city_scrub, city
    
    #pull the lat and long for city
    lat = None
    long = None
    try:
        lat = float(lat_long_df['lat'][lat_long_df['city_state'] == city_scrub])
        long = float(lat_long_df['lng'][lat_long_df['city_state'] == city_scrub])
    except:
        print(lat, long, city)
        pass
    
    #call the weather api
    if lat and long:         #none is False
        json_result = open_weather_api(lat, long, date)
        #print(json_result)
        try:
            total_rain = int(json_result['daily']['rain_sum'][0])                  # mm
        except: 
            total_rain = None
        try:
            total_snow = int(json_result['daily']['snowfall_sum'][0])              # cm        
        except:
            total_snow = None
        try:
            cloudcover_mean = round(sum(json_result['hourly']['cloudcover'])/len(json_result['hourly']['cloudcover']))     # %
        except:
            cloudcover_mean = None

        if total_rain is None and total_snow is None and cloudcover_mean is None:
            result = "no weather data"    
        elif total_rain == 0 and total_snow == 0 and cloudcover_mean < 40:
            result = "sunny"
        elif total_rain == 0 and total_snow == 0 and cloudcover_mean >= 40:
            result = "cloudy"
        elif total_rain > 0 and total_snow == 0:
            result = "rain"
        elif total_rain == 0 and total_snow > 0:
            result = "snow"
        elif total_rain > 0 and total_snow > 0:
            result = "snow & rain"
        else:
            result = f"Error, total rain: {total_rain}, total snow: {total_snow}, mean cloud: {cloudcover_mean}"

        return result  

In [21]:
latlong_df['lat'][latlong_df['city_state'] == "Denver, CO"]

18    39.762
Name: lat, dtype: float64

In [22]:
#test
weather_condition("Denver, CO", "2019-11-23", latlong_df)

'sunny'

In [26]:
from tqdm import tqdm

In [27]:
for row in tqdm(range(len(flight_unique))):
    flight_unique['weather_categ'][row] = weather_condition(flight_unique['city'][row], flight_unique['fl_date'][row], latlong_df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  flight_unique['weather_categ'][row] = weather_condition(flight_unique['city'][row], flight_unique['fl_date'][row], latlong_df)
  0%|                                                                                 | 2/2450 [00:00<13:42,  2.98it/s]

None None Adak Island, AK


  1%|▌                                                                               | 16/2450 [00:05<13:55,  2.91it/s]

None None Ashland, WV


  1%|▊                                                                               | 25/2450 [00:08<17:04,  2.37it/s]

None None Barrow, AK


  2%|█▊                                                                              | 56/2450 [00:22<31:06,  1.28it/s]

None None Charlotte Amalie, VI


  3%|██                                                                              | 62/2450 [00:24<18:24,  2.16it/s]

None None Christiansted, VI


  3%|██▌                                                                             | 80/2450 [00:31<13:34,  2.91it/s]

None None Deadhorse, AK


  3%|██▊                                                                             | 85/2450 [00:33<19:04,  2.07it/s]

None None Devils Lake, ND


  5%|████                                                                           | 125/2450 [00:49<13:37,  2.84it/s]

None None Guam, TT


  6%|████▍                                                                          | 138/2450 [00:55<20:48,  1.85it/s]

None None Hilton Head, SC


  6%|████▌                                                                          | 141/2450 [00:55<14:22,  2.68it/s]

None None Hoolehua, HI


  6%|████▊                                                                          | 148/2450 [00:57<12:37,  3.04it/s]

None None Islip, NY


  7%|█████▍                                                                         | 168/2450 [01:06<21:28,  1.77it/s]

None None Kona, HI


  7%|█████▌                                                                         | 173/2450 [01:08<16:22,  2.32it/s]

None None Lanai, HI


  9%|███████▎                                                                       | 225/2450 [01:29<13:29,  2.75it/s]

None None Newburgh/Poughkeepsie, NY


 11%|████████▉                                                                      | 277/2450 [01:47<12:22,  2.92it/s]

None None Saipan, TT


 15%|███████████▋                                                                   | 363/2450 [02:16<11:46,  2.95it/s]

None None Ashland, WV


 15%|███████████▉                                                                   | 372/2450 [02:19<11:10,  3.10it/s]

None None Barrow, AK


 16%|████████████▉                                                                  | 403/2450 [02:30<11:37,  2.93it/s]

None None Charlotte Amalie, VI


 17%|█████████████▏                                                                 | 409/2450 [02:31<11:00,  3.09it/s]

None None Christiansted, VI


 17%|█████████████▊                                                                 | 428/2450 [02:37<11:22,  2.96it/s]

None None Deadhorse, AK


 18%|█████████████▉                                                                 | 433/2450 [02:39<10:45,  3.12it/s]

None None Devils Lake, ND


 19%|███████████████▎                                                               | 473/2450 [02:52<11:23,  2.89it/s]

None None Guam, TT


 20%|███████████████▋                                                               | 487/2450 [02:57<11:27,  2.86it/s]

None None Hilton Head, SC


 20%|███████████████▊                                                               | 490/2450 [02:58<09:15,  3.53it/s]

None None Hoolehua, HI


 20%|████████████████                                                               | 497/2450 [03:00<10:27,  3.11it/s]

None None Islip, NY


 21%|████████████████▋                                                              | 517/2450 [03:06<11:08,  2.89it/s]

None None Kona, HI


 21%|████████████████▊                                                              | 522/2450 [03:08<10:08,  3.17it/s]

None None Lanai, HI


 23%|██████████████████▌                                                            | 574/2450 [03:25<10:45,  2.90it/s]

None None Newburgh/Poughkeepsie, NY


 24%|██████████████████▉                                                            | 588/2450 [03:30<10:46,  2.88it/s]

None None Pago Pago, TT


 26%|████████████████████▏                                                          | 627/2450 [03:43<10:19,  2.94it/s]

None None Saipan, TT


 29%|██████████████████████▉                                                        | 713/2450 [04:12<09:54,  2.92it/s]

None None Ashland, WV


 29%|███████████████████████▎                                                       | 722/2450 [04:15<09:45,  2.95it/s]

None None Barrow, AK


 31%|████████████████████████▎                                                      | 753/2450 [04:25<09:35,  2.95it/s]

None None Charlotte Amalie, VI


 31%|████████████████████████▍                                                      | 759/2450 [04:27<09:02,  3.12it/s]

None None Christiansted, VI


 32%|█████████████████████████                                                      | 778/2450 [04:33<09:35,  2.90it/s]

None None Deadhorse, AK


 32%|█████████████████████████▏                                                     | 783/2450 [04:34<08:30,  3.26it/s]

None None Devils Lake, ND


 34%|██████████████████████████▌                                                    | 823/2450 [04:48<09:08,  2.96it/s]

None None Guam, TT


 34%|██████████████████████████▉                                                    | 837/2450 [04:52<09:05,  2.96it/s]

None None Hilton Head, SC


 34%|███████████████████████████                                                    | 840/2450 [04:53<07:43,  3.47it/s]

None None Hoolehua, HI


 35%|███████████████████████████▎                                                   | 847/2450 [04:55<08:40,  3.08it/s]

None None Islip, NY


 35%|███████████████████████████▉                                                   | 867/2450 [05:02<09:18,  2.83it/s]

None None Kona, HI


 36%|████████████████████████████                                                   | 872/2450 [05:03<08:14,  3.19it/s]

None None Lanai, HI


 38%|█████████████████████████████▊                                                 | 924/2450 [05:20<08:41,  2.93it/s]

None None Newburgh/Poughkeepsie, NY


 40%|███████████████████████████████▌                                               | 979/2450 [05:39<08:28,  2.89it/s]

None None Saipan, TT


 43%|█████████████████████████████████▌                                            | 1053/2450 [06:04<08:09,  2.86it/s]

None None Adak Island, AK


 44%|█████████████████████████████████▉                                            | 1067/2450 [06:09<08:00,  2.88it/s]

None None Ashland, WV


 44%|██████████████████████████████████▎                                           | 1076/2450 [06:12<08:03,  2.84it/s]

None None Barrow, AK


 45%|███████████████████████████████████▏                                          | 1107/2450 [06:22<07:40,  2.92it/s]

None None Charlotte Amalie, VI


 45%|███████████████████████████████████▍                                          | 1113/2450 [06:24<08:24,  2.65it/s]

None None Christiansted, VI


 46%|████████████████████████████████████                                          | 1132/2450 [06:31<07:24,  2.97it/s]

None None Deadhorse, AK


 46%|████████████████████████████████████▏                                         | 1137/2450 [06:32<06:54,  3.17it/s]

None None Devils Lake, ND


 48%|█████████████████████████████████████▍                                        | 1177/2450 [06:45<07:18,  2.90it/s]

None None Guam, TT


 49%|█████████████████████████████████████▉                                        | 1191/2450 [06:50<07:26,  2.82it/s]

None None Hilton Head, SC


 49%|██████████████████████████████████████                                        | 1194/2450 [06:51<06:03,  3.45it/s]

None None Hoolehua, HI


 49%|██████████████████████████████████████▏                                       | 1201/2450 [06:53<06:44,  3.09it/s]

None None Islip, NY


 50%|██████████████████████████████████████▊                                       | 1221/2450 [06:59<07:00,  2.92it/s]

None None Kona, HI


 50%|███████████████████████████████████████                                       | 1226/2450 [07:00<06:13,  3.28it/s]

None None Lanai, HI


 52%|████████████████████████████████████████▋                                     | 1278/2450 [07:18<06:43,  2.90it/s]

None None Newburgh/Poughkeepsie, NY


 54%|██████████████████████████████████████████▎                                   | 1331/2450 [07:37<06:33,  2.85it/s]

None None Saipan, TT


 58%|█████████████████████████████████████████████▏                                | 1418/2450 [08:07<06:10,  2.79it/s]

None None Ashland, WV


 58%|█████████████████████████████████████████████▍                                | 1427/2450 [08:09<05:47,  2.94it/s]

None None Barrow, AK


 60%|██████████████████████████████████████████████▍                               | 1458/2450 [08:20<05:39,  2.92it/s]

None None Charlotte Amalie, VI


 60%|██████████████████████████████████████████████▌                               | 1464/2450 [08:22<05:16,  3.11it/s]

None None Christiansted, VI


 61%|███████████████████████████████████████████████▏                              | 1483/2450 [08:28<05:36,  2.88it/s]

None None Deadhorse, AK


 61%|███████████████████████████████████████████████▎                              | 1488/2450 [08:29<04:58,  3.23it/s]

None None Devils Lake, ND


 62%|████████████████████████████████████████████████▋                             | 1528/2450 [08:43<05:19,  2.89it/s]

None None Guam, TT


 63%|█████████████████████████████████████████████████                             | 1542/2450 [08:47<05:05,  2.97it/s]

None None Hilton Head, SC


 63%|█████████████████████████████████████████████████▏                            | 1545/2450 [08:48<04:15,  3.54it/s]

None None Hoolehua, HI


 63%|█████████████████████████████████████████████████▍                            | 1552/2450 [08:50<04:43,  3.17it/s]

None None Islip, NY


 64%|██████████████████████████████████████████████████                            | 1572/2450 [08:56<05:07,  2.86it/s]

None None Kona, HI


 64%|██████████████████████████████████████████████████▏                           | 1577/2450 [08:58<04:28,  3.25it/s]

None None Lanai, HI


 66%|███████████████████████████████████████████████████▊                          | 1629/2450 [09:15<04:41,  2.92it/s]

None None Newburgh/Poughkeepsie, NY


 69%|█████████████████████████████████████████████████████▌                        | 1681/2450 [09:33<04:21,  2.94it/s]

None None Saipan, TT


 72%|████████████████████████████████████████████████████████▎                     | 1768/2450 [10:03<04:03,  2.80it/s]

None None Ashland, WV


 73%|████████████████████████████████████████████████████████▌                     | 1777/2450 [10:06<03:47,  2.96it/s]

None None Barrow, AK


 74%|█████████████████████████████████████████████████████████▌                    | 1808/2450 [10:16<03:41,  2.89it/s]

None None Charlotte Amalie, VI


 74%|█████████████████████████████████████████████████████████▊                    | 1814/2450 [10:18<03:28,  3.05it/s]

None None Christiansted, VI


 75%|██████████████████████████████████████████████████████████▎                   | 1833/2450 [10:24<03:31,  2.91it/s]

None None Deadhorse, AK


 75%|██████████████████████████████████████████████████████████▌                   | 1838/2450 [10:25<03:09,  3.22it/s]

None None Devils Lake, ND


 77%|███████████████████████████████████████████████████████████▊                  | 1878/2450 [10:39<03:14,  2.95it/s]

None None Guam, TT


 77%|████████████████████████████████████████████████████████████▏                 | 1892/2450 [10:44<03:10,  2.92it/s]

None None Hilton Head, SC


 77%|████████████████████████████████████████████████████████████▎                 | 1895/2450 [10:44<02:41,  3.44it/s]

None None Hoolehua, HI


 78%|████████████████████████████████████████████████████████████▌                 | 1902/2450 [10:46<02:56,  3.10it/s]

None None Islip, NY


 78%|█████████████████████████████████████████████████████████████▏                | 1922/2450 [10:53<02:57,  2.98it/s]

None None Kona, HI


 79%|█████████████████████████████████████████████████████████████▎                | 1927/2450 [10:54<02:39,  3.27it/s]

None None Lanai, HI


 81%|███████████████████████████████████████████████████████████████               | 1979/2450 [11:12<02:45,  2.85it/s]

None None Newburgh/Poughkeepsie, NY


 81%|███████████████████████████████████████████████████████████████▌              | 1995/2450 [11:17<02:40,  2.84it/s]

None None Pago Pago, TT


 83%|████████████████████████████████████████████████████████████████▊             | 2035/2450 [11:31<02:26,  2.84it/s]

None None Saipan, TT


 87%|███████████████████████████████████████████████████████████████████▌          | 2122/2450 [12:01<01:51,  2.95it/s]

None None Ashland, WV


 87%|███████████████████████████████████████████████████████████████████▊          | 2131/2450 [12:04<01:47,  2.96it/s]

None None Barrow, AK


 88%|████████████████████████████████████████████████████████████████████▊         | 2161/2450 [12:14<01:39,  2.91it/s]

None None Charlotte Amalie, VI


 88%|████████████████████████████████████████████████████████████████████▉         | 2167/2450 [12:15<01:34,  3.00it/s]

None None Christiansted, VI


 89%|█████████████████████████████████████████████████████████████████████▌        | 2186/2450 [12:22<01:30,  2.91it/s]

None None Deadhorse, AK


 89%|█████████████████████████████████████████████████████████████████████▊        | 2191/2450 [12:23<01:19,  3.25it/s]

None None Devils Lake, ND


 91%|███████████████████████████████████████████████████████████████████████       | 2231/2450 [12:36<01:17,  2.82it/s]

None None Guam, TT


 92%|███████████████████████████████████████████████████████████████████████▍      | 2244/2450 [12:41<01:11,  2.87it/s]

None None Hilton Head, SC


 92%|███████████████████████████████████████████████████████████████████████▌      | 2247/2450 [12:41<00:58,  3.47it/s]

None None Hoolehua, HI


 92%|███████████████████████████████████████████████████████████████████████▊      | 2254/2450 [12:43<01:02,  3.14it/s]

None None Islip, NY


 93%|████████████████████████████████████████████████████████████████████████▍     | 2274/2450 [12:50<00:59,  2.93it/s]

None None Kona, HI


 93%|████████████████████████████████████████████████████████████████████████▌     | 2279/2450 [12:51<00:52,  3.25it/s]

None None Lanai, HI


 95%|██████████████████████████████████████████████████████████████████████████▏   | 2331/2450 [13:09<00:42,  2.79it/s]

None None Newburgh/Poughkeepsie, NY


 97%|███████████████████████████████████████████████████████████████████████████▋  | 2379/2450 [13:25<00:24,  2.84it/s]

None None Saipan, TT


100%|██████████████████████████████████████████████████████████████████████████████| 2450/2450 [13:50<00:00,  2.95it/s]


In [28]:
flight_unique['weather_categ']

758     cloudy
408      sunny
1463    cloudy
1813     sunny
2166     sunny
         ...  
1383     sunny
209     cloudy
278      sunny
183      sunny
2365     sunny
Name: weather_categ, Length: 2450, dtype: object

In [29]:
flight_unique

Unnamed: 0,fl_date,city,count,weather_categ
758,2020-01-03,"Chicago, IL",2591,cloudy
408,2020-01-02,"Chicago, IL",2587,sunny
1463,2020-01-05,"Chicago, IL",2556,cloudy
1813,2020-01-06,"Chicago, IL",2556,sunny
2166,2020-01-07,"Chicago, IL",2413,sunny
...,...,...,...,...
1383,2020-01-04,"Vernal, UT",2,sunny
209,2020-01-01,"Moab, UT",2,cloudy
278,2020-01-01,"Salina, KS",1,sunny
183,2020-01-01,"Liberal, KS",1,sunny


In [30]:
flight_unique.to_csv('data/flights_test_uniques.csv', header=True, index=False)

In [31]:
flight_unique.groupby(by = 'weather_categ').count()

Unnamed: 0_level_0,fl_date,city,count
weather_categ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
cloudy,615,615,615
no weather data,28,28,28
rain,372,372,372
snow,136,136,136
snow & rain,29,29,29
sunny,1168,1168,1168


In [None]:
#now match that to the flights_test df