In [None]:
# The companion notebook builds a template from the JHU data and starts making API
# calls from scratch. If that notebook is interrupted, this one picks up where it
# left off.

In [1]:
import json
import os
from datetime import datetime as dt
from getpass import getpass

import numpy as np
import pandas as pd
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Load the JHU confirmed-case time series plus one CSV per weather variable.
df_confirmed = pd.read_csv(
    './csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'
)

# The weather files are read in the same order as before; unpacking the map()
# forces every read to happen right here.
df_tMax, df_tMin = map(
    pd.read_csv,
    ('./csv/tMax_US.csv', './csv/tMin_US.csv'),
)
df_humidity, df_uvIndex, df_cloud, df_precipprob = map(
    pd.read_csv,
    ('./csv/humidity_US.csv', './csv/uv_US.csv', './csv/cloud_US.csv', './csv/precip_US.csv'),
)
df_dewpoint, df_pressure, df_windspeed, df_ozone = map(
    pd.read_csv,
    ('./csv/dew_US.csv', './csv/pressure_US.csv', './csv/wind_US.csv', './csv/ozone_US.csv'),
)
df_sunrise, df_sunset = map(
    pd.read_csv,
    ('./csv/sunrise_US.csv', './csv/sunset_US.csv'),
)


In [2]:
# Preview the first five rows of the max-temperature table.
df_tMax.head(n=5)

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,8/3/20,8/4/20,8/5/20,8/6/20,8/7/20,8/8/20,8/9/20,8/10/20,8/11/20,8/12/20
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,...,0,0,0,0,0,0,0,0,0,0
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,...,0,0,0,0,0,0,0,0,0,0
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,...,0,0,0,0,0,0,0,0,0,0
3,84001007,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,...,0,0,0,0,0,0,0,0,0,0
4,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Resuming the API calls

# Code to increase number of retries on connection errors,
# and also to give it some time.
# Found on https://stackoverflow.com/questions/15431044/can-i-set-max-retries-for-requests-request
# And https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/

# Not used by the code in this cell; kept so anything that refers to `s` keeps working.
s = requests.Session()

# Retry policy shared by every request made through `http`.
retry_settings = {
    'total': 30,
    'backoff_factor': 0.1,
    'status_forcelist': [429, 500, 502, 503, 504],
}
retry_methods = ["HEAD", "GET", "OPTIONS"]

# urllib3 1.26 renamed `method_whitelist` to `allowed_methods` and urllib3 2.0
# removed the old keyword, so try the current name first and only fall back to
# the legacy one on installations that do not know it yet.
try:
    retries = Retry(allowed_methods=retry_methods, **retry_settings)
except TypeError:
    retries = Retry(method_whitelist=retry_methods, **retry_settings)

# Session used for all API calls; the adapter applies the retry policy to both schemes.
adapter = HTTPAdapter(max_retries=retries)
http = requests.Session()
http.mount("https://", adapter)
http.mount("http://", adapter)

# pull data from darksky weather API

# Columns to be skipped when iterating through the DataFrame
do_not_include = ['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2', 'Province_State',
                 'Country_Region', 'Lat', 'Long_', 'Combined_Key']

# Darksky API key.
# Never commit the key to the notebook: it is taken from the DARKSKY_API_KEY
# environment variable, and only if that is unset (or empty) is it asked for
# interactively, without echoing what is typed.
API_KEY = os.environ.get('DARKSKY_API_KEY') or getpass('Dark Sky API key: ')

# counter
counter = 0

# Dummy value in case of errors
dummy = -1000

# variable for determining how many API calls between writing data to CSV
write_var = 1000

# Start iterating through the date columns

# Seconds to wait on a single request; without a timeout a stalled connection
# would block the loop indefinitely.
REQUEST_TIMEOUT = 10

# Dark Sky daily fields whose values are multiplied by 100 before being stored.
percent_fields = ('humidity', 'cloudCover', 'precipProbability')

# (Dark Sky daily field, DataFrame that stores it). df_tMax is deliberately last:
# the loop decides whether a cell still needs fetching by looking at df_tMax, so a
# cell is only marked as done once every other frame has received its value.
weather_targets = (
    ('temperatureLow', df_tMin),
    ('humidity', df_humidity),
    ('uvIndex', df_uvIndex),
    ('cloudCover', df_cloud),
    ('precipProbability', df_precipprob),
    ('dewPoint', df_dewpoint),
    ('pressure', df_pressure),
    ('windSpeed', df_windspeed),
    ('ozone', df_ozone),
    ('sunriseTime', df_sunrise),
    ('sunsetTime', df_sunset),
    ('temperatureHigh', df_tMax),
)

# CSV file each DataFrame is persisted to.
weather_csv_files = (
    (df_tMax, './csv/tMax_US.csv'),
    (df_tMin, './csv/tMin_US.csv'),
    (df_humidity, './csv/humidity_US.csv'),
    (df_uvIndex, './csv/uv_US.csv'),
    (df_cloud, './csv/cloud_US.csv'),
    (df_precipprob, './csv/precip_US.csv'),
    (df_dewpoint, './csv/dew_US.csv'),
    (df_pressure, './csv/pressure_US.csv'),
    (df_windspeed, './csv/wind_US.csv'),
    (df_ozone, './csv/ozone_US.csv'),
    (df_sunrise, './csv/sunrise_US.csv'),
    (df_sunset, './csv/sunset_US.csv'),
)


def save_weather_frames():
    """Write all twelve weather DataFrames to their CSV files (no index column)."""
    for frame, csv_path in weather_csv_files:
        frame.to_csv(csv_path, index=False)


def is_unfilled(value):
    """Return True when a cell still holds the 0 placeholder.

    The comparison is numeric on purpose. Once a decimal value has been stored
    in a column, pandas may hold the remaining zeros as 0.0, whose string form
    '0.0' does not equal '0', so a string comparison would skip those cells.
    Values that cannot be read as a number are treated as already filled.

    NOTE(review): a genuine reading of exactly 0 is indistinguishable from the
    placeholder and will simply be fetched again on the next resume.
    """
    try:
        return float(value) == 0
    except (TypeError, ValueError):
        return False


# If a request ultimately fails (retries exhausted, timeout) or the run is
# interrupted, the exception still propagates, but the `finally` clause first
# writes everything fetched so far so a later resume loses nothing.
try:
    for x in df_tMax.columns.values:

        # Skip the columns that are not dates
        if x in do_not_include:
            continue

        # Create Unix time stamp (whole seconds) out of the date column name
        t = str(pd.to_datetime(x).value // 10**9)

        # Start iterating through the rows (locations)
        for y in range(len(df_tMax)):

            # Only do API call if the cell has not been filled in yet
            if not is_unfilled(df_tMax.at[y, x]):
                continue

            # latitude and longitude of the row, looked up by column name so the
            # request does not depend on the exact column order of the CSV
            latitude = str(df_tMax.at[y, 'Lat'])
            longitude = str(df_tMax.at[y, 'Long_'])

            # Building the URL for the API get
            url = ('https://api.darksky.net/forecast/' + API_KEY + '/'
                   + latitude + ',' + longitude + ',' + t
                   + '?exclude=currently,flags&units=si')

            # Getting the API call
            # using the retry error handling established above
            response = http.get(url, timeout=REQUEST_TIMEOUT)

            # Parsing and field extraction share one guard: a body that is not
            # JSON, or JSON without the expected daily record, yields dummies.
            try:
                easy_info = json.loads(response.content)['daily']['data'][0]
                values = [
                    easy_info[field] * 100 if field in percent_fields else easy_info[field]
                    for field, frame in weather_targets
                ]
            except (ValueError, KeyError, IndexError, TypeError):
                # Creating dummy values in case of error
                print('Error encountered')
                values = [dummy] * len(weather_targets)

            # Recording the data into the respective dataframes
            for (field, frame), value in zip(weather_targets, values):
                frame.at[y, x] = value

            # Count actual API calls only, so checkpoints are not triggered by
            # cells that were skipped because they were already filled.
            counter = counter + 1
            print(counter)

            # writing CSVs of what I've got so far, for every write_var API calls
            if counter % write_var == 0:
                print('{} API calls'.format(write_var))
                save_weather_frames()

finally:
    # Writing final data to csv
    print('Final data write')
    save_weather_frames()

In [None]:
# Writing data to csv
# Each weather DataFrame goes back to the file it belongs to, without the index.
csv_targets = (
    ('./csv/tMax_US.csv', df_tMax),
    ('./csv/tMin_US.csv', df_tMin),
    ('./csv/humidity_US.csv', df_humidity),
    ('./csv/uv_US.csv', df_uvIndex),
    ('./csv/cloud_US.csv', df_cloud),
    ('./csv/precip_US.csv', df_precipprob),
    ('./csv/dew_US.csv', df_dewpoint),
    ('./csv/pressure_US.csv', df_pressure),
    ('./csv/wind_US.csv', df_windspeed),
    ('./csv/ozone_US.csv', df_ozone),
    ('./csv/sunrise_US.csv', df_sunrise),
    ('./csv/sunset_US.csv', df_sunset),
)
for csv_path, frame in csv_targets:
    frame.to_csv(csv_path, index=False)

In [None]:
# test cell

# Dry run of the resume loop: it walks the same cells as the real loop and reports
# which ones would trigger an API call, but makes no requests and writes no files.

# Code to increase number of retries on connection errors,
# and also to give it some time.
# Found on https://stackoverflow.com/questions/15431044/can-i-set-max-retries-for-requests-request
# And https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/

# Not used by the code in this cell; kept so anything that refers to `s` keeps working.
s = requests.Session()

retry_settings = {
    'total': 30,
    'backoff_factor': 0.1,
    'status_forcelist': [429, 500, 502, 503, 504],
}
retry_methods = ["HEAD", "GET", "OPTIONS"]

# urllib3 1.26 renamed `method_whitelist` to `allowed_methods` and urllib3 2.0
# removed the old keyword; fall back to it only where the new one is unknown.
try:
    retries = Retry(allowed_methods=retry_methods, **retry_settings)
except TypeError:
    retries = Retry(method_whitelist=retry_methods, **retry_settings)

adapter = HTTPAdapter(max_retries=retries)
http = requests.Session()
http.mount("https://", adapter)
http.mount("http://", adapter)

# Columns to be skipped when iterating through the DataFrame
do_not_include = ['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2', 'Province_State',
                 'Country_Region', 'Lat', 'Long_', 'Combined_Key']

# Darksky API key: never hard-coded. The dry run makes no requests, so the key is
# only looked up in the environment (None when DARKSKY_API_KEY is unset).
API_KEY = os.environ.get('DARKSKY_API_KEY')

# counter
counter = 0

# Dummy value in case of errors
dummy = -1000

# variable for determining how many API calls between writing data to CSV
write_var = 1000

# Start iterating through the date columns
for x in df_tMax.columns.values:

    # Skip the columns that are not dates
    if x in do_not_include:
        continue

    # Create Unix time stamp (whole seconds) out of the date column name
    t = str(pd.to_datetime(x).value // 10**9)

    # Start iterating through the rows (locations)
    for y in range(len(df_tMax)):

        cell = df_tMax.at[y, x]

        # Compare numerically: a placeholder stored as 0.0 prints as '0.0' and
        # would not match a string comparison against '0'.
        try:
            needs_call = float(cell) == 0
        except (TypeError, ValueError):
            needs_call = False

        if needs_call:

            print('Cell is 0')

            # Count only the cells that would cause an API call
            counter = counter + 1
            print(counter)

            # the real loop writes its CSVs at this point
            if counter % write_var == 0:
                print('{} API calls'.format(write_var))

        else:

            print(str(cell))

