In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from datetime import date, timedelta
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="final_proj")

def get_day_cases(end_date=None):
    """
    Gets day-by-day case numbers from the CSSEGISandData/COVID-19 daily US reports.

    Input: end date (optional datetime.date, exclusive); defaults to today's
           date, resolved each time the function is called
    Output: list of dataframes, indexed from 0 = 4/12/20, one per day before
            end_date (empty when end_date is on or before 4/12/20)

    Any error raised by pd.read_csv while fetching a day's file (for example
    a report that has not been published yet) propagates to the caller.
    """
    START_DATE = date(2020, 4, 12)
    GIT_REPO_PATH = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports_us/'
    # A `date.today()` default in the signature is evaluated only once, when the
    # defining cell runs, and would go stale in a long-lived kernel.
    if end_date is None:
        end_date = date.today()
    num_days = (end_date - START_DATE).days
    dfs = []
    for i in range(num_days):
        # Report files are named MM-DD-YYYY.csv.
        file_name = (START_DATE + timedelta(days=i)).strftime('%m-%d-%Y') + '.csv'
        # on_bad_lines='warn' (pandas >= 1.3) skips malformed rows with a
        # warning; it replaces error_bad_lines=False, removed in pandas 2.0.
        dfs.append(pd.read_csv(GIT_REPO_PATH + file_name, on_bad_lines='warn'))
    return dfs



In [3]:
# County-level reference table; the path is relative to the working directory.
counties = pd.read_csv('covid19/abridged_couties.csv')
# Subset of the county table whose StateName is "CA".
ca = counties[counties['StateName'] == "CA"]

# One dataframe per daily report, from 4/12/20 up to the default end date.
covid_list = get_day_cases()


In [4]:
# Stack the per-day frames into one long table with a fresh 0..n-1 index.
# pd.concat builds the result in a single pass; DataFrame.append re-copied the
# accumulated frame on every iteration and was removed in pandas 2.0.
# pd.concat raises on an empty list, so fall back to an empty frame.
covid = pd.concat(covid_list, ignore_index=True) if covid_list else pd.DataFrame()

covid

Unnamed: 0,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,FIPS,Incident_Rate,People_Tested,People_Hospitalized,Mortality_Rate,UID,ISO3,Testing_Rate,Hospitalization_Rate
0,Alabama,US,2020-04-12 23:18:15,32.3182,-86.9023,3563,93,,3470.0,1.0,75.988020,21583.0,437.0,2.610160,84000001,USA,460.300152,12.264945
1,Alaska,US,2020-04-12 23:18:15,61.3707,-152.4044,272,8,66.0,264.0,2.0,45.504049,8038.0,31.0,2.941176,84000002,USA,1344.711576,11.397059
2,Arizona,US,2020-04-12 23:18:15,33.7298,-111.4312,3542,115,,3427.0,4.0,48.662422,42109.0,,3.246753,84000004,USA,578.522286,
3,Arkansas,US,2020-04-12 23:18:15,34.9697,-92.3731,1280,27,367.0,1253.0,5.0,49.439423,19722.0,130.0,2.109375,84000005,USA,761.753354,10.156250
4,California,US,2020-04-12 23:18:15,36.1162,-119.6816,22795,640,,22155.0,6.0,58.137726,190328.0,5234.0,2.812020,84000006,USA,485.423868,22.961176
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1741,Virginia,US,2020-05-09 02:32:39,37.7693,-78.1700,22342,812,2997.0,18533.0,51.0,282.551538,129945.0,4622.0,3.634411,84000051,USA,1643.369423,20.687494
1742,Washington,US,2020-05-09 02:32:39,47.4009,-121.4905,16388,905,,15483.0,53.0,217.076768,230680.0,,5.522333,84000053,USA,3055.605854,
1743,West Virginia,US,2020-05-09 02:32:39,38.4912,-80.9545,1323,52,761.0,510.0,54.0,100.000151,59436.0,,3.930461,84000054,USA,4492.523798,
1744,Wisconsin,US,2020-05-09 02:32:39,44.2685,-89.6165,9590,384,4694.0,4512.0,55.0,185.329373,106855.0,1767.0,4.004171,84000055,USA,2065.002104,18.425443


In [8]:
# Check how the first row's longitude renders as a string.
str(covid.loc[0, 'Long_'])

'-86.9023'

In [None]:
# Try the reverse geocoder on one hand-written "lat, long" query and show the
# 'address' entry of the raw response.
geolocator.reverse('61.37070, -152.40440').raw['address']

In [None]:
def get_county(lat, long):
    """
    Reverse-geocodes a coordinate pair to a county name.

    Input: lat, long (anything whose str() is a decimal number)
    Output: the 'county' field of the geocoder's address with ' County'
            removed, or the string 'None' when the lookup fails, returns no
            result, or the address has no 'county' field
    """
    # The trailing '0' on each coordinate is kept from the original query
    # format; it leaves the value of a plain decimal string unchanged.
    latitude, longitude = str(lat) + '0', str(long) + '0'
    coord = latitude + ', ' + longitude
    try:
        location = geolocator.reverse(coord)
        county = location.raw['address']['county']
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit can still stop a long geocoding run.
        return 'None'
    return county.replace(' County', '')


# Reverse-geocode only the first five rows; get_county makes one geocoder
# request per row.
# Declaring otypes stops np.vectorize from calling get_county one extra time on
# the first row just to infer the output dtype.
# NOTE: this rebinds `counties`, which previously held the county table read
# from the CSV file.
counties = np.vectorize(get_county, otypes=[str])(covid['Lat'][:5], covid['Long_'][:5])
counties


  """


In [None]:
# DataFrame.insert modifies the frame in place and returns None, so its result
# must not be assigned back to `covid`.
# Wrapping the values in a Series keyed by the leading index labels lets pandas
# align them: rows beyond len(counties) (not geocoded) are left as NaN instead
# of the call failing on a length mismatch.
covid.insert(2, 'county', pd.Series(counties, index=covid.index[:len(counties)]))