In [8]:
import pandas as pd
import requests, json
from glob import glob

In [13]:
def downloadCDCVaccinationData():
    """Download the current CDC vaccination snapshot and archive it under ./json.

    The payload's 'vaccination_data' list is written to
    ./json/cdc_vaccine_data_<Date>.json, where <Date> is the 'Date' field of
    the first record, so a second download for the same date overwrites the
    earlier file.

    Returns:
        list[str]: every path matching ./json/*.json (including the file just
        written), sorted by name so downstream processing order is
        deterministic.

    Raises:
        requests.HTTPError: if the CDC endpoint answers with an error status.
        ValueError: if the payload contains no vaccination records.
    """
    import os  # local import: only needed to make sure the archive folder exists

    raw = requests.get(
        'https://covid.cdc.gov/covid-data-tracker/COVIDData/getAjaxData?id=vaccination_data',
        timeout=120,  # never hang the notebook on a stalled connection
    )
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    raw.raise_for_status()
    loadedJson = raw.json()['vaccination_data']
    if not loadedJson:
        raise ValueError("CDC response contained no 'vaccination_data' records")

    os.makedirs('./json', exist_ok=True)
    with open(f'./json/cdc_vaccine_data_{loadedJson[0]["Date"]}.json', 'w') as outfile:
        json.dump(loadedJson, outfile)

    # glob() returns files in arbitrary order; sort for reproducible runs.
    return sorted(glob('./json/*.json'))

In [14]:
def parse1p3aVaccinationData(vaccinationDataList):
    """Build the state-level vaccination CSVs laid out like the 1p3a case CSV.

    Each archived CDC snapshot contributes one column, named after the 'Date'
    of its first record. Snapshots are joined to ./statename_geoid.csv on the
    state postal code ('Location' == 'STUSPS'); locations without a match are
    dropped by the inner join. The output has 'GEOID' and 'NAME' first,
    followed by the sorted union of the date columns found in
    ../../docs/csv/covid_confirmed_1p3a_state.csv and the snapshot dates.
    Dates without a snapshot are left empty.

    Args:
        vaccinationDataList: paths of JSON snapshots. Each file holds either a
            list of records or a dict with that list under 'vaccination_data'.

    Writes:
        vaccine_admin_cdc_1p3a_state.csv and vaccine_dist_cdc_1p3a_state.csv,
        each to both ./csv/ and ../../docs/csv/.

    Raises:
        ValueError: if vaccinationDataList is empty.
    """
    idCols = ['GEOID', 'NAME']
    if not vaccinationDataList:
        raise ValueError('vaccinationDataList is empty; there are no snapshots to parse')

    dateDf = pd.read_csv('../../docs/csv/covid_confirmed_1p3a_state.csv')
    # Keep only the lookup columns that are used, so a same-named column in the
    # CDC payload cannot be suffixed to NAME_x / NAME_y by the merge.
    geoidTable = pd.read_csv('./statename_geoid.csv')[['STUSPS'] + idCols]
    # The first two columns of the case CSV are treated as identifiers; the rest are dates.
    caseDates = list(dateDf.columns[2:])

    vaccineAdministered = None
    vaccineDistributed = None
    for file in vaccinationDataList:
        with open(file) as f:
            data = json.load(f)
        if isinstance(data, dict):
            data = data['vaccination_data']
        currDate = data[0]['Date']

        vaccinationDf = pd.DataFrame(data)[['Location', 'Doses_Distributed', 'Doses_Administered']] \
            .merge(geoidTable, left_on='Location', right_on='STUSPS', how='inner')

        # rename() returns new frames, so no column assignment happens on a slice.
        dailyVaccineAdministered = vaccinationDf[idCols + ['Doses_Administered']] \
            .rename(columns={'Doses_Administered': currDate})
        dailyVaccineDistributed = vaccinationDf[idCols + ['Doses_Distributed']] \
            .rename(columns={'Doses_Distributed': currDate})

        if vaccineAdministered is None:
            vaccineAdministered = dailyVaccineAdministered
            vaccineDistributed = dailyVaccineDistributed
        else:
            vaccineAdministered = vaccineAdministered.merge(dailyVaccineAdministered, on=idCols)
            vaccineDistributed = vaccineDistributed.merge(dailyVaccineDistributed, on=idCols)

    outputs = (
        (vaccineAdministered, 'vaccine_admin_cdc_1p3a_state'),
        (vaccineDistributed, 'vaccine_dist_cdc_1p3a_state'),
    )
    for frame, stem in outputs:
        snapshotCols = [col for col in frame.columns if col not in idCols]
        # Order the columns explicitly. The previous "move the last two columns
        # to the front" step only worked while pd.concat sorted the column
        # union, which is no longer the default behaviour.
        dateCols = sorted(set(caseDates) | set(snapshotCols), key=str)
        merged = frame.reindex(columns=idCols + dateCols)

        merged.to_csv(f'./csv/{stem}.csv', index=False)
        merged.to_csv(f'../../docs/csv/{stem}.csv', index=False)

In [15]:
def parseNytVaccinationData(vaccinationDataList):
    """Build the state-level vaccination CSVs laid out like the NYT case CSV.

    Each archived CDC snapshot contributes one column, named after the 'Date'
    of its first record. Snapshots are joined to ./statename_geoid.csv on the
    state postal code ('Location' == 'STUSPS'), and the table's 'GEOID' is
    emitted as 'fips'; locations without a match are dropped by the inner
    join. The output has 'fips' first, followed by the sorted union of the
    date columns found in ../../docs/csv/covid_confirmed_nyt_state.csv and
    the snapshot dates. Dates without a snapshot are left empty.

    Args:
        vaccinationDataList: paths of JSON snapshots. Each file holds either a
            list of records or a dict with that list under 'vaccination_data'.

    Writes:
        vaccine_admin_cdc_nyt_state.csv and vaccine_dist_cdc_nyt_state.csv,
        each to both ./csv/ and ../../docs/csv/.

    Raises:
        ValueError: if vaccinationDataList is empty.
    """
    idCols = ['fips']
    if not vaccinationDataList:
        raise ValueError('vaccinationDataList is empty; there are no snapshots to parse')

    dateDf = pd.read_csv('../../docs/csv/covid_confirmed_nyt_state.csv')
    # Only the postal code and GEOID are needed here. 'NAME' is never written
    # to the NYT-style output, so it is no longer required to be present.
    geoidTable = pd.read_csv('./statename_geoid.csv')[['STUSPS', 'GEOID']]
    # The first column of the case CSV is treated as the identifier; the rest are dates.
    caseDates = list(dateDf.columns[1:])

    vaccineAdministered = None
    vaccineDistributed = None
    for file in vaccinationDataList:
        with open(file) as f:
            data = json.load(f)
        if isinstance(data, dict):
            data = data['vaccination_data']
        currDate = data[0]['Date']

        vaccinationDf = pd.DataFrame(data)[['Location', 'Doses_Distributed', 'Doses_Administered']] \
            .merge(geoidTable, left_on='Location', right_on='STUSPS', how='inner')

        # rename() returns new frames, so no column assignment happens on a slice.
        dailyVaccineAdministered = vaccinationDf[['GEOID', 'Doses_Administered']] \
            .rename(columns={'GEOID': 'fips', 'Doses_Administered': currDate})
        dailyVaccineDistributed = vaccinationDf[['GEOID', 'Doses_Distributed']] \
            .rename(columns={'GEOID': 'fips', 'Doses_Distributed': currDate})

        if vaccineAdministered is None:
            vaccineAdministered = dailyVaccineAdministered
            vaccineDistributed = dailyVaccineDistributed
        else:
            vaccineAdministered = vaccineAdministered.merge(dailyVaccineAdministered, on=idCols)
            vaccineDistributed = vaccineDistributed.merge(dailyVaccineDistributed, on=idCols)

    outputs = (
        (vaccineAdministered, 'vaccine_admin_cdc_nyt_state'),
        (vaccineDistributed, 'vaccine_dist_cdc_nyt_state'),
    )
    for frame, stem in outputs:
        snapshotCols = [col for col in frame.columns if col not in idCols]
        # Order the columns explicitly. The previous "move the last column to
        # the front" step only worked while pd.concat sorted the column union,
        # which is no longer the default behaviour.
        dateCols = sorted(set(caseDates) | set(snapshotCols), key=str)
        merged = frame.reindex(columns=idCols + dateCols)

        merged.to_csv(f'./csv/{stem}.csv', index=False)
        merged.to_csv(f'../../docs/csv/{stem}.csv', index=False)

In [16]:
# Archive today's CDC vaccination snapshot, then rebuild both families of
# state-level CSVs (1p3a layout and NYT layout) from every archived snapshot.
fileList = downloadCDCVaccinationData()
parse1p3aVaccinationData(fileList)
parseNytVaccinationData(fileList)

KeyError: "['NAME'] not in index"

<hr>

In [17]:
import grequests
import geopandas as gpd

In [18]:
# Two-letter postal codes (51 entries, including 'DC'); one CDC county-timeseries
# URL is built per code in the next cell.
state2Digit = ['AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY']

In [19]:
# One CDC county-timeseries endpoint per state code.
urls = [f"https://covid.cdc.gov/covid-data-tracker/COVIDData/getAjaxData?id=integrated_county_timeseries_state_{stateCode}_external" for stateCode in state2Digit]

# Number of URLs requested concurrently per batch. Named batchSize rather than
# `breakpoint` so the builtin breakpoint() is not shadowed in the notebook.
batchSize = 12

# NOTE: despite its name, urlDict is a list of URL batches (each a list); the
# name is kept because the download cell iterates over it. The batch count is
# derived from len(urls) instead of being hard-coded, so no URL is dropped if
# state2Digit grows.
urlDict = [urls[start:start + batchSize] for start in range(0, len(urls), batchSize)]

In [5]:
# Fetch the county time series batch by batch: every URL in a batch is requested
# concurrently, and one list of results is stored per batch.
responses = []
for batchUrls in urlDict:
    pendingRequests = [grequests.get(url, timeout=120) for url in batchUrls]
    responses.append(grequests.map(pendingRequests))

In [6]:
# Turn every state's response into a DataFrame and combine them into a single
# frame ordered by date.
stateFrames = []
failedCount = 0
for responseSet in responses:
    for response in responseSet:
        # grequests.map yields None for a request that failed; count those
        # instead of crashing on None.json().
        if response is None:
            failedCount += 1
            continue
        stateFrames.append(pd.DataFrame(response.json()['integrated_county_timeseries_external_data']))

if failedCount:
    raise RuntimeError(
        f"{failedCount} CDC county-timeseries request(s) failed; re-run the download cell before parsing"
    )

# A single concat over all frames avoids re-copying the accumulated frame on
# every iteration.
parsed = pd.concat(stateFrames).sort_values('date')

In [20]:
# Preview the first rows of the combined, date-sorted county time series.
parsed.head()

Unnamed: 0,fips_code,state,county,new_cases_7_day_rolling_average,percent_new_test_results_reported_positive_7_day_rolling_average,new_cases_week_over_week_percent_change,new_test_results_reported_7_day_rolling_average,new_deaths_7_day_rolling_average,date,report_date_window_start,report_date_window
7487,1043,AL,Cullman County,0.0,0.0,0.0,0.0,0.0,2020-01-22T00:00:00,2020-01-16T00:00:00,2020-01-22T00:00:00
28237,31163,NE,Sherman County,0.0,0.0,0.0,0.0,0.0,2020-01-22T00:00:00,2020-01-16T00:00:00,2020-01-22T00:00:00
8797,53051,WA,Pend Oreille County,0.0,0.0,0.0,0.0,0.0,2020-01-22T00:00:00,2020-01-16T00:00:00,2020-01-22T00:00:00
55188,13321,GA,Worth County,0.0,0.0,0.0,0.0,0.0,2020-01-22T00:00:00,2020-01-16T00:00:00,2020-01-22T00:00:00
5285,13031,GA,Bulloch County,0.0,0.0,0.0,0.0,0.0,2020-01-22T00:00:00,2020-01-16T00:00:00,2020-01-22T00:00:00


  with loop.timer(seconds, ref=ref) as t:


In [21]:
# Read ./counties.geojson with geopandas; the output recorded below shows
# per-county population, beds and geometry columns.
popData = gpd.read_file('./counties.geojson')

In [22]:
# Display the county frame.
# NOTE(review): popData is not referenced by any later cell visible here.
popData

Unnamed: 0,GEOID,STATEFP,COUNTYFP,NAME,state_name,state_abbr,population,beds,criteria,geometry
0,31039,31,039,Cuming,Nebraska,NE,8991,25,Unknown,"POLYGON ((-97.01936 42.09058, -97.01991 41.742..."
1,53069,53,069,Wahkiakum,Washington,WA,4189,0,Persons,"POLYGON ((-123.72832 46.26454, -123.47964 46.2..."
2,35011,35,011,De Baca,New Mexico,NM,2060,0,Persons,"POLYGON ((-104.89338 34.08841, -104.78643 34.0..."
3,31109,31,109,Lancaster,Nebraska,NE,310094,1138,Unknown,"POLYGON ((-96.91094 41.04612, -96.91349 40.697..."
4,31129,31,129,Nuckolls,Nebraska,NE,4275,25,Unknown,"POLYGON ((-98.27357 40.35036, -98.27402 40.002..."
...,...,...,...,...,...,...,...,...,...,...
3215,13123,13,123,Gilmer,Georgia,GA,29922,0,,"POLYGON ((-84.61864 34.85540, -84.65710 34.728..."
3216,27135,27,135,Roseau,Minnesota,MN,15462,25,,"POLYGON ((-96.40541 48.99998, -96.38783 48.544..."
3217,28089,28,089,Madison,Mississippi,MS,103498,67,,"POLYGON ((-89.96588 32.87957, -90.04913 32.735..."
3218,48227,48,227,Howard,Texas,TX,36667,350,Specimen,"POLYGON ((-101.68874 32.52522, -101.69501 32.0..."


  with loop.timer(seconds, ref=ref) as t:


In [11]:
# List the columns of the combined frame; the export cell below selects its
# metric columns by these names.
parsed.columns

Index(['fips_code', 'state', 'county', 'new_cases_7_day_rolling_average',
       'percent_new_test_results_reported_positive_7_day_rolling_average',
       'new_cases_week_over_week_percent_change',
       'new_test_results_reported_7_day_rolling_average',
       'new_deaths_7_day_rolling_average', 'date', 'report_date_window_start',
       'report_date_window'],
      dtype='object')

In [14]:
# Parallel lists: columnList[i] names a metric column of `parsed`, and
# csvNames[i] is the file stem that metric is written to.
columnList = ['testing','new_cases_7_day_rolling_average', 'new_test_results_reported_7_day_rolling_average','new_deaths_7_day_rolling_average','percent_new_test_results_reported_positive_7_day_rolling_average']
csvNames = ['testing','covid_confirmed', 'tcap', 'covid_deaths', 'wk_tpos']
# FIPS codes in order of first appearance; this fixes the row order of every CSV.
uniqFips = list(parsed.fips_code.unique())

for idx, column in enumerate(columnList):
    # 'testing' is not among the columns listed by parsed.columns, so selecting
    # it raised KeyError on the first iteration. Skip unavailable metrics
    # explicitly and say so.
    if column not in parsed.columns:
        print(f"Skipping '{column}': not a column of the county time series")
        continue

    # One row per county and one column per date. pivot() replaces the
    # per-county filter plus repeated concat, and guarantees 'fips' is the first
    # column whatever dates each county reports. It raises ValueError if a
    # county has two rows for the same date.
    cleaned = (
        parsed.pivot(index='fips_code', columns='date', values=column)
        .reindex(uniqFips)
        .rename_axis(index='fips')
        .reset_index()
    )
    # Keep only the first 10 characters of each header ('YYYY-MM-DD' for the
    # timestamp columns; 'fips' is unchanged).
    columnNames = [col[:10] for col in cleaned.columns]
    cleaned.columns = columnNames
    cleaned.to_csv(f"./csv/{csvNames[idx]}_cdc.csv",index=False)
    cleaned.to_csv(f"../../docs/csv/{csvNames[idx]}_cdc.csv", index=False)

In [None]:
# Export the 7-day rolling test-volume metric on its own.
# NOTE(review): this repeats the body of the export loop above for one metric;
# consider deleting this cell once the loop is trusted.
testColumn = 'new_test_results_reported_7_day_rolling_average'
# Look the file stem up from the metric name. The previous csvNames[idx] used
# the loop variable left over from the cell above, which pointed at a different
# metric's file and would have overwritten it.
csvName = csvNames[columnList.index(testColumn)]

# One row per county (in uniqFips order) and one column per date, with 'fips'
# first. pivot() raises ValueError if a county has two rows for the same date.
cleaned = (
    parsed.pivot(index='fips_code', columns='date', values=testColumn)
    .reindex(uniqFips)
    .rename_axis(index='fips')
    .reset_index()
)
# Keep only the first 10 characters of each header ('YYYY-MM-DD' for the
# timestamp columns; 'fips' is unchanged).
columnNames = [col[:10] for col in cleaned.columns]
cleaned.columns = columnNames
cleaned.to_csv(f"./csv/{csvName}_cdc.csv",index=False)
cleaned.to_csv(f"../../docs/csv/{csvName}_cdc.csv", index=False)