In [6]:
import ast
import json
import numpy as np
import pandas as pd

In [7]:
# load the data
raw_data = pd.read_json("data/coviddata.json")
data = raw_data['values'].tolist()
raw_df = pd.DataFrame(data)

In [9]:
# some initial exploration
new_cols = raw_df.iloc[:1].values.tolist()[0]
raw_df.columns = new_cols

# handle multiple headers? weird
clean_df = raw_df[2:]
clean_df.head(2)

Unnamed: 0,Approved,Deduped,Mod Status,Timestamp,What is the name of the hospital or clinic?,Final Address,Street address for dropoffs?,City,State?,"Drop off instructions, eg curbside procedure or mailing address ATTN: instructions:",...,Will they accept open boxes/bags?,Type of request:,Email Address,Type of organization?,CCN / Hospital ID (if applicable),When will you start running out of supplies?,Please describe most significant shortages:,State?.1,Lat,Lng
2,x,x,,3/19/2020 11:14:25,Swedish Ballard,"5300 Tallman Ave NW\nSeattle, WA 98107","5300 Tallman Ave NW\nSeattle, WA 98107",Seattle,WA,Put in donations bin at registration desk or a...,...,Yes,,,,,,,,47.6674625,-122.3795306
3,x,x,,3/19/2020 14:37:04,Zuckerberg San Francisco General Hospital,"1001 Potrero Ave\nSan Francisco, CA 94110","1001 Potrero Ave\nSan Francisco, CA 94110",San Francisco,CA,"For now, call ahead: call the switchboard (628...",...,No,,,,,,,,37.7557265,-122.4047381


In [10]:
# Get list of all ppe items requested
all_ppe = [ppe.split(',') for ppe in clean_df['What are they accepting?'].values.tolist()]

In [11]:
# create flat list of all ppe items requests
flatten = lambda l: [item for sublist in l for item in sublist]
flat_ppe = set(flatten(all_ppe))

In [26]:
# We're gonna break out the PPE items into their own columns and have
# true/false logic for each row corresponding to which PPE items
# they need.

# create dict to store whether each row requested the relevant piece of PPE
store = dict()
for ppe in flat_ppe:
    store[ppe] = []

# count those ppe requestes up
for idx, row in clean_df.iterrows():
    for ppe in store.keys():
        if ppe in row['What are they accepting?']:
            store[ppe].append(1)
        else:
            store[ppe].append(0)

# put it into a df to be joined back with the original dataset            
store_df = pd.DataFrame(store)

In [27]:
# find the 20 most common items
common_ppe = pd.DataFrame(store_df.sum()).sort_values(0, ascending=False).iloc[:20]
list(common_ppe.index)

['',
 'N95s',
 'Surgical masks',
 ' Surgical masks',
 'Face shields',
 'Gowns',
 ' Gown',
 ' Face shields',
 ' Gowns',
 ' sanitize',
 ' sanitizer',
 'Hand sanitizer',
 ' Hand sanitizer',
 ' Safety goggles',
 'Gloves',
 ' Gloves',
 ' Disinfecting wipes',
 ' Disposable booties',
 ' Thermometers',
 ' Surgical Masks']

In [28]:
# manually create list in order to quickly remove duplicates
most_common_items = ['N95s',
 'Surgical masks',
 'Face shields',
 'Gowns',
 'Hand sanitizer',
 ' Safety goggles',
 'Gloves',
 ' Disinfecting wipes',
 ' Disposable booties',
 ' Thermometers',
 ' Surgical Masks']

In [52]:
# join the true/false df back with the original dataset
merged_df = raw_df.join(store_df[most_common_items])
merged_df = merged_df[2:]
merged_df = merged_df.reset_index()
merged_df = merged_df.drop('index', 1)

In [53]:
export_df = merged_df.to_csv('data/findthemasksjson_parsed_03272020.csv')

## Pulling in Zip Codes (Keyon V did the merge)

In [54]:
# read in zipcode data that keyon put together
zip_df = pd.read_csv('data/coviddata_metro.csv')

In [57]:
# fill nans so that we can cast columns as floats
merged_df = merged_df.fillna(np.nan)
zip_df = zip_df.fillna(np.nan)
# merged_df[merged_df == 'N/A'] = np.nan


In [66]:
# we're going to merge on latitude/longitude
# cast latitude/longitude data as a common type (float)
# merged_df['Lat'] = merged_df.Lat.apply(float)
# merged_df['Lng'] = merged_df.Lng.apply(float)

# zip_df['lat'] = zip_df.lat.apply(float)
# zip_df['lng'] = zip_df.lng.apply(float)

# merge it up
# zip_merged = pd.merge(merged_df, zip_df,  how='left', left_on=['Lat','Lng'], right_on = ['lat','lng'])
zip_merged = pd.merge(merged_df, zip_df,  how='left', left_on='Timestamp', right_on = 'timestamp')

In [67]:
zip_merged.columns

Index(['Approved', 'Deduped', 'Mod Status', 'Timestamp',
       'What is the name of the hospital or clinic?', 'Final Address',
       'Street address for dropoffs?', 'City', 'State?',
       'Drop off instructions, eg curbside procedure or mailing address ATTN: instructions:',
       'What are they accepting?', 'Will they accept open boxes/bags?',
       'Type of request:', 'Email Address', 'Type of organization?',
       'CCN / Hospital ID (if applicable)',
       'When will you start running out of supplies?',
       'Please describe most significant shortages:', 'State?', 'Lat', 'Lng',
       'N95s', 'Surgical masks', 'Face shields', 'Gowns', 'Hand sanitizer',
       ' Safety goggles', 'Gloves', ' Disinfecting wipes',
       ' Disposable booties', ' Thermometers', ' Surgical Masks', 'approved',
       'deduped', 'residence', 'timestamp', 'name', 'address', 'orig_address',
       'city', 'state', 'instructions', 'accepting', 'open_box', 'lat', 'lng',
       'row', 'zipcode', 'FIPS',

## Plotting

In [69]:
zip_merged.groupby('zipcode').sum()

Unnamed: 0_level_0,N95s,Surgical masks,Face shields,Gowns,Hand sanitizer,Safety goggles,Gloves,Disinfecting wipes,Disposable booties,Thermometers,Surgical Masks,lat,lng,row,FIPS,metro
zipcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
926.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,18.331805,-66.072930,215.0,72127.0,1.0
1002.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,42.409355,-72.529680,1096.0,25015.0,1.0
1199.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,42.120466,-72.605471,381.0,25013.0,1.0
1201.0,4.0,3.0,2.0,2.0,1.0,3.0,2.0,1.0,2.0,1.0,0.0,169.809733,-293.004511,3191.0,100012.0,0.0
1230.0,1.0,1.0,1.0,0.0,1.0,1.0,2.0,1.0,0.0,0.0,0.0,84.384840,-146.744364,1873.0,50006.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99156.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,48.181822,-117.056550,918.0,53051.0,1.0
99204.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,47.651772,-117.414111,518.0,53063.0,1.0
99336.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,46.213357,-119.221646,1367.0,53005.0,1.0
99352.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,46.281386,-119.282223,61.0,53005.0,1.0
