In [63]:
import ast
import json
import numpy as np
import pandas as pd

In [64]:
# Read the raw JSON export and rebuild it as a DataFrame.
coviddata_path = "data/coviddata.json"
raw_data = pd.read_json(coviddata_path)
# Pull the 'values' column out as a plain Python list.
# Presumably each entry is one spreadsheet row (header rows included) -- TODO confirm.
data = raw_data.loc[:, 'values'].tolist()
raw_df = pd.DataFrame(data=data)

In [65]:
# some initial exploration
# `df` was never defined before this cell, so it only ran thanks to leftover
# kernel state. Derive it from the freshly loaded frame (as a copy, so that
# renaming the columns below leaves raw_df untouched).
df = raw_df.copy()

# the first row carries the column names: promote it to the header
new_cols = df.iloc[0].tolist()
df.columns = new_cols

# handle multiple headers? weird
# the first two rows are treated as header rows and skipped; records start at position 2
clean_df = df.iloc[2:]
clean_df.head(2)

Unnamed: 0,Approved,Deduped,Mod Status,Timestamp,What is the name of the hospital or clinic?,Final Address,Street address for dropoffs?,City,State?,"Drop off instructions, eg curbside procedure or mailing address ATTN: instructions:",...,Will they accept open boxes/bags?,Type of request:,Email Address,Type of organization?,CCN / Hospital ID (if applicable),When will you start running out of supplies?,Please describe most significant shortages:,State?.1,Lat,Lng
2,x,x,,3/19/2020 11:14:25,Swedish Ballard,"5300 Tallman Ave NW\nSeattle, WA 98107","5300 Tallman Ave NW\nSeattle, WA 98107",Seattle,WA,Put in donations bin at registration desk or a...,...,Yes,,,,,,,,47.6674625,-122.3795306
3,x,x,,3/19/2020 14:37:04,Zuckerberg San Francisco General Hospital,"1001 Potrero Ave\nSan Francisco, CA 94110","1001 Potrero Ave\nSan Francisco, CA 94110",San Francisco,CA,"For now, call ahead: call the switchboard (628...",...,No,,,,,,,,37.7557265,-122.4047381


In [66]:
# Split each row's comma-separated "accepting" answer into its individual items.
# Items are kept exactly as split (no whitespace stripping).
accepting_answers = clean_df['What are they accepting?'].tolist()
all_ppe = list(map(lambda answer: answer.split(','), accepting_answers))

In [67]:
# Collapse the per-row item lists into the set of distinct PPE item strings.
def flatten(l):
    """Return one flat list containing every item of every sub-list of `l`, in order."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat


flat_ppe = set(flatten(all_ppe))

In [68]:
# We're gonna break out the PPE items into their own columns and have
# true/false logic for each row corresponding to which PPE items
# they need.

# The free-text answer of every row in clean_df.
accepting = clean_df['What are they accepting?']

# For each distinct PPE string, record per row whether that string occurs in
# the row's answer. This is a *substring* test on purpose: a key such as
# 'Gloves' also flags rows that contain ' Gloves', and the manually
# de-duplicated key list used further down relies on exactly that.
store = {
    ppe: [ppe in answer for answer in accepting]
    for ppe in flat_ppe
}

# Put it into a df to be joined back with the original dataset.
# The flags must carry clean_df's row labels: clean_df starts at label 2
# (the header rows were dropped), so a default 0..n-1 index would make the
# later index-based join attach every flag two rows too early.
store_df = pd.DataFrame(store, index=clean_df.index)

In [70]:
# Rank the PPE strings by how many rows were flagged True and keep the top 20.
request_counts = store_df.sum().to_frame()  # single column, labelled 0
common_ppe = request_counts.sort_values(by=0, ascending=False).head(20)
common_ppe.index.tolist()

['',
 'N95s',
 'Surgical masks',
 ' Surgical masks',
 'Face shields',
 'Gowns',
 ' Face shields',
 ' Gown',
 ' Gowns',
 ' sanitize',
 ' sanitizer',
 'Hand sanitizer',
 ' Hand sanitizer',
 ' Safety goggles',
 'Gloves',
 ' Gloves',
 ' Disinfecting wipes',
 ' Disposable booties',
 ' Thermometers',
 ' Surgical Masks']

In [71]:
# Hand-picked from the top-20 output above to quickly drop the duplicates.
# Each string is used verbatim as a store_df column name, so leading spaces
# and capitalisation must stay exactly as they are.
most_common_items = [
    'N95s',
    'Surgical masks',
    'Face shields',
    'Gowns',
    'Hand sanitizer',
    ' Safety goggles',
    'Gloves',
    ' Disinfecting wipes',
    ' Disposable booties',
    ' Thermometers',
    ' Surgical Masks',
]

In [72]:
# Attach the selected true/false PPE columns to the full sheet.
# NOTE(review): DataFrame.join lines rows up by index label, so this relies on
# store_df carrying the same labels as the df rows it describes -- confirm.
ppe_flags = store_df.loc[:, most_common_items]
merged_df = df.join(ppe_flags)

In [73]:
# Write the merged sheet to disk. to_csv returns None when it is given a
# path, so export_df does not hold a DataFrame afterwards.
export_path = 'data/findthemasksjson_parsed_03272020.csv'
export_df = merged_df.to_csv(export_path)

## Pulling in Zip Codes (Keyon V did the merge)

In [77]:
# Load the zipcode lookup table that Keyon put together.
zip_path = 'data/coviddata_metro.csv'
zip_df = pd.read_csv(zip_path)

In [82]:
# Normalise missing values so that the Lat/Lng columns can be cast to float.
merged_df = merged_df.fillna(np.nan)
zip_df = zip_df.fillna(np.nan)

# Literal 'N/A' strings count as missing too; mask() returns a new frame with
# those cells set to NaN instead of mutating merged_df in place.
merged_df = merged_df.mask(merged_df == 'N/A')

# Drop the header rows by keeping exactly the rows that clean_df kept.
# Selecting by label is idempotent, whereas the previous positional
# `.iloc[2:]` removed two more real rows every time this cell was re-run.
merged_df = merged_df.loc[clean_df.index]

In [83]:
# The two tables are joined on their coordinates, so both latitude/longitude
# column pairs are first converted to a common type (float).
for coord in ('Lat', 'Lng'):
    merged_df[coord] = merged_df[coord].map(float)

for coord in ('lat', 'lng'):
    zip_df[coord] = zip_df[coord].map(float)

# Left merge: every merged_df row is kept; rows without an exactly equal
# (lat, lng) pair in zip_df get NaN in the zip_df columns.
zip_merged = merged_df.merge(
    zip_df,
    how='left',
    left_on=['Lat', 'Lng'],
    right_on=['lat', 'lng'],
)

In [84]:
# Display the merged frame; rows that found no coordinate match in zip_df
# show NaN in the columns that came from zip_df.
zip_merged

Unnamed: 0,Approved,Deduped,Mod Status,Timestamp,What is the name of the hospital or clinic?,Final Address,Street address for dropoffs?,City,State?,"Drop off instructions, eg curbside procedure or mailing address ATTN: instructions:",...,state,instructions,accepting,open_box,lat,lng,row,zipcode,FIPS,metro
0,x,x,,3/19/2020 11:14:25,Swedish Ballard,"5300 Tallman Ave NW\nSeattle, WA 98107","5300 Tallman Ave NW\nSeattle, WA 98107",Seattle,WA,Put in donations bin at registration desk or a...,...,,,,,,,,,,
1,x,x,,3/19/2020 14:37:04,Zuckerberg San Francisco General Hospital,"1001 Potrero Ave\nSan Francisco, CA 94110","1001 Potrero Ave\nSan Francisco, CA 94110",San Francisco,CA,"For now, call ahead: call the switchboard (628...",...,CA,"For now, call ahead: call the switchboard (628...","N95s, Surgical Masks",No,37.755727,-122.404738,4.0,94110.0,6075.0,1.0
2,x,x,,3/19/2020 15:11:30,Franciscan Women's Health Associates - Burien,"16045 1st Ave S\nBurien, WA 98148","16045 1st Ave S\nBurien, WA 98148",Burien,WA,Bring up stairs to the Women's care desk or ca...,...,,,,,,,,,,
3,x,x,,3/19/2020 15:19:46,Moab Regional Hospital,"450 Williams Way\nMoab, UT 84532","450 Williams Way\nMoab, UT 84532",Moab,UT,TBD,...,UT,TBD,"N95s, Surgical Masks, Safety Goggles",Yes,38.575156,-109.559775,8.0,84532.0,49019.0,0.0
4,x,x,,3/19/2020 15:52:02,Northern Nevada Medical Center,"2375 E. Prater Way\nSparks, NV 89431","2375 E. Prater Way\nSparks, NV 89431",Sparks,NV,"Mail to: Alexa Parker, Emergency Dept.- NNMC, ...",...,NV,"Mail to: Alexa Parker, Emergency Dept.- NNMC, ...","N95s, Surgical Masks, Disposable Booties, Safe...",Yes,39.541116,-119.697360,10.0,89431.0,32031.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1677,x,,Q,3/26/2020 7:25:05,Saint Alphonsus Regional Medical Center,"1055 N Curtis Rd\nBoise, ID 83706",1055 N. Curtis Rd.,Boise,ID,Receiving Dock ATTN: Donations,...,,,,,,,,,,
1678,x,,Q,3/26/2020 7:28:35,Saint Alphonsus Medical Center,"4300 E Flamingo Ave\nNampa, ID 83687",4300 E Flamingo Ave,Nampa,ID,Supply Chain Receiving Dock - ATTN: Donations,...,,,,,,,,,,
1679,x,,Q,3/26/2020 7:36:33,CharDonnay Dialysis,"21006 Las Lomas Blvd\nSan Antonio, TX 78258",21006 Las Lomas Blvd,San Antonio,TX,Administrator's house. Please drop there. Our ...,...,TX,Administrator's house. Please drop there. Our ...,"N95s, Face shields, Safety goggles, Gloves, Go...",Yes,29.636998,-98.489970,1901.0,78258.0,48029.0,1.0
1680,x,,Q,3/26/2020 7:28:47,Interfaith Medical,"1545 Atlantic Ave\nBrooklyn, NY 11213",1545 Atlantic Avenue,Brooklyn,NY,ATT:Dr. Lawrence Lai,...,,,,,,,,,,
