In [None]:
import pandas as pd
import json
import requests
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import time
import us
import plotly.graph_objects as go
from plotly.offline import plot
sns.set()

In [None]:
# The direct download link was unreliable, so for now the survey export is
# read from a local copy. Note that this CSV file is not on GitHub.
survey = pd.read_csv(filepath_or_buffer="request_details.csv")

In [None]:
# Normalize the 'Zip' column to five-character strings in one pass:
#   1. left-pad each value with zeros up to width 5 (restores leading zeros),
#   2. keep only the first five characters so longer values are truncated.
# NOTE(review): if 'Zip' is parsed as float (e.g. the column has blanks), a
# value such as 2139.0 formats to '2139.0' and is cut to '2139.' -- confirm the
# column dtype when loading.
survey['Zip'] = survey['Zip'].apply('{0:0>5}'.format).astype(str).str[:5]

In [None]:
# Some string fields contain non-breaking spaces ('\xa0'); swap them for
# ordinary spaces in every object-dtype column.
columns = survey.select_dtypes(include='object').columns
for text_column in columns:
    cleaned_values = survey[text_column].str.replace('\xa0', ' ')
    survey[text_column] = cleaned_values

In [None]:
# Every PPE type the survey asks about; each has a 'Requesting <PPE>' column.
ppes = [
    'N95',
    'Surgical Mask',
    'Face Shield',
    'Booties',
    'Gloves',
    'Gown',
    'Sanitizer',
    'Wipes',
    'Thermometer',
    'Homemade Mask',
    'Other',
]

In [None]:
# Report, per PPE type, the share of survey rows with a non-null answer in the
# corresponding 'Requesting <PPE>' column.
for ppe in ppes:
    answer_missing = survey['Requesting {}'.format(ppe)].isnull()
    percent_filled_out = 100 * (1 - answer_missing.mean())
    message = "{:.1f}% left an answer for whether they need {}."
    print(message.format(percent_filled_out, ppe))

In [None]:
# For now, drop 'Surgical Mask' because there was a bug in the survey.

In [None]:
# PPE types used for the rest of the analysis ('Surgical Mask' is left out).
ppes = [
    'N95',
    'Face Shield',
    'Booties',
    'Gloves',
    'Gown',
    'Sanitizer',
    'Wipes',
    'Thermometer',
    'Homemade Mask',
    'Other',
]

In [None]:
# Remove all rows which have null values for any of the PPEs, then convert the
# Yes/No answers in each 'Requesting <PPE>' column to booleans.
request_columns = ['Requesting {}'.format(ppe) for ppe in ppes]
# dropna(subset=...) drops a row when ANY listed column is null. The explicit
# .copy() makes valid_requests an independent frame, so the column assignments
# below write to it directly rather than to a slice of `survey` (the original
# filtered-then-assigned pattern triggers SettingWithCopyWarning).
valid_requests = survey.dropna(subset=request_columns).copy()
for request_column in request_columns:
    valid_requests[request_column] = valid_requests[request_column].replace(
        {'Yes': True, 'No': False})

In [None]:
# Fraction of the survey kept after filtering: still about 81% of the
# DataFrame, which is probably about the best we could do.
valid_requests.shape[0] / survey.shape[0]

In [None]:
# Report, per PPE type, the share of valid respondents whose
# 'Requesting <PPE>' answer is truthy.
for ppe in ppes:
    needs_ppe = valid_requests['Requesting {}'.format(ppe)].astype(int)
    percent_needing_ppe = 100 * needs_ppe.mean()
    message = "{:.1f}% of respondents need {}."
    print(message.format(percent_needing_ppe, ppe))

In [None]:
# Plot pie chart

In [None]:
# One pie chart per PPE type showing the reported '<PPE> Stock' answers.
fig, axs = plt.subplots(5, 2, figsize=(10, 15))
fig.tight_layout(pad=3.0)
# Stock categories in display order (most supply first), paired one-to-one
# with `colors`. Naming them explicitly replaces the positional reorder of
# np.unique output, which mislabels slices (or raises IndexError) whenever a
# PPE type is missing one of the five categories.
stock_levels = ['More than 2 weeks', '2 weeks or less', '1 week or less',
                '2 days or less', 'No supply remaining']
colors = ["#FDFCEF","#FFDA55","#FFC831","#FC7555","#E96E81"]
for ax, ppe in zip(axs.flat, ppes):
    ppe_stock = valid_requests['{} Stock'.format(ppe)].dropna()
    ax.set_title("{} Supply Remaining (n={})".format(ppe, len(ppe_stock)), size=16)
    # Align counts with the fixed category list; absent categories count as 0.
    level_counts = ppe_stock.value_counts().reindex(stock_levels, fill_value=0)
    present = level_counts > 0
    if not present.any():
        # Nothing to draw for this PPE type; leave the panel empty.
        ax.text(0.5, 0.5, 'No data', ha='center', va='center')
        ax.set_xticks([])
        ax.set_yticks([])
        continue
    percentages = level_counts[present] / level_counts.sum() * 100
    # Drop the colors of absent categories so each category keeps its own color.
    slice_colors = [color for color, keep in zip(colors, present) if keep]
    ax.pie(percentages, labels=list(percentages.index), colors=slice_colors,
           autopct='%1.0f%%')
# bbox_inches (not bbox_to_anchor) is the savefig option that trims whitespace.
plt.savefig("supplies_remaining.png", dpi=200, bbox_inches='tight')

In [None]:
## Supply by region

In [None]:
# Slightly modify Matt's plotting functions.

In [None]:
import plotly.graph_objects as go
from plotly.offline import plot

def choropleth_mapbox_usa_plot(counties, locations, z, text,
                                colorscale = "RdBu_r", zmin=-1, zmax=10, 
                                title='choropleth_mapbox_usa_plot',
                                colorbar_title = 'count',
                                html_filename='plot.html',
                                show_fig=True):
    """Draw a county-level choropleth centered on the US and save it as HTML.

    For reference: https://plotly.com/python/mapbox-county-choropleth/

    Parameters
    ----------
    counties : GeoJSON object passed to plotly as ``geojson``.
    locations : values passed as ``locations`` (matched by plotly against the
        GeoJSON features).
    z : values that drive the color of each location.
    text : per-location hover text (the trace uses ``hoverinfo='text'``, so
        this is the only thing shown on hover).
    colorscale, zmin, zmax : color scale and the value range it spans.
    title : figure title.
    colorbar_title : title shown on the color bar.
    html_filename : path of the HTML file to write. Missing parent
        directories are created.
    show_fig : when True, display the figure before saving it.

    Returns
    -------
    None
    """
    choropleth = go.Choroplethmapbox(
        geojson=counties, locations=locations, z=z, text=text,
        colorscale=colorscale, zmin=zmin, zmax=zmax, marker_opacity=0.8,
        marker_line_width=0.5, colorbar_title=colorbar_title, hoverinfo='text'
    )
    fig = go.Figure(choropleth)

    # Center on US
    fig.update_layout(
        title=title,
        mapbox_style="carto-positron",
        mapbox_zoom=3.5,
        mapbox_center={"lat": 37.0902, "lon": -95.7129},
        margin={"r":100,"t":30,"l":30,"b":0},
    )

    # Show the figure
    if show_fig:
        fig.show()

    # Make sure the target folder exists; otherwise writing e.g.
    # 'stock_maps/<name>.html' fails on a fresh checkout.
    output_dir = os.path.dirname(html_filename)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Download the figure (From Sunny Mui)
    fig.write_html(file=html_filename, config={'responsive': True}, include_plotlyjs='cdn')

In [None]:
# Parsed GeoJSON keyed by URL, so repeated calls (one per map) download the
# file only once per kernel session.
_GEOJSON_CACHE = {}


def _fetch_geojson(geojson_url):
    """Return the parsed JSON at geojson_url, downloading it on first use only."""
    if geojson_url not in _GEOJSON_CACHE:
        response = requests.get(geojson_url, timeout=60)
        # Fail loudly on an HTTP error instead of trying to parse an error page.
        response.raise_for_status()
        _GEOJSON_CACHE[geojson_url] = response.json()
    return _GEOJSON_CACHE[geojson_url]


def download_county_geojson_and_merge_df(geojson_url, mask_df_counties, number_to_category, county_label="Stock"):
    """Load county GeoJSON and left-join per-county values onto its features.

    Parameters
    ----------
    geojson_url : URL of a GeoJSON document. Its ``features`` entries must
        carry ``id`` and ``type`` plus ``properties`` containing ``STATE``,
        ``COUNTY``, ``NAME`` and ``LSAD`` (the fields this function reads).
    mask_df_counties : DataFrame that just needs to have fields for 'fips'
        and 'counts'.
    number_to_category : mapping from each value found in the merged count
        column (0 for counties with no row in ``mask_df_counties``) to the
        label shown in the hover text. A missing key raises KeyError.
    county_label : prefix used in the hover text.

    Returns
    -------
    (counties, merged_df) : the parsed GeoJSON object for plotting, and a
        DataFrame with one row per feature holding 'fips', 'county', 'STATE',
        'PPE_requests', 'county_info_for_map' and 'ppe_text' alongside the
        remaining feature fields.

    Raises
    ------
    requests.HTTPError if the download returns an error status.
    """
    counties = _fetch_geojson(geojson_url)

    # Create counties_df from the geojson features, one row per feature.
    counties_df = pd.DataFrame.from_dict(counties['features'])

    # Expand the properties dicts into columns and remove the old properties column.
    counties_df = pd.concat(
        [counties_df, pd.json_normalize(counties_df['properties'])], axis=1).drop(['properties'], axis=1)

    # Clean up the dataframe.
    counties_df = (
        counties_df
        .drop(['type','COUNTY','LSAD'], axis=1)
        .rename(columns={'id':'fips','NAME':'county'})
    )

    # Join with the dataframe that has ppe requests: mask_df
    merged_df = counties_df.join(
        mask_df_counties[['fips','counts']].set_index('fips'),
        on='fips',  how='left', lsuffix='counties', rsuffix='mask_df')

    # Fill the NA in counts with 0s. Assign the result back: calling
    # fillna(inplace=True) on a column selection is chained assignment and is
    # not guaranteed to update merged_df.
    merged_df['counts'] = merged_df['counts'].fillna(0)

    # Change name of column 'counts' to 'PPE_requests'
    merged_df = merged_df.rename(columns={'counts':'PPE_requests'})

    # Map fips state code to the us.states lookup result
    merged_df['STATE'] = merged_df['STATE'].map(us.states.lookup)
    merged_df['county_info_for_map'] = (
        'PPE Requests: ' + merged_df['county'].astype(str) + ', ' + merged_df['STATE'].astype(str))

    # Create text column for use in mapping
    category_labels = merged_df['PPE_requests'].map(lambda value: number_to_category[value])
    merged_df['ppe_text'] = (
        '{}: '.format(county_label) + category_labels + '<br>'
        + merged_df['county'].astype(str) + ', ' + merged_df['STATE'].astype(str))

    # Return a json object called counties for plotting, and the merged frame
    # for joins + manipulation of other data.
    return counties, merged_df

In [None]:
# Merge zips and FIPS.
# ZIP and COUNTY are read as strings so leading zeros are preserved.
zip_county_crosswalk = pd.read_csv(
    'zip_county_crosswalk.csv', converters={'ZIP': str, 'COUNTY': str})
zip_county_crosswalk.columns = zip_county_crosswalk.columns.str.lower()

# Some zipcodes have multiple FIPS codes, so use the FIPS code that has the largest overlap.
max_ratio_per_zip = zip_county_crosswalk.groupby('zip')['tot_ratio'].transform('max')
idx = zip_county_crosswalk['tot_ratio'] == max_ratio_per_zip
zip_max_county = (
    zip_county_crosswalk[idx]
    # A tie on tot_ratio would leave several rows for one zip and multiply the
    # matching requests in the later merge; keep exactly one county per zip.
    .drop_duplicates(subset='zip')
    # assign() returns a new frame, so the 'fips' column is not written onto a
    # filtered slice of the crosswalk (avoids SettingWithCopyWarning).
    .assign(fips=lambda crosswalk: crosswalk['county'])
)

In [None]:
# Attach a county FIPS code to every valid request by matching its 'Zip'
# against the crosswalk's 'zip', then discard the duplicate key column.
zip_to_fips = zip_max_county[['zip', 'fips']]
requests_with_fips = valid_requests.merge(
    zip_to_fips,
    left_on='Zip',
    right_on='zip',
).drop('zip', axis=1)

In [None]:
# Fraction of valid requests that survived the zip -> FIPS merge
# (lost 5 rows but whatever).
requests_with_fips.shape[0] / valid_requests.shape[0]

In [None]:
# Count number of requests by county

In [None]:
# Severity score per stock answer (5 = worst) and the reverse lookup used for
# hover labels; 0 marks counties with no responses.
stock_to_category = {'No supply remaining': 5, '2 days or less': 4, '1 week or less': 3, 
                 '2 weeks or less': 2, 'More than 2 weeks': 1}
number_to_category = {5: 'No supply remaining', 4: '2 days or less', 3: '1 week or less', 
                  2: '2 weeks or less', 1: 'More than 2 weeks', 0: 'No data'}
geojson_url = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
# The HTML maps are written into this folder; create it so a fresh checkout works.
os.makedirs('stock_maps', exist_ok=True)
for ppe in ppes:
    stock_column = '{} Stock'.format(ppe)
    # .copy() gives an independent frame, so adding 'counts' below does not
    # write onto a slice of requests_with_fips (avoids SettingWithCopyWarning).
    ppe_requests = requests_with_fips[['fips', stock_column]].dropna(subset=[stock_column]).copy()
    # Strict lookup: an unexpected stock answer raises KeyError rather than being hidden.
    ppe_requests['counts'] = ppe_requests[stock_column].map(lambda level: stock_to_category[level])
    # Aggregate by mode. When several levels tie, take the most severe one so
    # the result does not depend on row order.
    ppe_average_stock = (
        ppe_requests.groupby('fips')['counts']
        .agg(lambda levels: levels.mode().max())
        .reset_index()
    )
    counties, merged_df = download_county_geojson_and_merge_df(geojson_url, ppe_average_stock, number_to_category)
    # Couldn't figure out how to fix the labels on the colorbar, but at least the hover has the right categories.
    choropleth_mapbox_usa_plot(
        counties = counties,
        locations = merged_df.fips,
        z = merged_df.PPE_requests,
        text = merged_df.ppe_text,
        colorscale = ["#fdfcef","#c7e9b4","#D2FBFF","#36A2B9","#004469"],
        zmin = 0,
        zmax=5,
        title = '{} Stock by County (Hover for breakdown)'.format(ppe),
        colorbar_title = '{} Stock (Darker is less)'.format(ppe),
        html_filename = 'stock_maps/{}_Stock_By_County.html'.format(ppe.replace(' ', '_')))

In [None]:
# Mapping total requests by region

In [None]:
# Count requests per county. Pick all rows that have a non-null value for N95,
# since it is most frequently requested.
ppe_request_counts = requests_with_fips.groupby('fips')['Requesting N95'].count().reset_index()
ppe_request_counts['counts'] = ppe_request_counts['Requesting N95']
# Hover labels are the counts themselves. Size the lookup from the data (0 up
# to the largest county count) instead of a fixed 0..49 range, which raised
# KeyError for any county with 50 or more requests.
max_requests = int(ppe_request_counts['counts'].max()) if len(ppe_request_counts) else 0
number_to_category = {count: str(count) for count in range(max_requests + 1)}
counties, merged_df = download_county_geojson_and_merge_df(geojson_url, ppe_request_counts, number_to_category, 
                                                          county_label='Requests')
# The HTML map is written into this folder; create it so a fresh checkout works.
os.makedirs('stock_maps', exist_ok=True)
choropleth_mapbox_usa_plot(
        counties = counties,
        locations = merged_df.fips,
        z = merged_df.PPE_requests,
        text = merged_df.ppe_text,
        colorscale = ["#fdfcef","#c7e9b4","#D2FBFF","#36A2B9","#004469"],
        zmin = 0,
        zmax=5,
        title = 'Requests by County (Hover for breakdown)',
        colorbar_title = 'Requests',
        html_filename = 'stock_maps/Requests_By_County.html')

In [None]:
# By type of institution

In [None]:
# Institution-type names; each is used as a column name of the survey data,
# so the spellings must stay exactly as they are.
institution_types = np.array([
    'Acute Care Hospital',
    'Non-Acute Care Hospital',
    'Freestanding Emergency Room',
    'Urgent Care Clinic',
    'Field Hospital',
    'Hospital Overflow Facility',
    'EMS of Fire Department',
    'Independent Clinic',
    'Federally Qualified Health Center (FQHC)',
    'Disproportionate Share Hospital',
    'Critical Access',
    'Rural Health Clinic',
    'Public Health Clinic',
    'Tribal healthcare / Indian Health Service',
    'Nursing Facility',
    'Inpatent Rehabilitation Center',
    'Residential Substance Treatment Center',
    'Resident or Inpatient Psychiatric Facility',
    'Police Department',
    'Group Home',
    'Assisted Living Facility',
    'Correctional Facility/Detention Center',
    'Homeless Shelter',
    'Hospice',
    'Home Health Aides/Other Home Services',
])

In [None]:
# For each institution type, count the valid requests that have a non-null
# value in that type's column (same order as institution_types).
response_list = np.array([
    valid_requests[institution_type].notna().sum()
    for institution_type in institution_types
])

In [None]:
# Reorder institution types from most to fewest responses (argsort of the
# negated counts gives descending order).
descending_order = np.argsort(-response_list)
institution_types = institution_types[descending_order]

In [None]:
# Print the response count for each institution type, in the current order of
# institution_types.
for institution_type in institution_types:
    has_answer = valid_requests[institution_type].notna()
    num_responses = has_answer.sum()
    print("{} responses from {}".format(num_responses, institution_type))

In [None]:
# Pie chart of: for each PPE, what is the breakdown of facility types that have run out?
sns.set()
fig, axs = plt.subplots(5, 2, figsize=(20, 18))
fig.tight_layout(pad=1.0)
# Look at 7 most popular institution types.
top_institution_types = institution_types[:7]
for ax, ppe in zip(axs.flat, ppes):
    stock_column = '{} Stock'.format(ppe)
    type_to_num_ran_out = {}
    for institution_type in top_institution_types:
        # Rows with a non-null value in this institution type's column.
        from_this_type = valid_requests[institution_type].notna()
        type_to_num_ran_out[institution_type] = np.sum(
            valid_requests.loc[from_this_type, stock_column] == "No supply remaining")
    num_ran_out = np.array(list(type_to_num_ran_out.values()))
    total_ran_out = np.sum(num_ran_out)
    ax.set_title("Institutions without {} (n={})".format(ppe, total_ran_out), size=16)
    if total_ran_out == 0:
        # No facility ran out of this item: dividing by zero would hand NaNs
        # to pie(), so leave the panel empty instead.
        ax.text(0.5, 0.5, 'No data', ha='center', va='center')
        ax.set_xticks([])
        ax.set_yticks([])
        continue
    percentages = num_ran_out / total_ran_out
    labels = list(type_to_num_ran_out.keys())
    ax.pie(percentages, labels=labels, autopct='%1.0f%%')
plt.subplots_adjust(top = 0.95)
# bbox_inches (not bbox_to_anchor) is the savefig option that trims whitespace.
plt.savefig("institutions_without_ppe.png", dpi=200, bbox_inches='tight')