# Correlation of PPE Demand in USA With Covid19 Cases

In [1]:
# import data
import json, requests
import os, sys, time
import configparser

# computing
import pandas as pd
import numpy as np

### Add python path and add local packages

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# Add getusppe_viz/src to python path (two up)
two_up = os.path.abspath(path.join(os.getcwd(),"../.."))
src_dir = os.path.join(two_up,'src')
if src_dir not in sys.path:
    sys.path.append(src_dir)

# Import local libraries
from geocode import geocoder
from mapping import choropleth_mapbox_usa_plot, \
    viz_correlation_ppe_request_covid19_cases
from data_download import download_findthemasks_data, \
    download_nytimes_data, \
    download_hospital_data, \
    download_PPE_donors, \
    download_zip_to_fips_data
from data_process import add_fips_county_info, \
    requests_per_county, \
    download_county_geojson_and_merge_df, \
    merge_covid_ppe_df, \
    process_hospital_data, \
    merge_covid_ppe_hosp_df, \
    calculate_covid_per_bed_available, \
    find_counties_with_covid19_and_no_ppe_request, \
    add_all_ppe_requests_to_merged_df, \
    add_fips_ppe_donors, \
    donors_per_county, \
    calculate_donor_per_requester, \
    create_requestor_df_for_querying_requesters
from math_custom import distance, closest, k5_closest, k10_closest

NameError: name 'path' is not defined

### Configs

In [None]:
config = configparser.ConfigParser()
config.read(os.path.join(src_dir,'config.ini'))

ny_times_covid_date = config['viz']['ny_times_covid_date']
findthemasks_url = config['viz']['findthemasks_url']
request_headers = eval(config['viz']['request_headers'])
county_fips_download_url = config['viz']['county_fips_download_url']
geojson_url = config['viz']['geojson_url']
ny_times_county_data_url = config['viz']['ny_times_county_data_url']
hospital_download_url = config['viz']['hospital_download_url']

## Download find the mask data and convert to pandas
- Taken from find the mask [web visualization](https://findthemasks.com/give.html) 
- [Data updated every 5 mins here](findthemasks.com/data.json) - The data visulized here is from 3/25 at 10PM PST

In [None]:
mask_df = download_findthemasks_data(
    url = findthemasks_url, request_headers = request_headers)
mask_df.head(2)

### Save out temporary list of data from specific states

In [None]:
states = ['CA']
states_of_interest = mask_df[mask_df.State.isin(states)]
path = 'Data_for_Lily' + time.strftime("%Y%m%d") + '.csv'
states_of_interest.to_csv (path, index = False, header=True)

### Create geocoder class to find fips and county information by lat/long

In [None]:
geocoder = geocoder(county_fips_download_url)

### Search and add the FIPS code to each row - WILL TAKE SEVERAL MINS

In [None]:
mask_df = add_fips_county_info(mask_df, geocoder)

In [None]:
timestr = time.strftime("%Y%m%d")
path = 'mask_df_data_by_county_' + timestr + '.csv'
mask_df.to_csv (path, index = False, header=True)

### Sum amount of requests per county

In [None]:
mask_df_counties = requests_per_county(mask_df, write_out_csv = True)
mask_df_counties.head(5)

### Download county geo information

In [None]:
counties, merged_df = download_county_geojson_and_merge_df(geojson_url, mask_df_counties)
merged_df.head(2)

### Add list of all ppe requests per county

In [None]:
merged_df = add_all_ppe_requests_to_merged_df(mask_df,merged_df)

In [None]:
# TODO : Add all requests info as clickable info on the interactive maps
# How to pull array of dicts from 'all_ppe_requests' category
''' 
all_ppe_locations_array= eval(str(merged_df.loc[
    merged_df['fips'] == '01073', 'all_ppe_requests'].values[0]))
print (all_ppe_locations_array)
merged_df['ppe_text'] = 'PPE Requests: ' + merged_df['PPE_requests'].astype(int).astype(str) + '<br>'+ \
        merged_df['county'].astype(str) + ', ' + merged_df['STATE'].astype(str) + \
        eval(merged_df['all_ppe_requests'].values
'''

### Map of PPE Requests

In [None]:
choropleth_mapbox_usa_plot(
    counties = counties,
    locations = merged_df.fips,
    z = merged_df.PPE_requests,
    text = merged_df.ppe_text,
    #colorscale = ["#fdfcef", "#c7e9b4", "#6ab7a6","#41b6c4","#2c7fb8","#253494"],
    colorscale = ["#fdfcef","#c7e9b4","#D2FBFF","#36A2B9","#004469"],
    zmin = 0,
    zmax=5,
    title = ('PPE Requests By County - %s - (Hover for breakdown)' % time.strftime("%Y%m%d")),
    colorbar_title = '> PPE Requests',
    html_filename = 'PPE_Requests_By_County.html')

## Download COVID19 data and convert to pandas

In [None]:
covid_df = download_nytimes_data(
    ny_times_county_data_url, ny_times_covid_date, write_out_csv = True)
covid_df.head(5)

### Merge the covid and Nytimes data

In [None]:
merged_covid_ppe_df=merge_covid_ppe_df(covid_df,merged_df) 
merged_covid_ppe_df.head(2)

### Mapping covid cases 

In [None]:
choropleth_mapbox_usa_plot(
    counties = counties,
    locations = merged_covid_ppe_df.fips,
    z = merged_covid_ppe_df.cases,
    text = merged_covid_ppe_df.covid_text,
    colorscale = ["#fdfcef","#ffda55","#FFC831","#fc7555","#e96e81",],
    zmin = 0,
    zmax=100,
    title = ('COVID19 Cases Per County - %s - (Hover for breakdown)' % ny_times_covid_date),
    html_filename = ('COVID19_Cases_Per_County_%s.html' % ny_times_covid_date),
    colorbar_title = '> COVID19 Cases',
)

## Hospital bed visualization by county 

In [None]:
hospital_df = download_hospital_data(hospital_download_url, write_out_csv = True)
hospital_df.head(3)

In [None]:
hospital_df_counties = process_hospital_data(hospital_df, write_out_csv = True)
hospital_df_counties.head(2)

In [None]:
merged_covid_ppe_hosp_df=merge_covid_ppe_hosp_df(hospital_df_counties,merged_covid_ppe_df) 
merged_covid_ppe_hosp_df.head(2)

In [None]:
# Hospital bed plotting
choropleth_mapbox_usa_plot(
    counties = counties,
    locations = merged_covid_ppe_hosp_df.fips,
    z = merged_covid_ppe_hosp_df.BEDS,
    text = merged_covid_ppe_hosp_df.hosp_text,
    colorscale = ["#fdfcef","#c7e9b4","#D2FBFF","#36A2B9","#004469"],
    zmin = 0,
    zmax=500,
    title = ('Hospital beds per county - %s - (Hover for breakdown)' % time.strftime("%Y%m%d")),
    html_filename = ('Hospital_beds_per_county_%s.html' % time.strftime("%Y%m%d")),
    colorbar_title = '> Hospital Beds'
    )

### Covid cases per bed available

In [None]:
merged_covid_ppe_hosp_df = calculate_covid_per_bed_available(merged_covid_ppe_hosp_df)
merged_covid_ppe_hosp_df.head(2)

In [None]:
# Map hazard ratio
choropleth_mapbox_usa_plot(
    counties = counties,
    locations = merged_covid_ppe_hosp_df.fips,
    z = merged_covid_ppe_hosp_df.Covid_cases_per_bed,
    text = merged_covid_ppe_hosp_df.hosp_text,
    colorscale = ["#fdfcef","#ffda55","#FFC831","#fc7555","#e96e81",],
    zmin = 0,
    zmax=1,
    title = ('Hazard Ratio: Covid19 Cases, Per Bed, Per County - %s - (Hover for breakdown)' % ny_times_covid_date),
    html_filename = ('Covid19_cases_per_bed_per_county_%s.html' % time.strftime("%Y%m%d")),
    colorbar_title = '> Hazard Ratio (Cases/Bed)'
    )

### Identify counties with No PPE requests, with highest Covid19 cases

In [None]:
covid_ppe_df = find_counties_with_covid19_and_no_ppe_request(covid_df, mask_df_counties)
covid_ppe_df[['date','county','state','cases','deaths','PPE_requests']].head(10)

### Correlation of PPE request per county with COVID19 cases

In [None]:
viz_correlation_ppe_request_covid19_cases(merged_covid_ppe_hosp_df)

### Match hospital names to hospital database

In [None]:
# TODO
''' 
from fuzzywuzzy import fuzz 
from fuzzywuzzy import process 

# this takes a while, so I limited it to 100 names
returned_matches = []
for name in mrl['name'].values.tolist()[:99]:
    best_matches = process.extract(name, std_hospital_names)
    returned_matches.append((name, best_matches))
'''


### Download the 'I have PPE dataset' - Not public currently

In [None]:
ppe_donors_df = download_PPE_donors()
ppe_donors_df.head(2)

### Map zip code to lat long

In [None]:
zip_fips_df=download_zip_to_fips_data()
zip_fips_df.head(2)

### Donwload zip to lat long data

In [None]:
zip_lat_long_df=download_zip_to_lat_long_data()
zip_lat_long_df.head(2)

### Map zip code to county fips

In [None]:
ppe_donors_with_zip_df = add_fips_ppe_donors(ppe_donors_df, zip_fips_df)
ppe_donors_with_zip_df.head(2)

### Merge donors and requester information with large merged DF

In [None]:
merged_covid_ppe_hosp_donors_df = donors_per_county(
    ppe_donors_with_zip_df, merged_covid_ppe_hosp_df, write_out_csv = True)
merged_covid_ppe_hosp_donors_df.head(2)

In [None]:
# Donors plotting
choropleth_mapbox_usa_plot(
    counties = counties,
    locations = merged_covid_ppe_hosp_donors_df.fips,
    z = merged_covid_ppe_hosp_donors_df.ppe_donors,
    text = merged_covid_ppe_hosp_donors_df.ppe_donors_requests_text,
    colorscale = ["#fdfcef","#c7e9b4","#D2FBFF","#36A2B9","#004469"],
    zmin = 0,
    zmax=5,
    title = ('PPE Donors Per County - %s - (Hover for breakdown)' % time.strftime("%Y%m%d")),
    html_filename = ('PPE_Donors_Per_County_%s.html' % time.strftime("%Y%m%d")),
    colorbar_title = '> PPE Donors'
    )

### Calculate donors per requests

In [None]:
merged_covid_ppe_hosp_donors_df = calculate_donor_per_requester(
    merged_covid_ppe_hosp_donors_df)

In [None]:
# Donor vs Request plotting
choropleth_mapbox_usa_plot(
    counties = counties,
    locations = merged_covid_ppe_hosp_donors_df.fips,
    z = merged_covid_ppe_hosp_donors_df.PPE_Donor_Per_Requester,
    text = merged_covid_ppe_hosp_donors_df.ppe_donors_requests_ratio_text,
    #colorscale = ["#e96e81","#fc7555","#FFC831","#fdfcef","#D2FBFF","#c7e9b4","#1ee81e"],
    colorscale=['#ea6f67',"#fdfcef","#39bbec"],
    zmin = 0,
    zmax=1,
    title = ('PPE Donors Per Requestors - Counties with Donors Only - %s - (Hover for breakdown)' % time.strftime("%Y%m%d")),
    html_filename = ('PPE_Donors_Per_Requestors_%s.html' % time.strftime("%Y%m%d")),
    colorbar_title = '> PPE Donors'
    )

### Find closest requesters to donor

In [None]:
requestor_info_df = create_requestor_df_for_querying_requesters(mask_df, merged_covid_ppe_hosp_df)
requestor_info_df.head(2)

### Map the Top 10 closest for all donors

In [None]:
# create temporary records file to use in lambda function
requestor_info_df_records = requestor_info_df[[
    'institution','address','city','need','instructions','lat','lon',
    'Hazard_Index_Covid_Cases_Per_Hosp_Bed']].to_dict('records')

# Map the 10 closest requestors to a list
ppe_donors_with_zip_df['Closest_Requestor_List'] = ppe_donors_with_zip_df.apply(
        lambda x: k10_closest(requestor_info_df_records,{'lat': x['lat'], 'lon': x['lon']}), axis=1)

# Allocate the 10 closest requestors to individual columns
ppe_donors_with_zip_df['Closest_Requestor_1'] = ppe_donors_with_zip_df.apply(
        lambda x: x['Closest_Requestor_List'][0], axis=1)
ppe_donors_with_zip_df['Closest_Requestor_2'] = ppe_donors_with_zip_df.apply(
        lambda x: x['Closest_Requestor_List'][1], axis=1)
ppe_donors_with_zip_df['Closest_Requestor_3'] = ppe_donors_with_zip_df.apply(
        lambda x: x['Closest_Requestor_List'][2], axis=1)
ppe_donors_with_zip_df['Closest_Requestor_4'] = ppe_donors_with_zip_df.apply(
        lambda x: x['Closest_Requestor_List'][3], axis=1)
ppe_donors_with_zip_df['Closest_Requestor_5'] = ppe_donors_with_zip_df.apply(
        lambda x: x['Closest_Requestor_List'][4], axis=1)
ppe_donors_with_zip_df['Closest_Requestor_6'] = ppe_donors_with_zip_df.apply(
        lambda x: x['Closest_Requestor_List'][5], axis=1)
ppe_donors_with_zip_df['Closest_Requestor_7'] = ppe_donors_with_zip_df.apply(
        lambda x: x['Closest_Requestor_List'][6], axis=1)
ppe_donors_with_zip_df['Closest_Requestor_8'] = ppe_donors_with_zip_df.apply(
        lambda x: x['Closest_Requestor_List'][7], axis=1)
ppe_donors_with_zip_df['Closest_Requestor_9'] = ppe_donors_with_zip_df.apply(
        lambda x: x['Closest_Requestor_List'][8], axis=1)
ppe_donors_with_zip_df['Closest_Requestor_10'] = ppe_donors_with_zip_df.apply(
        lambda x: x['Closest_Requestor_List'][9], axis=1)

# Drop the requestors list
ppe_donors_with_zip_df.drop(['Closest_Requestor_List'], axis=1)

ppe_donors_with_zip_df.head(2)

### Write out the matches to disk

In [None]:
write_out_csv = True
ppe_donors_with_zip_df
if write_out_csv:
    timestr = time.strftime("%Y%m%d")
    path = 'PPE_donors_proximity_to_ppe_requestors_' + timestr + '.csv'
    ppe_donors_with_zip_df.to_csv (path, index = False, header=True)