# Correlation of PPE Demand in USA With Covid19 Cases

In [47]:
# import data
import ast
import configparser
import json
import os
import os.path as path
import sys
import time

import requests

# computing
import pandas as pd
import numpy as np

# plotting (Plotly Express)
import plotly.express as px

### Add python path and add local packages

In [45]:
# Make the project's src/ directory importable; it sits two levels above
# the current working directory.
two_up = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
src_dir = os.path.join(two_up, 'src')
if sys.path.count(src_dir) == 0:
    sys.path.append(src_dir)

# Import local libraries
from geocode import geocoder
from mapping import choropleth_mapbox_usa_plot
from data_download import download_findthemasks_data, \
    download_nytimes_data, \
    download_hospital_data
from data_process import add_fips_county_info, \
    requests_per_county, \
    download_county_geojson_and_merge_df, \
    merge_covid_ppe_df, \
    process_hospital_data, \
    merge_covid_ppe_hosp_df, \
    calculate_covid_per_bed_available, \
    find_counties_with_covid19_and_no_ppe_request

### Configs

In [4]:
# Load the visualization settings from src/config.ini.
config_path = os.path.join(src_dir, 'config.ini')
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so fail here with a clear message instead of a
# KeyError on 'viz' further down.
if not config.read(config_path):
    raise FileNotFoundError('Could not read config file: %s' % config_path)

viz_config = config['viz']

ny_times_covid_date = viz_config['ny_times_covid_date']
findthemasks_url = viz_config['findthemasks_url']
# The headers are stored as text; they are expected to be a Python literal
# (e.g. a dict) -- TODO confirm against config.ini. ast.literal_eval parses
# literals only, whereas the previous eval() would execute any expression
# placed in the config file.
request_headers = ast.literal_eval(viz_config['request_headers'])
county_fips_download_url = viz_config['county_fips_download_url']
geojson_url = viz_config['geojson_url']
ny_times_county_data_url = viz_config['ny_times_county_data_url']
hospital_download_url = viz_config['hospital_download_url']

## Download Find The Masks data and convert to pandas
- Taken from the Find The Masks [web visualization](https://findthemasks.com/give.html)
- [Data updated every 5 mins here](https://findthemasks.com/data.json) - The data visualized here is from 3/25 at 10PM PST

In [5]:
# Fetch the findthemasks.com request list and preview the first two rows.
mask_df = download_findthemasks_data(
    url=findthemasks_url,
    request_headers=request_headers,
)
mask_df.head(2)

Unnamed: 0,Approved,Reason not published,Deduped,Mod Status,Timestamp,What is the name of the hospital or clinic?,Final Address,Street address for dropoffs?,City,State,Drop_Off_Instructions,What do you need?,Will you accept open boxes/bags?,Type of request:,Lat,Lng,Row
0,x,,x,FM,3/19/2020 11:14:25,Swedish Ballard,"5300 Tallman Ave NW\nSeattle, WA 98107","5300 Tallman Ave NW\nSeattle, WA 98107",Seattle,WA,Put in donations bin at registration desk or a...,"N95s, Surgical Masks",Yes,,47.6674625,-122.3795306,3.0
1,x,,x,FM,3/19/2020 14:37:04,Zuckerberg San Francisco General Hospital,"1001 Potrero Ave\nSan Francisco, CA 94110","1001 Potrero Ave\nSan Francisco, CA 94110",San Francisco,CA,"For now, call ahead: call the switchboard (628...","N95s, Surgical Masks",No,,37.7557265,-122.4047381,4.0


### Create a geocoder instance to find FIPS and county information by lat/long

In [6]:
# Instantiate through the module so this cell can be re-run. The previous
# `geocoder = geocoder(...)` rebound the imported class name to the instance,
# so a second run would call the instance instead of the class. The variable
# name `geocoder` is kept because later cells pass it on.
import geocode
geocoder = geocode.geocoder(county_fips_download_url)

### Search and add the FIPS code to each row - WILL TAKE SEVERAL MINS

In [7]:
# Add county/FIPS information to the request rows using the geocoder (slow step).
mask_df = add_fips_county_info(
    mask_df,
    geocoder,
)

Pulling geocodes from Lat+Lng. This will take awhile...
Loading formatted geocoded file...


### Sum amount of requests per county

In [10]:
# Count requests per county (CSV export enabled) and preview the result.
mask_df_counties = requests_per_county(
    mask_df,
    write_out_csv=True,
)
mask_df_counties.head(5)

Unnamed: 0,fips,county,State,counts
0,1003,Baldwin County,AL,2
1,1069,Houston County,AL,1
2,1073,Jefferson County,AL,2
3,1089,Madison County,AL,2
4,1101,Montgomery County,AL,1


### Download county geo information

In [27]:
# Fetch the county GeoJSON and combine it with the per-county request counts.
counties, merged_df = download_county_geojson_and_merge_df(
    geojson_url,
    mask_df_counties,
)
merged_df.head(2)

Unnamed: 0,geometry,fips,GEO_ID,STATE,county,CENSUSAREA,PPE_requests,county_info_for_map,ppe_text
0,"{'type': 'Polygon', 'coordinates': [[[-86.4967...",1001,0500000US01001,Alabama,Autauga,594.436,0.0,"PPE Requests: Autauga, Alabama","PPE Requests: 0<br>Autauga, Alabama"
1,"{'type': 'Polygon', 'coordinates': [[[-86.5777...",1009,0500000US01009,Alabama,Blount,644.776,0.0,"PPE Requests: Blount, Alabama","PPE Requests: 0<br>Blount, Alabama"


### Map of PPE Requests

In [None]:
# Choropleth of PPE requests per county; the title is stamped with today's date.
ppe_colorscale = ["#fdfcef", "#c7e9b4", "#D2FBFF", "#36A2B9", "#004469"]
ppe_map_title = 'PPE Requests By County - %s - (Hover for breakdown)' % time.strftime("%Y%m%d")
choropleth_mapbox_usa_plot(
    counties=counties,
    locations=merged_df.fips,
    z=merged_df.PPE_requests,
    text=merged_df.ppe_text,
    colorscale=ppe_colorscale,
    zmin=0,
    zmax=5,
    title=ppe_map_title,
    colorbar_title='> PPE Requests',
    html_filename='PPE_Requests_By_County.html',
)

## Download COVID19 data and convert to pandas

In [29]:
# Download the NYTimes county-level Covid19 data for the configured date
# (CSV export enabled) and preview it.
covid_df = download_nytimes_data(
    ny_times_county_data_url,
    ny_times_covid_date,
    write_out_csv=True,
)
covid_df.head(5)

Unnamed: 0,date,county,state,fips,cases,deaths
19713,2020-03-30,Autauga,Alabama,1001,7,0
19714,2020-03-30,Baldwin,Alabama,1003,18,0
19715,2020-03-30,Bibb,Alabama,1007,2,0
19716,2020-03-30,Blount,Alabama,1009,5,0
19717,2020-03-30,Bullock,Alabama,1011,3,0


### Merge the Covid19 (NYTimes) data with the PPE request data

In [30]:
# Combine the Covid19 case data with the county/PPE frame and preview it.
merged_covid_ppe_df = merge_covid_ppe_df(
    covid_df,
    merged_df,
)
merged_covid_ppe_df.head(2)

Unnamed: 0,geometry,fips,GEO_ID,STATE,county,CENSUSAREA,PPE_requests,county_info_for_map,ppe_text,cases,deaths,covid_text
0,"{'type': 'Polygon', 'coordinates': [[[-86.4967...",1001,0500000US01001,Alabama,Autauga,594.436,0.0,"PPE Requests: Autauga, Alabama","PPE Requests: 0<br>Autauga, Alabama",7.0,0.0,"Autauga, Alabama<br><br>Covid19: <br>Cases: 7<..."
1,"{'type': 'Polygon', 'coordinates': [[[-86.5777...",1009,0500000US01009,Alabama,Blount,644.776,0.0,"PPE Requests: Blount, Alabama","PPE Requests: 0<br>Blount, Alabama",5.0,0.0,"Blount, Alabama<br><br>Covid19: <br>Cases: 5<b..."


### Mapping covid cases 

In [None]:
# Choropleth of Covid19 cases per county; title and file name carry the data date.
covid_colorscale = ["#fdfcef", "#ffda55", "#FFC831", "#fc7555", "#e96e81"]
covid_map_title = 'COVID19 Cases Per County - %s - (Hover for breakdown)' % ny_times_covid_date
covid_map_file = 'COVID19_Cases_Per_County_%s.html' % ny_times_covid_date
choropleth_mapbox_usa_plot(
    counties=counties,
    locations=merged_covid_ppe_df.fips,
    z=merged_covid_ppe_df.cases,
    text=merged_covid_ppe_df.covid_text,
    colorscale=covid_colorscale,
    zmin=0,
    zmax=100,
    title=covid_map_title,
    html_filename=covid_map_file,
    colorbar_title='> COVID19 Cases',
)

## Hospital bed visualization by county 

In [32]:
# Download the hospital data (CSV export enabled) and preview it.
hospital_df = download_hospital_data(
    hospital_download_url,
    write_out_csv=True,
)
hospital_df.head(3)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,LATITUDE,LONGITUDE,STATE,CITY,NAME,OBJECTID,ID,ADDRESS,ZIP,TELEPHONE,...,NAICS_CODE,NAICS_DESC,SOURCE,WEBSITE,ALT_NAME,OWNER,BEDS,TRAUMA,HELIPAD,fips
228,40.442784,-79.960707,PA,PITTSBURGH,UPMC PRESBYTERIAN,6623,11315213,200 LOTHROP STREET,15213,(412) 647-5286,...,622110,GENERAL MEDICAL AND SURGICAL HOSPITALS,http://sais.health.pa.gov/commonpoc/content/pu...,http://www.upmc.com/locations/hospitals/presby...,NOT AVAILABLE,NON-PROFIT,1592,LEVEL I,Y,42003
291,41.304691,-72.935414,CT,NEW HAVEN,YALE-NEW HAVEN HOSPITAL,5117,1906504,20 YORK ST,6504,(203) 688-4242,...,622110,GENERAL MEDICAL AND SURGICAL HOSPITALS,https://www.elicense.ct.gov/Lookup/GenerateRos...,http://www.ynhh.org,NOT AVAILABLE,NON-PROFIT,1541,"LEVEL I, LEVEL I PEDIATRIC",Y,9009
2099,36.133548,-120.240446,CA,COALINGA,DEPARTMENT OF STATE HOSPITAL - COALINGA,1041,73693210,24511 WEST JAYNE AVENUE,93210,(559) 935-4300,...,622210,PSYCHIATRIC AND SUBSTANCE ABUSE HOSPITALS,http://www.oshpd.ca.gov/HID/Facility-Listing.html,http://www.dsh.ca.gov/Coalinga/default.asp,NOT AVAILABLE,GOVERNMENT - STATE,1500,NOT AVAILABLE,N,6019


In [33]:
# Reduce the hospital data to one row per county (CSV export enabled) and preview it.
hospital_df_counties = process_hospital_data(
    hospital_df,
    write_out_csv=True,
)
hospital_df_counties.head(2)

Unnamed: 0,fips,COUNTY,BEDS
0,1001,AUTAUGA,85
1,1003,BALDWIN,398


In [34]:
# Add the per-county hospital bed data to the Covid19/PPE frame and preview it.
merged_covid_ppe_hosp_df = merge_covid_ppe_hosp_df(
    hospital_df_counties,
    merged_covid_ppe_df,
)
merged_covid_ppe_hosp_df.head(2)

Unnamed: 0,geometry,fips,GEO_ID,STATE,county,CENSUSAREA,PPE_requests,county_info_for_map,ppe_text,cases,deaths,covid_text,BEDS,hosp_text
0,"{'type': 'Polygon', 'coordinates': [[[-86.4967...",1001,0500000US01001,Alabama,Autauga,594.436,0.0,"PPE Requests: Autauga, Alabama","PPE Requests: 0<br>Autauga, Alabama",7.0,0.0,"Autauga, Alabama<br><br>Covid19: <br>Cases: 7<...",85.0,"Autauga, Alabama<br><br>Hospital Beds: 85<br><..."
1,"{'type': 'Polygon', 'coordinates': [[[-86.5777...",1009,0500000US01009,Alabama,Blount,644.776,0.0,"PPE Requests: Blount, Alabama","PPE Requests: 0<br>Blount, Alabama",5.0,0.0,"Blount, Alabama<br><br>Covid19: <br>Cases: 5<b...",40.0,"Blount, Alabama<br><br>Hospital Beds: 40<br><b..."


In [None]:
# Choropleth of hospital beds per county; title and file name are stamped with today's date.
hospital_colorscale = ["#fdfcef", "#c7e9b4", "#D2FBFF", "#36A2B9", "#004469"]
hospital_map_title = 'Hospital beds per county - %s - (Hover for breakdown)' % time.strftime("%Y%m%d")
hospital_map_file = 'Hospital_beds_per_county_%s.html' % time.strftime("%Y%m%d")
choropleth_mapbox_usa_plot(
    counties=counties,
    locations=merged_covid_ppe_hosp_df.fips,
    z=merged_covid_ppe_hosp_df.BEDS,
    text=merged_covid_ppe_hosp_df.hosp_text,
    colorscale=hospital_colorscale,
    zmin=0,
    zmax=500,
    title=hospital_map_title,
    html_filename=hospital_map_file,
    colorbar_title='> Hospital Beds',
)

### Covid cases per bed available

In [36]:
# Compute Covid19 cases per hospital bed for the merged frame and preview the result.
merged_covid_ppe_hosp_df = calculate_covid_per_bed_available(
    merged_covid_ppe_hosp_df,
)
merged_covid_ppe_hosp_df.head(2)

Unnamed: 0,geometry,fips,GEO_ID,STATE,county,CENSUSAREA,PPE_requests,county_info_for_map,ppe_text,cases,deaths,covid_text,BEDS,hosp_text,Covid_cases_per_bed
1790,"{'type': 'Polygon', 'coordinates': [[[-111.046...",49043,0500000US49043,Utah,Summit,1871.712,0.0,"PPE Requests: Summit, Utah","PPE Requests: 0<br>Summit, Utah",176.0,0.0,"Summit, Utah<br><br>Covid19: <br>Cases: 176<br...",26.0,"Summit, Utah<br><br>HAZARD RATIO (Cases/Bed): ...",6.769231
2987,"{'type': 'Polygon', 'coordinates': [[[-113.472...",16013,0500000US16013,Idaho,Blaine,2643.586,0.0,"PPE Requests: Blaine, Idaho","PPE Requests: 0<br>Blaine, Idaho",148.0,2.0,"Blaine, Idaho<br><br>Covid19: <br>Cases: 148<b...",25.0,"Blaine, Idaho<br><br>HAZARD RATIO (Cases/Bed):...",5.92


In [None]:
# Choropleth of the hazard ratio (Covid19 cases per hospital bed) per county.
# The title carries the Covid19 data date; the file name carries today's date.
hazard_colorscale = ["#fdfcef", "#ffda55", "#FFC831", "#fc7555", "#e96e81"]
hazard_map_title = 'Hazard Ratio: Covid19 Cases, Per Bed, Per County - %s - (Hover for breakdown)' % ny_times_covid_date
hazard_map_file = 'Covid19_cases_per_bed_per_county_%s.html' % time.strftime("%Y%m%d")
choropleth_mapbox_usa_plot(
    counties=counties,
    locations=merged_covid_ppe_hosp_df.fips,
    z=merged_covid_ppe_hosp_df.Covid_cases_per_bed,
    text=merged_covid_ppe_hosp_df.hosp_text,
    colorscale=hazard_colorscale,
    zmin=0,
    zmax=1,
    title=hazard_map_title,
    html_filename=hazard_map_file,
    colorbar_title='> Hazard Ratio (Cases/Bed)',
)

### Identify counties with No PPE requests, with highest Covid19 cases

In [42]:
# Find counties with Covid19 cases but no PPE requests, then show the first ten
# rows of the columns of interest.
covid_ppe_df = find_counties_with_covid19_and_no_ppe_request(
    covid_df,
    mask_df_counties,
)
preview_columns = ['date','county','state','cases','deaths','PPE_requests']
covid_ppe_df[preview_columns].head(10)

Unnamed: 0,date,county,state,cases,deaths,PPE_requests
20962,2020-03-30,Rockland,New York,2511,8,0.0
19956,2020-03-30,District of Columbia,District of Columbia,495,9,0.0
20041,2020-03-30,Cobb,Georgia,250,11,0.0
20468,2020-03-30,Caddo,Louisiana,222,5,0.0
19909,2020-03-30,Eagle,Colorado,213,4,0.0
21254,2020-03-30,Monroe,Pennsylvania,182,7,0.0
21569,2020-03-30,Summit,Utah,176,0,0.0
20960,2020-03-30,Putnam,New York,167,0,0.0
20535,2020-03-30,Baltimore city,Maryland,152,2,0.0
20150,2020-03-30,Blaine,Idaho,148,2,0.0


### Correlation of PPE request per county with COVID19 cases

In [43]:
# Keep only counties whose PPE_requests value is non-zero, ordered by request
# count and then by case count (both descending).
# sort_values() returns a new frame here; the earlier version sorted the
# filtered slice with inplace=True, which triggered pandas'
# SettingWithCopyWarning.
counties_with_ppe_requests_and_covid_cases = (
    merged_covid_ppe_hosp_df[merged_covid_ppe_hosp_df.PPE_requests != 0]
    .sort_values(by=['PPE_requests', 'cases'], ascending=False)
)
counties_with_ppe_requests_and_covid_cases.head(5)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,geometry,fips,GEO_ID,STATE,county,CENSUSAREA,PPE_requests,county_info_for_map,ppe_text,cases,deaths,covid_text,BEDS,hosp_text,Covid_cases_per_bed
2049,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",6037,0500000US06037,California,Los Angeles,4057.884,113.0,"PPE Requests: Los Angeles, California","PPE Requests: 113<br>Los Angeles, California",2474.0,44.0,"Los Angeles, California<br><br>Covid19: <br>Ca...",30371.0,"Los Angeles, California<br><br>HAZARD RATIO (C...",0.081459
2991,"{'type': 'Polygon', 'coordinates': [[[-87.5240...",17031,0500000US17031,Illinois,Cook,945.326,64.0,"PPE Requests: Cook, Illinois","PPE Requests: 64<br>Cook, Illinois",3727.0,44.0,"Cook, Illinois<br><br>Covid19: <br>Cases: 3727...",18259.0,"Cook, Illinois<br><br>HAZARD RATIO (Cases/Bed)...",0.204119
2475,"{'type': 'MultiPolygon', 'coordinates': [[[[-1...",53033,0500000US53033,Washington,King,2115.566,37.0,"PPE Requests: King, Washington","PPE Requests: 37<br>King, Washington",2332.0,152.0,"King, Washington<br><br>Covid19: <br>Cases: 23...",5461.0,"King, Washington<br><br>HAZARD RATIO (Cases/Be...",0.427028
717,"{'type': 'Polygon', 'coordinates': [[[-118.093...",6059,0500000US06059,California,Orange,790.568,37.0,"PPE Requests: Orange, California","PPE Requests: 37<br>Orange, California",464.0,4.0,"Orange, California<br><br>Covid19: <br>Cases: ...",8444.0,"Orange, California<br><br>HAZARD RATIO (Cases/...",0.05495
2248,"{'type': 'Polygon', 'coordinates': [[[-121.855...",6001,0500000US06001,California,Alameda,739.017,32.0,"PPE Requests: Alameda, California","PPE Requests: 32<br>Alameda, California",304.0,7.0,"Alameda, California<br><br>Covid19: <br>Cases:...",4046.0,"Alameda, California<br><br>HAZARD RATIO (Cases...",0.075136


In [44]:
# Scatter of PPE requests against Covid19 cases, one point per county, coloured
# by Covid19 cases per hospital bed.
# Columns are referenced by name and `labels` is keyed by those same names:
# Plotly Express looks labels up by column name, so the earlier 'x'/'y' keys
# did not match the named Series that were passed in.
fig = px.scatter(
    counties_with_ppe_requests_and_covid_cases,
    x='cases',
    y='PPE_requests',
    color='Covid_cases_per_bed',
    hover_name='county',
    log_x=True,
    labels={
        'Covid_cases_per_bed': 'Covid19 cases per hospital bed',
        'cases': 'Covid19 Cases Per County',
        'PPE_requests': 'PPE Requests Per County',
    },
    range_color=(0, 1),
    range_x=(1, 30000),
)

fig.update_layout(
    title="Correlation of PPE request per county with COVID19 cases",
)

fig.show()