# Correlation of PPE Demand in USA With Covid19 Cases

In [None]:
# import data
import json
import time
import requests
from io import StringIO
import os

# computing
import pandas as pd
import numpy as np
from tqdm.auto import tqdm

# Import geopandas package
import geopandas as gpd
import reverse_geocoder as rg
import addfips
import plotly.figure_factory as ff
import plotly.graph_objects as go

# plotting
import plotly.express as px
import plotly.graph_objects as go

## Download the Find The Masks data and convert to pandas
- Taken from the Find The Masks [web visualization](https://findthemasks.com/give.html)
- [Data updated every 5 minutes here](http://findthemasks.com/data.json) - The data visualized here is from 3/25 at 10 PM PST

In [None]:
# Fetch the Find The Masks dataset. The endpoint requires browser-like
# headers, hence the explicit User-Agent.
url = 'http://findthemasks.com/data.json'
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3"}

# Download the data. The timeout keeps the notebook from hanging on a stalled
# connection, and raise_for_status() reports an HTTP error here instead of as
# a confusing JSON decode failure a few lines further down.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
s = response.text

# Parse the payload; row 0 of 'values' holds the column headers
json_data = json.loads(s)
HEADERS = json_data['values'][0]
print(HEADERS)

# Build the data frame from row 2 onwards (row 1 is skipped; presumably it is
# a description row -- TODO confirm against the live feed).
# Rows shorter than the header are padded with None so that every record has
# one cell per column. Without this, assigning the header fails with a length
# mismatch whenever no data row reaches the full header width.
# NOTE(review): assumes short rows are missing *trailing* cells -- confirm.
rows = [
    list(row) + [None] * (len(HEADERS) - len(row))
    for row in json_data['values'][2:]
]
mask_df = pd.DataFrame(rows, columns=HEADERS)

In [None]:
# Clean the raw table in one chained pipeline:
#   1. discard rows with a missing 'Lat' or 'Lng' value,
#   2. rename the 'State?' column to 'State',
#   3. keep a single row per institution name (duplicates are dropped).
mask_df = (
    mask_df
    .dropna(subset=['Lat', 'Lng'], how='any')
    .rename(columns={'State?': 'State'})
    .drop_duplicates(subset='What is the name of the hospital or clinic?')
)

# Preview the first two rows
mask_df.head(2)

In [None]:
# Count the rows belonging to each state and expose the result as a regular
# data frame with the columns 'State' and 'counts'
mask_df_states = (
    mask_df
    .groupby(['State'])
    .size()
    .reset_index(name='counts')
)
mask_df_states.head(5)

### Map PPE requests by state

In [None]:
# Cast every existing column to str in one step, then add the hover-text
# column (currently just a line break for each state)
mask_df_states = mask_df_states.astype(str)
mask_df_states['text'] = '<br>'

# State-level choropleth: one trace, coloured by the number of requests
fig = go.Figure()
fig.add_trace(
    go.Choropleth(
        locationmode='USA-states',
        locations=mask_df_states['State'],
        z=mask_df_states['counts'].astype(float),  # counts were stringified above
        text=mask_df_states['text'],               # hover text
        colorscale='Blues',
        autocolorscale=False,
        marker_line_color='white',                 # line markers between states
        colorbar_title="Mask Requests",
    )
)

# Restrict the map to the USA and draw lakes in white
fig.update_layout(
    title_text='Medical Systems Requesting PPE',
    geo={
        'scope': 'usa',
        'projection': go.layout.geo.Projection(type='albers usa'),
        'showlakes': True,
        'lakecolor': 'rgb(255, 255, 255)',
    },
)

fig.show()

### Map PPE requests by county

In [None]:
# Download the county_Fips lookup table.
# The raw-content host is used on purpose: the github.com '/blob/' address
# serves the HTML viewer page, not the text file itself.
url = 'https://raw.githubusercontent.com/ShyamW/Geocoding_Suite/master/Lat_Lng_to_County_Data/county_Fips.txt'
response = requests.get(url, timeout=30)
response.raise_for_status()  # never save an error page as if it were data
contents = response.text

# Explicit encoding so the write does not depend on the platform default
with open('county_Fips.txt', 'w', encoding='utf-8') as f:
    f.write(contents)

In [None]:
class geocoder:
    """Map coordinates to a county name and its FIPS code.

    ``reverse_geocoder`` (imported as ``rg``) turns a latitude/longitude pair
    into administrative names, and ``addfips`` turns a county/state pair into
    a FIPS code. The ``AddFIPS`` object is created once per instance and
    reused by every lookup.
    """

    def __init__(self):
        self.af = addfips.AddFIPS()
        # TODO: instantiate Reverse Geocoder

    def fips_code_lookup(self, county, state):
        """Return the county FIPS code for ``county`` in ``state``.

        Thin wrapper around ``AddFIPS.get_county_fips``
        (https://github.com/fitnr/addfips); its result is returned unchanged.
        """
        return self.af.get_county_fips(county, state)

    def get_geocoder_info_from_rg(self, Lat, Lng):
        """Reverse-geocode one coordinate pair.

        Args:
            Lat: latitude, as a number or a numeric string.
            Lng: longitude, as a number or a numeric string.

        Returns:
            ``{'fips': <code>, 'county': <name>}`` on success, where the
            county is the ``admin2`` field of the first search result and the
            FIPS code comes from ``fips_code_lookup``. If any step fails the
            sentinel ``{'fips': 'NA', 'county': 'NA'}`` is returned instead of
            raising, so one bad row cannot abort a whole-frame ``apply``.
        """
        try:
            # Coerce to float so that coordinates arriving as text are
            # accepted as well.
            # NOTE(review): presumably the feed delivers strings -- confirm.
            coordinates = (float(Lat), float(Lng))

            # Reverse geocoder api call to get county and state names
            results = rg.search(coordinates)  # default mode = 2
            county = results[0]['admin2']
            state = results[0]['admin1']

            # Must go through ``self``: a bare ``fips_code_lookup(...)`` is a
            # NameError, which the handler below would silently turn into
            # the 'NA' sentinel for every single row.
            fips = self.fips_code_lookup(county, state)
        except Exception:
            # Deliberately best-effort, but no longer a bare ``except:`` so
            # KeyboardInterrupt/SystemExit can still stop a long-running job.
            return {'fips': 'NA', 'county': 'NA'}

        return {'fips': fips, 'county': county}

### Search and add the FIPS code to each row - WILL TAKE SEVERAL MINS

In [None]:
# Instantiate the geocoder once so its addfips object is preloaded a single
# time. The instance deliberately takes over the class's name because later
# cells call `geocoder.fips_code_lookup(...)`; the isinstance guard keeps this
# cell re-runnable once the name already refers to the instance.
if isinstance(geocoder, type):
    geocoder = geocoder()

# Start tqdm timer from tqdm.auto (this provides `progress_apply`)
tqdm.pandas()

# Reverse geocoder used to get geocoded fips and county information
# Note: progress_apply is used for the timer functionality
mask_df['geocoder'] = mask_df.progress_apply(
    lambda x: geocoder.get_geocoder_info_from_rg(x['Lat'], x['Lng']), axis=1)

# Unpack the per-row result dict into individual columns, then discard it
mask_df['fips'] = mask_df['geocoder'].map(lambda info: info['fips'])
mask_df['county'] = mask_df['geocoder'].map(lambda info: info['county'])
mask_df = mask_df.drop(columns=['geocoder'])

# Remove every row whose location could not be mapped. A failed lookup shows
# up either as a missing value or as the geocoder's literal 'NA' sentinel
# string; dropna() alone only catches the former.
unmapped = mask_df[['fips', 'county']].isin(['NA']).any(axis=1)
mask_df = mask_df.loc[~unmapped].dropna(how='any', subset=['fips', 'county'])

##### Save out the processed data

In [None]:
# Persist the processed Find The Masks table; the file name carries the
# current date formatted as YYYYMMDD
timestr = time.strftime("%Y%m%d")
path = ''.join(['findthemasks_data_processed_', timestr, '.csv'])
mask_df.to_csv(path, header=True, index=False)

##### TODO
Some rows of the written-out data are corrupted and misaligned.
The cause of this bug has not been identified yet.

In [None]:
# Number of requests per (fips, county) pair, flattened back into a data
# frame with a 'counts' column
mask_df_counties = (
    mask_df
    .groupby(['fips', 'county'])
    .size()
    .reset_index(name='counts')
)
mask_df_counties.head(5)

### Map PPE requests by County

In [None]:
# Load the county boundary GeoJSON used by the county-level maps
url = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'

# Download the data. The timeout prevents an indefinite hang, and
# raise_for_status() reports an HTTP failure directly instead of letting an
# error page reach the JSON parser.
response = requests.get(url, timeout=60)
response.raise_for_status()
s = response.text

# Parse the GeoJSON text into a Python dict
counties = json.loads(s)

In [None]:
# County-level choropleth of PPE requests.
# `labels` must be a dict mapping column names to display names; the set
# literal {'county'} used previously carried no mapping at all. The county
# name is now shown through `hover_name`, and `labels` gives the colour
# column a readable title.
fig = px.choropleth(
    mask_df_counties,
    geojson=counties,
    locations='fips',
    color='counts',
    color_continuous_scale="RdBu_r",
    range_color=(-1, 10),
    scope="usa",
    hover_name='county',
    labels={'counts': 'Number of Requests'},
    title='PPE Requests By County',
)
fig.show()

## Download COVID19 data and convert to pandas

In [None]:
# Load the NYT county-level table and keep only the rows whose 'date' equals
# the snapshot date below
date = '2020-03-26'
covid_df = pd.read_csv("https://github.com/nytimes/covid-19-data/raw/master/us-counties.csv")
covid_df = covid_df[covid_df['date'].eq(date)]
covid_df.head()

In [None]:
# Discard rows that have no fips value
covid_df = covid_df.dropna(subset=['fips'], how='any')

# Normalise the county FIPS codes in one chain: int() strips the decimal
# part, str makes them text, and zfill left-pads them to 5 characters
width = 5
covid_df['fips'] = (
    covid_df['fips']
    .apply(int)
    .astype(str)
    .str.zfill(width)
)
covid_df.head(5)

In [None]:
#### Save out the processed covid data

In [None]:
# Persist the filtered COVID table; the file name records both the snapshot
# date and the date of processing (YYYYMMDD)
timestr = time.strftime("%Y%m%d")
path = ''.join(['COVID19_nytimes_', date, ' data_processed_on_', timestr, '.csv'])
covid_df.to_csv(path, header=True, index=False)

In [None]:
# County-level choropleth of COVID19 case counts for the snapshot date.
# `labels` must be a dict mapping column names to display names; the set
# literal {'county'} used previously carried no mapping at all. The county
# name is now shown through `hover_name` instead.
# NOTE(review): assumes covid_df has a 'county' column -- confirm against
# the downloaded CSV.
fig = px.choropleth(
    covid_df,
    geojson=counties,
    locations='fips',
    color='cases',
    color_continuous_scale="RdBu_r",
    range_color=(-1, 100),
    scope="usa",
    hover_name='county',
    labels={'cases': 'Cases'},
    title='COVID19 Cases Per County: ' + date,
)
fig.show()

## Hospital bed visualization by county 

In [None]:
# Hospital information compiled by https://beta.covidmap.link/, read
# directly from the sheet's CSV export
url = 'https://docs.google.com/spreadsheet/ccc?key=15gZsozGQp-wdJaSngvLV13iCf_2mm2IsZpHOPxZtvtI&output=csv'

hospital_df = pd.read_csv(filepath_or_buffer=url)
hospital_df.head(n=2)

In [None]:
# Enable tqdm's progress_apply on pandas objects
tqdm.pandas()


def _county_fips_for_row(row):
    """Look up the FIPS code for one hospital row via its COUNTY and STATE."""
    return geocoder.fips_code_lookup(row['COUNTY'], row['STATE'])


# Row-wise lookup; progress_apply is used so a progress bar is displayed
hospital_df['fips'] = hospital_df.progress_apply(_county_fips_for_row, axis=1)

In [None]:
# Total 'BEDS' for every (fips, COUNTY) pair, returned as a flat data frame
hospital_df_counties = (
    hospital_df
    .groupby(['fips', 'COUNTY'])['BEDS']
    .sum()
    .reset_index()
)
hospital_df_counties.head(2)

In [None]:
# Persist the per-county bed totals; the file name carries the current date
# formatted as YYYYMMDD
timestr = time.strftime("%Y%m%d")
path = ''.join(['hospital_data_processed_per_county', timestr, '.csv'])
hospital_df_counties.to_csv(path, header=True, index=False)

In [None]:
# County-level choropleth of hospital bed totals.
# `labels` must be a dict mapping column names to display names; the set
# literal {'county'} used previously carried no mapping at all, and this
# frame's county column is spelled 'COUNTY'. The county name is now shown
# through `hover_name` instead.
# The title reuses `date`, the COVID snapshot date set in an earlier cell.
fig = px.choropleth(
    hospital_df_counties,
    geojson=counties,
    locations='fips',
    color='BEDS',
    color_continuous_scale="RdBu_r",
    range_color=(-1, 500),
    scope="usa",
    hover_name='COUNTY',
    labels={'BEDS': 'Beds'},
    title='Hospital beds per county: ' + date,
)
fig.show()