In [193]:
# Suzan Iloglu, May 21,2020
# Import packages
import csv
import gurobipy as gp
from itertools import product
import geopandas as gpd
import pandas as pd
import numpy as np
import math
import time
import requests
import io
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
pd.options.display.max_columns =200
from IPython.display import Image


# MAPPING THE NEW POLITICS OF CARE: COMMUNITY HEALTH WORKERS
The project presents multiple options for how individual workers in such a Community Health Corps might be distributed within each state. It shows that what you choose to prioritize greatly impacts where care would be sent. We can define communities in greatest need in many ways: we can think about our current crisis and send people to where the COVID19 pandemic rages most fiercely; we can think of long term measures of social and economic inequality embedded in metrics like the Centers for Disease Control and Prevention’s Social Vulnerability Index; we can focus on the places with too many people dying too young and use the County Health Rankings Years-of-Potential-Life-Lost measure; we can think of joblessness and how the pandemic has thrown many into unemployment and target our resources in this way. 

The followings are our options to choose to define vulnerability:


- SOCIAL VULNERABILITY INDEX
- MEDICAID 
- UNEMPLOYMENT
- YEARS OF POTENTIAL LIFE LOST
- TOTAL COVID CASES
- COVID CASES BY POPULATION
- COVID DEATHS BY POPULATION

We will start with Social Vulnerability Index (SVI) from CDC website.

### I. Importing SVI data which includes the variables for calculating county SVI for each state
The CDC uses both a USA-wide and a state by state SVI scores. For our project given that funding is likely going to be managed at a state level, using a state by state SVI scores makes the most sense and will be most sensitive to regional socioeconomic differences. Even though the CDC SVI scores are calculated using percentile rankings, the data sets include raw data estimates for each variables. The following table shows the variablaes used in the method of calculating SVI scores. 




      American Community Survey (ACS), 2014-2018 (5-year) data for the following estimates:
<img src="Data/img/SVI_comp.png" width="500">


Note: Full documentation for 2018 data is available <a href="https://svi.cdc.gov/data-and-tools-download.html">here</a> 
This part of the code shows preliminary mapping of <a href = "https://svi.cdc.gov/">the CDC's Social Vulnerability Index</a>.

Later in the notebook, we will provide the formula to create the SVI value we use in our project. First, we import the data for the US mainland and Puerto Rico.

In [194]:
## import svi data downloaded from CDC website as cited above

## 48 state SVI scores by county
svi_counties_mainland = gpd.read_file("Data/SVI2018_US_COUNTY/SVI2018_US_county.shp")

## Puerto Rico SVI scores by county
svi_counties_puerto_rico = gpd.read_file("Data/PuertoRico_COUNTY/SVI2018_PuertoRico_county.shp")

## Merge 48 states and Puerto Rico SVI 
svi_counties = pd.concat([svi_counties_mainland,svi_counties_puerto_rico ], sort = False)


In [195]:
## Replacing -999 values with 0 for calculations
svi_county = svi_counties.fillna(0)
svi_county  = svi_county.replace(-999, 0)
svi_county['FIPS'] = svi_county['FIPS'].astype(int)

In [196]:
## Create the list for State
State = svi_county.STATE.unique().tolist()

In [197]:
# Create a seperate dictionary for the variables to calculate SVI

# Persons below poverty estimate, 2014-2018 ACS
E_POV = dict(zip(svi_county.FIPS, svi_county.E_POV))

# Civilian (age 16+) unemployed estimate, 2014-2018 ACS
E_UNEMP = dict(zip(svi_county.FIPS, svi_county.E_UNEMP))

# Per capita income estimate, 2014-2018 ACS
E_PCI = dict(zip(svi_county.FIPS, svi_county.E_PCI))

# Persons (age 25+) with no high school diploma estimate, 2014-2018 ACS
E_NOHSDP = dict(zip(svi_county.FIPS, svi_county.E_NOHSDP))

# Persons aged 65 and older estimate
E_AGE65 = dict(zip(svi_county.FIPS, svi_county.E_AGE65))

# Persons aged 17 and younger estimate
E_AGE17 = dict(zip(svi_county.FIPS, svi_county.E_AGE17))

# Population with a disability estimate
E_DISABL = dict(zip(svi_county.FIPS, svi_county.E_DISABL))

# Single parent households with children under 18 estimate
E_SNGPNT = dict(zip(svi_county.FIPS, svi_county.E_SNGPNT))

# Minority (all persons except white, nonHispanic) estimate, 2014-2018 ACS
E_MINRTY = dict(zip(svi_county.FIPS, svi_county.E_MINRTY))

# Persons (age 5+) who speak English "less than well" estimate, 2014-2018 ACS
E_LIMENG = dict(zip(svi_county.FIPS, svi_county.E_LIMENG))

# Housing in structures with 10 or more units estimate, 2014-2018 ACS
E_MUNIT = dict(zip(svi_county.FIPS, svi_county.E_MUNIT))

# Mobile homes estimate MOE, 2014-2018 ACS
E_MOBILE = dict(zip(svi_county.FIPS, svi_county.E_MOBILE))

# At household level (occupied housing units), more people than rooms estimate, 2014-2018 ACS
E_CROWD = dict(zip(svi_county.FIPS, svi_county.E_CROWD))

# Households with no vehicle available estimate, 2014-2018 ACS
E_NOVEH = dict(zip(svi_county.FIPS, svi_county.E_NOVEH))

# Persons in institutionalized group quarters estimate, 2014-2018 ACS
E_GROUPQ = dict(zip(svi_county.FIPS, svi_county.E_GROUPQ))

# Percentage of persons below poverty estimate
E_POV = dict(zip(svi_county.FIPS, svi_county.E_POV))

# Medicaid 
Medicaid is a means-tested health insurance program for low-income children, pregnant women, adults, seniors, and people with disabilities. Medicaid is jointly funded by federal and state governments and managed by states within federal standards and a wide range of state options. <a href="https://data.medicaid.gov/Enrollment/State-Medicaid-and-CHIP-Applications-Eligibility-D/n5ce-jxme"> Data Source for Medicaid Enrollment </a> 

In [198]:

import sodapy
from sodapy import Socrata

# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
client = Socrata("data.medicaid.gov", None)


# Returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("83yt-67it", limit=4000)


# Read the medicaid demand data
#df_mm = pd.read_csv("Data/2020_06_Preliminary_applications__eligibility_determinations__and_enrollment_data.csv")

# Convert to pandas DataFrame
df_mm = pd.DataFrame.from_records(results)
df_mm.head(5)
df_mm.columns




Index(['applications_for_financial_assistance_submitted_to_the_state_based_marketplace',
       'applications_for_financial_assistance_submitted_to_the_state_based_marketplace_footnotes',
       'final_report', 'geocoded_column',
       'individuals_determined_eligible_for_chip_at_application',
       'individuals_determined_eligible_for_chip_at_application_footnotes',
       'individuals_determined_eligible_for_medicaid_at_application',
       'individuals_determined_eligible_for_medicaid_at_application_footnotes',
       'latitude', 'longitude', 'medicaid_and_chip_child_enrollment',
       'medicaid_and_chip_child_enrollment_footnotes',
       'new_applications_submitted_to_medicaid_and_chip_agencies',
       'new_applications_submitted_to_medicaid_and_chip_agencies_footnotes',
       'preliminary_updated', 'report_date', 'state_abbreviation',
       'state_expanded_medicaid', 'state_name',
       'total_applications_for_financial_assistance_submitted_at_state_level',
       'total_a

In [199]:
df_mm['State Name'] = df_mm['state_name'].str.upper() 

In [200]:
Medicaid_state = dict(zip(df_mm['State Name'], df_mm['total_medicaid_and_chip_enrollment']))
Medicaid_state['PUERTO RICO'] = 1622194
print (Medicaid_state)

{'ALABAMA': '957116', 'ALASKA': '231145', 'ARIZONA': '1839932', 'ARKANSAS': '830467', 'CALIFORNIA': '11847711', 'COLORADO': '1337805', 'CONNECTICUT': '874974', 'DELAWARE': '239009', 'DISTRICT OF COLUMBIA': '248591', 'FLORIDA': '3892552', 'GEORGIA': '1928703', 'HAWAII': '351337', 'IDAHO': '340742', 'ILLINOIS': '2987496', 'INDIANA': '1602976', 'IOWA': '699741', 'KANSAS': '401103', 'KENTUCKY': '1416013', 'LOUISIANA': '1585024', 'MAINE': '232455', 'MARYLAND': '1372695', 'MASSACHUSETTS': '1616404', 'MICHIGAN': '2439425', 'MINNESOTA': '1085778', 'MISSISSIPPI': '632427', 'MISSOURI': '923641', 'MONTANA': '247333', 'NEBRASKA': '254159', 'NEVADA': '685073', 'NEW HAMPSHIRE': '193436', 'NEW JERSEY': '1759653', 'NEW MEXICO': '772102', 'NEW YORK': '6263164', 'NORTH CAROLINA': '1851558', 'NORTH DAKOTA': '96757', 'OHIO': '2788134', 'OKLAHOMA': '797220', 'OREGON': '1053931', 'PENNSYLVANIA': '3069309', 'RHODE ISLAND': '305208', 'SOUTH CAROLINA': '1048276', 'SOUTH DAKOTA': '114059', 'TENNESSEE': '1489536

In [201]:
df_mmm = pd.read_csv("Data/ACSST5Y2018.S2704_data_with_overlays_2020-08-01T140649.csv", header=[1])
df_mmm.head(1)
#df_mmm.dtypes

Unnamed: 0,id,Geographic Area Name,Estimate!!Total!!Civilian noninstitutionalized population,Margin of Error!!Total MOE!!Civilian noninstitutionalized population,Estimate!!Public Coverage!!Civilian noninstitutionalized population,Margin of Error!!Public Coverage MOE!!Civilian noninstitutionalized population,Estimate!!Percent Public Coverage!!Civilian noninstitutionalized population,Margin of Error!!Percent Public Coverage MOE!!Civilian noninstitutionalized population,Estimate!!Total!!Medicare coverage alone or in combination,Margin of Error!!Total MOE!!Medicare coverage alone or in combination,Estimate!!Public Coverage!!Medicare coverage alone or in combination,Margin of Error!!Public Coverage MOE!!Medicare coverage alone or in combination,Estimate!!Percent Public Coverage!!Medicare coverage alone or in combination,Margin of Error!!Percent Public Coverage MOE!!Medicare coverage alone or in combination,Estimate!!Total!!Medicare coverage alone or in combination!!Under 19,Margin of Error!!Total MOE!!Medicare coverage alone or in combination!!Under 19,Estimate!!Public Coverage!!Medicare coverage alone or in combination!!Under 19,Margin of Error!!Public Coverage MOE!!Medicare coverage alone or in combination!!Under 19,Estimate!!Percent Public Coverage!!Medicare coverage alone or in combination!!Under 19,Margin of Error!!Percent Public Coverage MOE!!Medicare coverage alone or in combination!!Under 19,Estimate!!Total!!Medicare coverage alone or in combination!!19 to 64 years,Margin of Error!!Total MOE!!Medicare coverage alone or in combination!!19 to 64 years,Estimate!!Public Coverage!!Medicare coverage alone or in combination!!19 to 64 years,Margin of Error!!Public Coverage MOE!!Medicare coverage alone or in combination!!19 to 64 years,Estimate!!Percent Public Coverage!!Medicare coverage alone or in combination!!19 to 64 years,Margin of Error!!Percent Public Coverage MOE!!Medicare coverage alone or in combination!!19 to 64 years,Estimate!!Total!!Medicare coverage alone or in combination!!65 years and over,Margin of Error!!Total MOE!!Medicare coverage alone or in combination!!65 years and over,Estimate!!Public Coverage!!Medicare coverage alone or in combination!!65 years and over,Margin of Error!!Public Coverage MOE!!Medicare coverage alone or in combination!!65 years and over,Estimate!!Percent Public Coverage!!Medicare coverage alone or in combination!!65 years and over,Margin of Error!!Percent Public Coverage MOE!!Medicare coverage alone or in combination!!65 years and over,Estimate!!Total!!Medicaid/means-tested public coverage alone or in combination,Margin of Error!!Total MOE!!Medicaid/means-tested public coverage alone or in combination,Estimate!!Public Coverage!!Medicaid/means-tested public coverage alone or in combination,Margin of Error!!Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination,Estimate!!Percent Public Coverage!!Medicaid/means-tested public coverage alone or in combination,Margin of Error!!Percent Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination,Estimate!!Total!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Margin of Error!!Total MOE!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Estimate!!Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Margin of Error!!Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Estimate!!Percent Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Margin of Error!!Percent Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Estimate!!Total!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Margin of Error!!Total MOE!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Estimate!!Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Margin of Error!!Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Estimate!!Percent Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Margin of Error!!Percent Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Estimate!!Total!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Margin of Error!!Total MOE!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Estimate!!Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Margin of Error!!Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Estimate!!Percent Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Margin of Error!!Percent Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Estimate!!Total!!VA health care coverage alone or in combination,Margin of Error!!Total MOE!!VA health care coverage alone or in combination,Estimate!!Public Coverage!!VA health care coverage alone or in combination,Margin of Error!!Public Coverage MOE!!VA health care coverage alone or in combination,Estimate!!Percent Public Coverage!!VA health care coverage alone or in combination,Margin of Error!!Percent Public Coverage MOE!!VA health care coverage alone or in combination,Estimate!!Total!!VA health care coverage alone or in combination!!Under 19,Margin of Error!!Total MOE!!VA health care coverage alone or in combination!!Under 19,Estimate!!Public Coverage!!VA health care coverage alone or in combination!!Under 19,Margin of Error!!Public Coverage MOE!!VA health care coverage alone or in combination!!Under 19,Estimate!!Percent Public Coverage!!VA health care coverage alone or in combination!!Under 19,Margin of Error!!Percent Public Coverage MOE!!VA health care coverage alone or in combination!!Under 19,Estimate!!Total!!VA health care coverage alone or in combination!!19 to 64 years,Margin of Error!!Total MOE!!VA health care coverage alone or in combination!!19 to 64 years,Estimate!!Public Coverage!!VA health care coverage alone or in combination!!19 to 64 years,Margin of Error!!Public Coverage MOE!!VA health care coverage alone or in combination!!19 to 64 years,Estimate!!Percent Public Coverage!!VA health care coverage alone or in combination!!19 to 64 years,Margin of Error!!Percent Public Coverage MOE!!VA health care coverage alone or in combination!!19 to 64 years,Estimate!!Total!!VA health care coverage alone or in combination!!65 years and over,Margin of Error!!Total MOE!!VA health care coverage alone or in combination!!65 years and over,Estimate!!Public Coverage!!VA health care coverage alone or in combination!!65 years and over,Margin of Error!!Public Coverage MOE!!VA health care coverage alone or in combination!!65 years and over,Estimate!!Percent Public Coverage!!VA health care coverage alone or in combination!!65 years and over,Margin of Error!!Percent Public Coverage MOE!!VA health care coverage alone or in combination!!65 years and over,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,"Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)","Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)","Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)","Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)","Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)","Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)",Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Estimate!!Total!!COVERAGE ALONE!!Public health insurance alone,Margin of Error!!Total MOE!!COVERAGE ALONE!!Public health insurance alone,Estimate!!Public Coverage!!COVERAGE ALONE!!Public health insurance alone,Margin of Error!!Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone,Estimate!!Percent Public Coverage!!COVERAGE ALONE!!Public health insurance alone,Margin of Error!!Percent Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone,Estimate!!Total!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Margin of Error!!Total MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Estimate!!Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Margin of Error!!Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Estimate!!Percent Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Margin of Error!!Percent Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Estimate!!Total!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Margin of Error!!Total MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Estimate!!Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Margin of Error!!Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Estimate!!Percent Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Margin of Error!!Percent Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Estimate!!Total!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone,Margin of Error!!Total MOE!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone,Estimate!!Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone,Margin of Error!!Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone,Estimate!!Percent Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone,Margin of Error!!Percent Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone
0,0500000US01001,"Autauga County, Alabama",54277,219,18191,965,33.5,1.8,(X),(X),10026,410,18.5,0.8,14134,125,36,59,0.3,0.4,32229,261,2155,369,6.7,1.1,7914,152,7835,155,99.0,0.7,(X),(X),9049,859,16.7,1.6,14134,125,5352,682,37.9,4.9,32229,261,2788,452,8.7,1.4,7914,152,909,183,11.5,2.4,(X),(X),1701,290,3.1,0.5,14134,125,8,13,0.1,0.1,32229,261,989,252,3.1,0.8,7914,152,704,156,8.9,2.0,12303,1299,7466,880,60.7,4.9,41915,1301,10666,650,25.4,1.5,17605,763,1001,233,5.7,1.4,3974,267,1595,316,40.1,7.6,10160,310,3801,489,37.4,5.1,4406,284,560,235,12.7,5.3,6224,268,550,209,8.8,3.3,7042,206,1095,224,15.5,3.2,7771,134,1361,253,17.5,3.3,6786,87,1363,239,20.1,3.5,4697,68,4649,73,99.0,0.7,3217,138,3217,138,100.0,1.0,(X),(X),9513,834,17.5,1.5,(X),(X),2884,414,5.3,0.8,(X),(X),6503,810,12.0,1.5,(X),(X),126,101,0.2,0.2


In [202]:
df_mmm['FIPS'] = df_mmm.id.astype(str).str[9:]
df_mmm.head(5)
df_mmm.FIPS.astype(int)
df_mmm['FIPS'] = pd.to_numeric(df_mmm['FIPS'])

In [203]:
ACI_total  = dict(zip(df_mmm['FIPS'], df_mmm["Estimate!!Public Coverage!!Medicaid/means-tested public coverage alone or in combination"]))

In [204]:

#df_m = pd.read_csv("Data/Medicaid_Demand.csv")

# Unemployment 
The unemployment rate is calculated by the U.S. Bureau of Labor Statistics as the percentage of the civilian labor force who are without jobs and have actively sought work within the past four weeks. <a href="https://www.bls.gov/lau/laufaq.htm#Q01"> Data Source for Unemployment  </a> 

In [205]:
from io import StringIO
import datetime 
from datetime import date
from dateutil.relativedelta import relativedelta

back = date.today() + relativedelta(months= -3)
three_months_ago = back.strftime('%b-%y') 


url = 'https://www.bls.gov/web/metro/laucntycur14.txt'
s = requests.get(url).text



df_unemp = pd.read_csv(StringIO(s), sep='|',  skiprows=7, skipfooter=6, engine='python', names = ['LAUS Area Code', 'FIPS State', 'FIPS County', 'Area Title', 'Period', 'Civilian Labor Force','Employed','Unemployed_Level','Unemployed_Rate'])

df_unemp['Period'] = df_unemp['Period'].astype(str)

df_unemp['FIPS'] = df_unemp['LAUS Area Code'].str[3:8]

df_unemp = df_unemp[df_unemp['Period'].str.contains(str(three_months_ago))]

df_unemp.head(5)
#df_unemp.dtypes

Unnamed: 0,LAUS Area Code,FIPS State,FIPS County,Area Title,Period,Civilian Labor Force,Employed,Unemployed_Level,Unemployed_Rate,FIPS
38627,CN0100100000000,1,1,"Autauga County, AL",Aug-20,25446,24272,1174,4.6,1001
38628,CN0100300000000,1,3,"Baldwin County, AL",Aug-20,98267,93297,4970,5.1,1003
38629,CN0100500000000,1,5,"Barbour County, AL",Aug-20,9476,8806,670,7.1,1005
38630,CN0100700000000,1,7,"Bibb County, AL",Aug-20,8676,8141,535,6.2,1007
38631,CN0100900000000,1,9,"Blount County, AL",Aug-20,24792,23889,903,3.6,1009


In [206]:
#df_unemp[[ 'FIPS', 'Area Title', 'Period','Unemployed_Level','Unemployed_Rate']].to_csv('Data/County_employment.csv', index=False)


In [207]:


df_unemp.replace({'-', 0})

df_unemp['Unemployed_Level'] = df_unemp['Unemployed_Level'].str.replace(',', '')

df_unemp['Unemployed_Rate'] = df_unemp['Unemployed_Rate'].astype(str)

df_unemp['Unemployed_Level'] = df_unemp['Unemployed_Level'].str.strip()

df_unemp['Unemployed_Rate'] = df_unemp['Unemployed_Rate'].str.strip()

df_unemp['FIPS'] = pd.to_numeric(df_unemp['FIPS'])

df_unemp['Unemployed_Level'] = pd.to_numeric(df_unemp['Unemployed_Level'])

df_unemp['Unemployed_Rate'] = pd.to_numeric(df_unemp['Unemployed_Rate'])




# Fill NA with 0
#df_unemp = df_unemp.fillna(0)
df_unemp.tail(5)
#df_unemp.dtypes


Unnamed: 0,LAUS Area Code,FIPS State,FIPS County,Area Title,Period,Civilian Labor Force,Employed,Unemployed_Level,Unemployed_Rate,FIPS
41841,CN7214500000000,72,145,"Vega Baja Municipio, PR",Aug-20,12806,11317,1489,11.6,72145
41842,CN7214700000000,72,147,"Vieques Municipio, PR",Aug-20,2487,2230,257,10.3,72147
41843,CN7214900000000,72,149,"Villalba Municipio, PR",Aug-20,6722,6096,626,9.3,72149
41844,CN7215100000000,72,151,"Yabucoa Municipio, PR",Aug-20,8064,7286,778,9.6,72151
41845,CN7215300000000,72,153,"Yauco Municipio, PR",Aug-20,9159,8203,956,10.4,72153


In [208]:
 
## Read the Unemployment data using cvs data 
#df_unemp = pd.read_csv("Data/Unemployment.csv")

# Fill NA with 0
#df_unemp = df_unemp.fillna(0)

#df_unemp.head(5)

# Years of Potential Life Lost (YPLL)

Years of Potential Life Lost (YPLL) measures the rate of premature deaths by region. YPLL is calculated as the sum of the estimated number of years that individuals would have lived if they had not died before the age of 75 per 100,000 people. <a href="https://www.countyhealthrankings.org/sites/default/files/media/document/2020%20County%20Health%20Rankings%20Data%20-%20v2.xlsx"> Data Source for YPLL.  </a> More information about YPLL can be dounf in this <a href="https://www.countyhealthrankings.org/explore-health-rankings/measures-data-sources/county-health-rankings-model/health-outcomes/length-of-life/premature-death-ypll"> link. </a> 


In [209]:
# Read the YPLL data
df_y = pd.read_csv("Data/YPLL.csv")

# Fill NA with the mean of the data
df_y = df_y.fillna(df_y.mean())


# Population

In [210]:
# Read the Population data
df_pop = pd.read_csv("Data/County_pop_2019.csv")

# Fill NA with 0
df_pop = df_pop.fillna(0)


In [211]:
# Create a dictionary for the county and population
population_county = df_pop.set_index('FIPS')['pop'].to_dict()

# Create a dictionary for the county and YPLL
YPLL = dict(zip(df_y.FIPS, df_y.YPLL))

# Create a dictionary for the county and Unemployment
Unemployment = dict(zip(df_unemp.FIPS, df_unemp.Unemployed_Level))


# Create a dictionary for the county and Community Health Workers (CHW) demand
# Note that we assume a CHW can serve 55 Medicaid patient so the demand for CHW will be

#Medicaid_demand = dict(zip(df_m.FIPS, df_m.Med_Demand))


In [212]:
#for m in Medicaid_demand:
#    print (m, Medicaid_demand[m])

# COVID-19 Cases & COVID-19 Cases per Capita

What are COVID-19 Cases and COVID-19 Cases per Capita?

COVID-19 cases is an absolute metric of the total number of COVID-19 cases in a county over the last fourteen days.  COVID-19 cases per 100,000 is a relative metric calculated by dividing the number of COVID-19 cases by the estimated county population and multiplying by 100,000.  Cases include both confirmed cases, based on viral testing, and probable cases, based on specific criteria for symptoms and epidemiological exposure. We use NY Times Covid data. 


In [213]:
#### Data with the most recent date in NY Times dataset:

today = time.strftime('%Y-%m-%d')
covid_data_update_date = today#'2020-07-21'#today #or enter a specific date such as '2020-07-06'


In [214]:
## 14 day period defined
data_date_dt = pd.to_datetime(covid_data_update_date,infer_datetime_format = True)

N = 14

date_N_days_ago = data_date_dt - timedelta(days = N)

date_N1_days_ago = data_date_dt - timedelta(days = N+1)

In [215]:

# URL for mainland US data
url = "http://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"
s = requests.get(url).content
covid = pd.read_csv(io.StringIO(s.decode('utf-8')))

In [216]:
covid.tail(50)

Unnamed: 0,date,county,state,fips,cases,deaths
722487,2020-11-11,Ozaukee,Wisconsin,55089.0,3757,31
722488,2020-11-11,Pepin,Wisconsin,55091.0,293,1
722489,2020-11-11,Pierce,Wisconsin,55093.0,1515,9
722490,2020-11-11,Polk,Wisconsin,55095.0,1158,5
722491,2020-11-11,Portage,Wisconsin,55097.0,3787,30
722492,2020-11-11,Price,Wisconsin,55099.0,542,3
722493,2020-11-11,Racine,Wisconsin,55101.0,11411,131
722494,2020-11-11,Richland,Wisconsin,55103.0,667,10
722495,2020-11-11,Rock,Wisconsin,55105.0,7241,59
722496,2020-11-11,Rusk,Wisconsin,55107.0,474,5


Note: Since NY data is seperately available, we first read the NY data for all 5 different borough then combine with the rest of US data.

In [217]:
# URL for NY
url = "https://raw.githubusercontent.com/nychealth/coronavirus-data/master/trends/data-by-day.csv"
#"https://raw.githubusercontent.com/nychealth/coronavirus-data/master/data-by-day.csv"

ny = requests.get(url).content
covid_ny = pd.read_csv(io.StringIO(ny.decode('utf-8')))


covid_ny.tail(5)

Unnamed: 0,date_of_interest,CASE_COUNT,HOSPITALIZED_COUNT,DEATH_COUNT,DEATH_COUNT_PROBABLE,CASE_COUNT_7DAY_AVG,HOSP_COUNT_7DAY_AVG,DEATH_COUNT_7DAY_AVG,BX_CASE_COUNT,BX_HOSPITALIZED_COUNT,BX_DEATH_COUNT,BX_CASE_COUNT_7DAY_AVG,BX_HOSPITALIZED_COUNT_7DAY_AVG,BX_DEATH_COUNT_7DAY_AVG,BK_CASE_COUNT,BK_HOSPITALIZED_COUNT,BK_DEATH_COUNT,BK_CASE_COUNT_7DAY_AVG,BK_HOSPITALIZED_COUNT_7DAY_AVG,BK_DEATH_COUNT_7DAY_AVG,MN_CASE_COUNT,MN_HOSPITALIZED_COUNT,MN_DEATH_COUNT,MN_CASE_COUNT_7DAY_AVG,MN_HOSPITALIZED_COUNT_7DAY_AVG,MN_DEATH_COUNT_7DAY_AVG,QN_CASE_COUNT,QN_HOSPITALIZED_COUNT,QN_DEATH_COUNT,QN_CASE_COUNT_7DAY_AVG,QN_HOSPITALIZED_COUNT_7DAY_AVG,QN_DEATH_COUNT_7DAY_AVG,SI_CASE_COUNT,SI_HOSPITALIZED_COUNT,SI_DEATH_COUNT,SI_CASE_COUNT_7DAY_AVG,SI_HOSPITALIZED_COUNT_7DAY_AVG,SI_DEATH_COUNT_7DAY_AVG,INCOMPLETE
249,11/04/2020,1027,53,7,1,689,49,8,175,10,0,113,11,1,291,22,5,227,16,4,178,8,0,112,8,1,254,9,1,164,11,1,129,4,1,73,4,1,6000
250,11/05/2020,979,67,9,2,745,53,9,165,11,4,123,11,2,267,20,4,239,17,5,179,10,0,122,8,1,274,17,1,181,12,1,94,9,0,79,5,1,6000
251,11/06/2020,820,48,9,1,773,52,9,128,8,3,125,10,2,187,15,2,240,18,4,143,13,0,128,9,0,252,11,3,196,11,1,109,1,1,84,4,0,6000
252,11/07/2020,584,51,7,4,802,54,9,88,14,1,129,11,2,149,8,3,245,17,5,84,7,0,133,9,0,189,19,2,207,12,1,74,3,1,88,4,1,6000
253,11/08/2020,685,33,5,4,833,52,8,95,6,2,135,11,2,180,7,0,240,16,4,124,1,0,141,8,0,189,9,2,220,13,2,97,10,1,96,4,1,6000


In [218]:
Kings = covid_ny[['date_of_interest', 'BK_CASE_COUNT', 'BK_DEATH_COUNT']]
Kings.rename(columns = {'BK_CASE_COUNT': 'cases', 'date_of_interest': 'date', 'BK_DEATH_COUNT': 'deaths'} , inplace=True)
Kings['county'] = 'Kings'
Kings['state'] = 'New York'
Kings['fips'] = 36047.0
#Kings.head(5)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the d

In [219]:
Bronx = covid_ny[['date_of_interest', 'BX_CASE_COUNT', 'BX_DEATH_COUNT']]
Bronx.rename(columns = {'BX_CASE_COUNT': 'cases', 'date_of_interest': 'date', 'BX_DEATH_COUNT': 'deaths'} , inplace=True)
Bronx['state'] = 'New York'
Bronx['county'] = 'Bronx'
Bronx['fips'] = 36005.0
#Bronx.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [220]:
Manhattan = covid_ny[['date_of_interest', 'MN_CASE_COUNT', 'MN_DEATH_COUNT']]
Manhattan.rename(columns = {'MN_CASE_COUNT': 'cases', 'date_of_interest': 'date', 'MN_DEATH_COUNT': 'deaths'} , inplace=True)
Manhattan['state'] = 'New York'
Manhattan['county'] = 'Manhattan'
Manhattan['fips'] = 36061.0
#Manhattan.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [221]:
Queens = covid_ny[['date_of_interest', 'QN_CASE_COUNT', 'QN_DEATH_COUNT']]
Queens.rename(columns = {'QN_CASE_COUNT': 'cases', 'date_of_interest': 'date', 'QN_DEATH_COUNT': 'deaths'} , inplace=True)
Queens['state'] = 'New York'
Queens['county'] = 'Queens'
Queens['fips'] = 36081.0
#Queens.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [222]:
Richmond = covid_ny[['date_of_interest', 'SI_CASE_COUNT', 'SI_DEATH_COUNT']]
Richmond.rename(columns = {'SI_CASE_COUNT': 'cases', 'date_of_interest': 'date', 'SI_DEATH_COUNT': 'deaths'} , inplace=True)
Richmond['state'] = 'New York'
Richmond['county'] = 'Richmond'
Richmond['fips'] = 36085.0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [223]:
covid.head(5)


Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0
1,2020-01-22,Snohomish,Washington,53061.0,1,0
2,2020-01-23,Snohomish,Washington,53061.0,1,0
3,2020-01-24,Cook,Illinois,17031.0,1,0
4,2020-01-24,Snohomish,Washington,53061.0,1,0


In [224]:
Bronx.tail(5)

Unnamed: 0,date,cases,deaths,state,county,fips
249,11/04/2020,175,0,New York,Bronx,36005.0
250,11/05/2020,165,4,New York,Bronx,36005.0
251,11/06/2020,128,3,New York,Bronx,36005.0
252,11/07/2020,88,1,New York,Bronx,36005.0
253,11/08/2020,95,2,New York,Bronx,36005.0


In [225]:
#Kings['deaths'] = Kings.groupby(by=['fips'])['deaths_d'].sum()
#Bronx['deaths'] = Bronx.groupby(by=['fips'])['deaths_d'].sum()
#Manhattan['deaths'] = Manhattan.groupby(by=['fips'])['deaths_d'].sum()
#Queens['deaths'] = Queens.groupby(by=['fips'])['deaths_d'].sum()
#Richmond['deaths'] = Richmond.groupby(by=['fips'])['deaths_d'].sum()

In [226]:
Kings.head(5)

Unnamed: 0,date,cases,deaths,county,state,fips
0,02/29/2020,0,0,Kings,New York,36047.0
1,03/01/2020,0,0,Kings,New York,36047.0
2,03/02/2020,0,0,Kings,New York,36047.0
3,03/03/2020,0,0,Kings,New York,36047.0
4,03/04/2020,1,0,Kings,New York,36047.0


In [227]:
#Kings = Kings.drop(['deaths_d'], axis=1)
#Bronx = Bronx.drop(['deaths_d'], axis=1)
#Manhattan = Manhattan.drop(['deaths_d'], axis=1)
#Queens = Queens.drop(['deaths_d'], axis=1)
#Richmond = Richmond.drop(['deaths_d'], axis=1)

In [228]:
Kings.tail(5)

Unnamed: 0,date,cases,deaths,county,state,fips
249,11/04/2020,291,5,Kings,New York,36047.0
250,11/05/2020,267,4,Kings,New York,36047.0
251,11/06/2020,187,2,Kings,New York,36047.0
252,11/07/2020,149,3,Kings,New York,36047.0
253,11/08/2020,180,0,Kings,New York,36047.0


In [229]:
covid['dt'] = pd.to_datetime(covid['date'], infer_datetime_format=True)
Kings['dt'] = pd.to_datetime(Kings['date'], infer_datetime_format=True)
Bronx['dt'] = pd.to_datetime(Bronx['date'], infer_datetime_format=True)
Manhattan['dt'] = pd.to_datetime(Manhattan['date'], infer_datetime_format=True)
Queens['dt'] = pd.to_datetime(Queens['date'], infer_datetime_format=True)
Richmond['dt'] = pd.to_datetime(Richmond['date'], infer_datetime_format=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the cavea

In [230]:
covid_death = covid[(covid['dt'] >= data_date_dt- timedelta(days = 1))]
Kings_death = Kings.groupby(by=['fips'])['deaths'].sum().reset_index()
Bronx_death = Bronx.groupby(by=['fips'])['deaths'].sum().reset_index()
Manhattan_death = Manhattan.groupby(by=['fips'])['deaths'].sum().reset_index()
Queens_death = Queens.groupby(by=['fips'])['deaths'].sum().reset_index()
Richmond_death = Richmond.groupby(by=['fips'])['deaths'].sum().reset_index()

In [231]:
# We merge the NY data with the rest of the US data
covid = pd.concat([covid, Kings, Bronx, Manhattan, Queens, Richmond], sort = False)


In [232]:

covid.tail(50)

Unnamed: 0,date,county,state,fips,cases,deaths,dt
204,09/20/2020,Richmond,New York,36085.0,21,0,2020-09-20
205,09/21/2020,Richmond,New York,36085.0,24,0,2020-09-21
206,09/22/2020,Richmond,New York,36085.0,25,0,2020-09-22
207,09/23/2020,Richmond,New York,36085.0,37,1,2020-09-23
208,09/24/2020,Richmond,New York,36085.0,26,0,2020-09-24
209,09/25/2020,Richmond,New York,36085.0,28,0,2020-09-25
210,09/26/2020,Richmond,New York,36085.0,33,0,2020-09-26
211,09/27/2020,Richmond,New York,36085.0,28,0,2020-09-27
212,09/28/2020,Richmond,New York,36085.0,46,0,2020-09-28
213,09/29/2020,Richmond,New York,36085.0,34,0,2020-09-29


In [233]:
print (data_date_dt)

2020-11-12 00:00:00


In [234]:

covid_death = covid_death[['fips', 'deaths']]
covid_death.head(2)

Unnamed: 0,fips,deaths
719293,1001.0,32
719294,1003.0,84


In [235]:
Kings_death.head(2)

Unnamed: 0,fips,deaths
0,36047.0,5780


In [236]:
# We merge the NY data with the rest of the US data
covid_death = pd.concat([covid_death, Kings_death, Bronx_death, Manhattan_death, Queens_death, Richmond_death], sort = False)

covid_death = covid_death.rename(columns={"deaths": "total_deaths"})


# Create a dictionary for the cumulative COVID deaths in each county
County_covid_death = dict(zip(covid_death.fips, covid_death.total_deaths))

covid_death.tail(250)

Unnamed: 0,fips,total_deaths
722292,51115.0,0
722293,51117.0,35
722294,51119.0,11
722295,51121.0,7
722296,51125.0,2
722297,51127.0,3
722298,51700.0,51
722299,51710.0,86
722300,51131.0,31
722301,51133.0,8


In [237]:
covid_death.shape

(3249, 2)

In [238]:
print (County_covid_death)

{1001.0: 32, 1003.0: 84, 1005.0: 9, 1007.0: 17, 1009.0: 34, 1011.0: 19, 1013.0: 41, 1015.0: 77, 1017.0: 48, 1019.0: 15, 1021.0: 36, 1023.0: 12, 1025.0: 18, 1027.0: 13, 1029.0: 11, 1031.0: 12, 1033.0: 37, 1035.0: 14, 1037.0: 3, 1039.0: 32, 1041.0: 30, 1043.0: 30, 1045.0: 55, 1047.0: 28, 1049.0: 34, 1051.0: 60, 1053.0: 31, 1055.0: 60, 1057.0: 15, 1059.0: 33, 1061.0: 8, 1063.0: 17, 1065.0: 30, 1067.0: 6, 1069.0: 36, 1071.0: 23, 1073.0: 442, 1075.0: 7, 1077.0: 49, 1079.0: 35, 1081.0: 65, 1083.0: 38, 1085.0: 29, 1087.0: 20, 1089.0: 117, 1091.0: 24, 1093.0: 33, 1095.0: 53, 1097.0: 334, 1099.0: 10, 1101.0: 221, 1103.0: 41, 1105.0: 6, 1107.0: 18, 1109.0: 14, 1111.0: 19, 1113.0: 3, 1117.0: 73, 1115.0: 54, 1119.0: 21, 1121.0: 40, 1123.0: 87, 1125.0: 147, 1127.0: 96, 1129.0: 12, 1131.0: 12, 1133.0: 21, 2013.0: 0, 2016.0: 0, 2020.0: 57, 2050.0: 1, 2060.0: 0, 2068.0: 0, 2070.0: 0, 2090.0: 17, 2100.0: 0, 2110.0: 2, 2122.0: 3, 2130.0: 0, 2150.0: 0, 2158.0: 0, 2164.0: 0, 2170.0: 5, 2180.0: 0, 2185.0: 

In [239]:
## subset last last 15 days
covid_last15 = covid[(covid['dt']>date_N1_days_ago) & (covid['dt']<= data_date_dt)].copy()
covid_last15['dt_time_delta'] = covid_last15['dt']-data_date_dt

In [240]:
## calculate new daily cases

## sort values by county and date
covid_last15.sort_values(by=['fips','dt'],inplace=True)
## remove data with 'unknown' counties
covid_last15 = covid_last15[covid_last15['fips'].notnull()].copy()

## calculate daily difference in number of cases
covid_last15['new_cases'] = covid_last15.groupby('fips')['cases'].transform(lambda x: x.diff())
## set negative new cases to zero, this can occuer due to the disperacy in the data

covid_last15.loc[covid_last15.new_cases < 1e-6, 'new_cases'] = 0
covid_last15.sort_index(inplace = True)

In [241]:
covid_last15[covid_last15.county == 'New Haven'].head(50)

Unnamed: 0,date,county,state,fips,cases,deaths,dt,dt_time_delta,new_cases
677434,2020-10-29,New Haven,Connecticut,9009.0,17156,1128,2020-10-29,-14 days,
680677,2020-10-30,New Haven,Connecticut,9009.0,17352,1128,2020-10-30,-13 days,196.0
683921,2020-10-31,New Haven,Connecticut,9009.0,17352,1128,2020-10-31,-12 days,0.0
687166,2020-11-01,New Haven,Connecticut,9009.0,17352,1128,2020-11-01,-11 days,0.0
690411,2020-11-02,New Haven,Connecticut,9009.0,18048,1128,2020-11-02,-10 days,696.0
693654,2020-11-03,New Haven,Connecticut,9009.0,18244,1130,2020-11-03,-9 days,196.0
696897,2020-11-04,New Haven,Connecticut,9009.0,18374,1132,2020-11-04,-8 days,130.0
700142,2020-11-05,New Haven,Connecticut,9009.0,18862,1135,2020-11-05,-7 days,488.0
703386,2020-11-06,New Haven,Connecticut,9009.0,19154,1137,2020-11-06,-6 days,292.0
706629,2020-11-07,New Haven,Connecticut,9009.0,19154,1137,2020-11-07,-5 days,0.0


In [242]:
## select just last 14 days now that we have daily new cases with 15th day as baseline
covid_last14 = covid_last15[(covid_last15['dt'] > date_N_days_ago) & (covid_last15['dt'] <= data_date_dt)].copy()


In [243]:
## group by FIPS to get case load and follow up demand values for each county
covid_last14_stats = covid_last14.groupby(['fips'])['new_cases'].sum().reset_index(name ='total_cases')

covid_last14_stats[covid_last14_stats.fips == 9009].head(5)

Unnamed: 0,fips,total_cases
310,9009.0,3736.0


In [244]:
covid.tail(50)

Unnamed: 0,date,county,state,fips,cases,deaths,dt
204,09/20/2020,Richmond,New York,36085.0,21,0,2020-09-20
205,09/21/2020,Richmond,New York,36085.0,24,0,2020-09-21
206,09/22/2020,Richmond,New York,36085.0,25,0,2020-09-22
207,09/23/2020,Richmond,New York,36085.0,37,1,2020-09-23
208,09/24/2020,Richmond,New York,36085.0,26,0,2020-09-24
209,09/25/2020,Richmond,New York,36085.0,28,0,2020-09-25
210,09/26/2020,Richmond,New York,36085.0,33,0,2020-09-26
211,09/27/2020,Richmond,New York,36085.0,28,0,2020-09-27
212,09/28/2020,Richmond,New York,36085.0,46,0,2020-09-28
213,09/29/2020,Richmond,New York,36085.0,34,0,2020-09-29


In [245]:
## group by FIPS to get case load and follow up demand values for each county
#covid_death = covid.groupby(['fips'])['deaths'].sum().reset_index(name ='total_deaths')
#covid_death.head(250)

In [246]:
## group by FIPS to get case load each month and follow up demand values for each county
covid['month'] = pd.to_datetime(covid['date'])
covid['year'] = pd.to_datetime(covid['date'])


In [247]:
#covid_last14_stats_montly = covid.groupby(['fips', covid.month.dt.month, covid.year.dt.year])['deaths'].sum().reset_index() 

#covid_last14_stats_montly['cumulative_death'] = covid_last14_stats_montly.groupby(['fips'])['deaths'].cumsum(axis = 0) 
#covid_last14_stats_montly = covid_last14_stats_montly.groupby(['fips', covid_last14_stats_montly.month,  covid_last14_stats_montly.year])['deaths'].cumsum()

#covid_last14_stats_montly.head(10)

In [248]:
# adding population information from CDC svi dataset
covid_last14_stats = covid_last14_stats.reset_index()
covid_last14_stats['fips'] = covid_last14_stats['fips'].astype(int)
svi_county['FIPS'] = svi_county['FIPS'].astype(int)
covid_last14_stats = pd.merge(left = covid_last14_stats, right = svi_county[['E_TOTPOP','FIPS', 'STATE']], how = 'right', right_on = 'FIPS', left_on = 'fips' )
covid_last14_stats.fillna(0 , inplace=True)

In [249]:

covid_last14_stats.tail(5)

Unnamed: 0,index,fips,total_cases,E_TOTPOP,FIPS,STATE
3215,0.0,0.0,0.0,2132,2105,ALASKA
3216,0.0,0.0,0.0,689,2282,ALASKA
3217,0.0,0.0,0.0,75,15005,HAWAII
3218,0.0,0.0,0.0,981,32009,NEVADA
3219,0.0,0.0,0.0,102,48301,TEXAS


In [250]:
now = pd.to_datetime("now")

print (now)
m_now = now.month
print (m_now)

y_now = now.year

2020-11-12 16:16:21.200765
11


In [251]:
#one_month_lag_cumulative_death =  covid_last14_stats_montly[(covid_last14_stats_montly['month'] == m_now-1) & (covid_last14_stats_montly['year'] == y_now)]

# Create a dictionary for the last month COVID deaths in each county
#one_month_lag_death = dict(zip(one_month_lag_cumulative_death.fips, one_month_lag_cumulative_death.cumulative_death))

#one_month_lag_cumulative_death.head(5)


In [252]:
#two_month_lag_cumulative_death =  covid_last14_stats_montly[(covid_last14_stats_montly['month'] == m_now-2) & (covid_last14_stats_montly['year'] == y_now)]

# Create a dictionary for the last month COVID deaths in each county
#two_month_lag_death = dict(zip(two_month_lag_cumulative_death.fips, two_month_lag_cumulative_death.cumulative_death))


#two_month_lag_cumulative_death.head(5)

In [253]:
#three_month_lag_cumulative_death =  covid_last14_stats_montly[(covid_last14_stats_montly['month'] == m_now - 3) & (covid_last14_stats_montly['year'] == y_now)]


# Create a dictionary for the 3 last month COVID deaths in each county
#three_month_lag_death = dict(zip(three_month_lag_cumulative_death.fips, three_month_lag_cumulative_death.cumulative_death))

#three_month_lag_cumulative_death.head(5)

#print (three_month_lag_death)

In [254]:

# Create a dictionary for the states of the given the county FIPS
county_of_states = dict(zip(svi_county.FIPS, svi_county.STATE))

# Create a dictionary for the name of the given the county FIPS
county_name = dict(zip(svi_county.FIPS, svi_county.COUNTY))

# Create the list for county FIPS, we consider counties as analogy to the center for community health workers
location = svi_county.FIPS.tolist() #[k for k in SVI_county] #[9001, 9003, 9005, 9007, 9009, 9011, 9013, 9015]#[k for k in SVI_county]




In [255]:
for j in County_covid_death:
       
    print (j, County_covid_death[j])

1001.0 32
1003.0 84
1005.0 9
1007.0 17
1009.0 34
1011.0 19
1013.0 41
1015.0 77
1017.0 48
1019.0 15
1021.0 36
1023.0 12
1025.0 18
1027.0 13
1029.0 11
1031.0 12
1033.0 37
1035.0 14
1037.0 3
1039.0 32
1041.0 30
1043.0 30
1045.0 55
1047.0 28
1049.0 34
1051.0 60
1053.0 31
1055.0 60
1057.0 15
1059.0 33
1061.0 8
1063.0 17
1065.0 30
1067.0 6
1069.0 36
1071.0 23
1073.0 442
1075.0 7
1077.0 49
1079.0 35
1081.0 65
1083.0 38
1085.0 29
1087.0 20
1089.0 117
1091.0 24
1093.0 33
1095.0 53
1097.0 334
1099.0 10
1101.0 221
1103.0 41
1105.0 6
1107.0 18
1109.0 14
1111.0 19
1113.0 3
1117.0 73
1115.0 54
1119.0 21
1121.0 40
1123.0 87
1125.0 147
1127.0 96
1129.0 12
1131.0 12
1133.0 21
2013.0 0
2016.0 0
2020.0 57
2050.0 1
2060.0 0
2068.0 0
2070.0 0
2090.0 17
2100.0 0
2110.0 2
2122.0 3
2130.0 0
2150.0 0
2158.0 0
2164.0 0
2170.0 5
2180.0 0
2185.0 0
2188.0 0
2195.0 0
2198.0 1
2220.0 0
2230.0 0
2240.0 1
nan 0
2261.0 1
2275.0 0
2290.0 2
4001.0 187
4003.0 76
4005.0 153
4007.0 77
4009.0 31
4011.0 2
4012.0 18
4013.0 374

22031.0 35
22033.0 462
22035.0 6
22037.0 81
22039.0 40
22041.0 52
22043.0 31
22045.0 99
22047.0 62
22049.0 24
22051.0 592
22053.0 45
22059.0 11
22055.0 141
22057.0 131
22061.0 53
22063.0 82
22065.0 10
22067.0 29
22069.0 35
22071.0 602
22073.0 173
22075.0 19
22077.0 42
22079.0 165
22081.0 25
22083.0 21
22085.0 16
22087.0 32
22089.0 65
22091.0 4
22093.0 40
22095.0 108
22097.0 140
22099.0 65
22101.0 87
22103.0 277
22105.0 129
22107.0 3
22109.0 126
22111.0 54
nan 229
22113.0 70
22115.0 56
22117.0 72
22119.0 42
22121.0 42
22123.0 14
22125.0 25
22127.0 21
23001.0 11
23003.0 1
23005.0 70
23007.0 2
23009.0 1
23011.0 12
23013.0 2
23015.0 1
23017.0 2
23019.0 6
23021.0 0
23023.0 0
23025.0 11
nan 0
23027.0 16
23029.0 0
23031.0 23
24001.0 29
24003.0 284
24005.0 688
24510.0 519
24009.0 29
24011.0 9
24013.0 130
24015.0 37
24017.0 102
24019.0 13
24021.0 140
24023.0 1
24025.0 84
24027.0 130
24029.0 26
24031.0 899
24033.0 876
24035.0 26
24039.0 7
24037.0 60
24041.0 6
nan 20
24043.0 49
24045.0 54
24047.0

47019.0 42
47021.0 13
47023.0 18
47025.0 9
47027.0 17
47029.0 24
47031.0 35
47033.0 25
47035.0 30
47037.0 374
47041.0 19
47039.0 11
47043.0 23
47045.0 44
47047.0 34
47049.0 14
47051.0 28
47053.0 54
47055.0 39
47057.0 5
47059.0 63
47061.0 15
47063.0 55
47065.0 119
47067.0 3
47069.0 35
47071.0 25
47073.0 31
47075.0 28
47077.0 30
47079.0 13
47081.0 16
47083.0 19
47085.0 6
47087.0 7
47089.0 26
47091.0 16
47093.0 125
47095.0 5
47097.0 20
47099.0 24
47101.0 8
47103.0 8
47105.0 13
47111.0 26
47113.0 89
47115.0 15
47117.0 15
47119.0 45
47107.0 47
47109.0 30
47121.0 8
47123.0 35
47125.0 70
47127.0 3
47129.0 6
47131.0 34
47133.0 31
47135.0 16
47137.0 10
47139.0 13
47141.0 68
47143.0 28
47145.0 11
47147.0 44
47149.0 128
47151.0 5
47153.0 5
47155.0 26
47157.0 604
47159.0 17
47161.0 13
47163.0 74
47165.0 121
47167.0 31
47169.0 10
47171.0 14
47173.0 5
nan 0
47175.0 2
47177.0 12
47179.0 83
47181.0 7
47183.0 29
47185.0 17
47187.0 64
47189.0 67
48001.0 42
48003.0 11
48005.0 93
48007.0 18
48009.0 1
4801

In [256]:
# Create a dictionary for the county and covid cases
covid_cases_county_ny_times = dict(zip(covid_last14_stats.fips, covid_last14_stats.total_cases))
COVID_14days = {}

for j in location:
    if j in covid_cases_county_ny_times:
        COVID_14days[j] = covid_cases_county_ny_times[j] 
        print(j, covid_cases_county_ny_times[j])
    else:
        COVID_14days[j] = 0
        
        
    if j not in County_covid_death:
        print ('j',j)
        County_covid_death[j] = 0

#print (three_month_lag_death)
#print (two_month_lag_covid_death)
#print (one_month_lag_covid_death)
#for j in location:
#    if j not in three_month_lag_death:        
#        three_month_lag_death[j] = 0
        
#    if j not in two_month_lag_death:        
#        two_month_lag_death[j] = 0
        
#    if j not in one_month_lag_death:        
#        one_month_lag_death[j] = 0
        
    #if j not in County_covid_death:
    #    County_covid_death[j] = 0

35039 174.0
1001 225.0
1009 391.0
1013 43.0
1015 517.0
1017 128.0
1031 218.0
1033 455.0
1039 209.0
1043 733.0
1045 241.0
1051 244.0
1055 611.0
1067 66.0
1069 358.0
1071 310.0
1077 683.0
1079 143.0
1083 487.0
1089 1078.0
1095 546.0
1097 966.0
1103 753.0
1111 63.0
1113 48.0
1115 467.0
1117 934.0
1121 245.0
2261 42.0
4021 1196.0
5009 205.0
5011 15.0
5033 391.0
5037 56.0
5045 441.0
5047 101.0
5051 427.0
5053 50.0
5063 309.0
5083 81.0
5085 343.0
5087 63.0
5115 273.0
5117 68.0
5121 121.0
5125 658.0
5131 871.0
5145 549.0
6007 193.0
6017 188.0
6023 55.0
6027 15.0
6061 703.0
6079 581.0
6089 742.0
6093 100.0
8019 40.0
8039 109.0
8047 20.0
8051 64.0
8057 2.0
8065 38.0
8067 147.0
8071 22.0
8075 343.0
8077 1294.0
8083 98.0
8085 101.0
8093 36.0
8099 113.0
8101 1552.0
8103 12.0
8119 107.0
8121 36.0
9005 565.0
9007 450.0
9011 710.0
9015 293.0
10001 369.0
12019 599.0
12033 1092.0
12089 186.0
12101 1262.0
12109 713.0
12113 433.0
13015 441.0
13039 118.0
13045 623.0
13047 211.0
13057 968.0
13073 593.0
130

38003 170.0
38007 3.0
38009 113.0
38011 23.0
38021 138.0
38025 28.0
38027 88.0
38031 83.0
38033 13.0
38041 26.0
38043 37.0
38049 119.0
38055 165.0
38057 120.0
38059 729.0
38065 18.0
38067 194.0
38069 132.0
38073 101.0
38075 32.0
38081 56.0
38091 24.0
38095 47.0
38097 204.0
38099 471.0
39003 855.0
39005 216.0
39007 495.0
39011 419.0
39013 313.0
39015 174.0
39017 2193.0
39019 68.0
39021 159.0
39023 884.0
39025 1056.0
39027 232.0
39029 342.0
39033 227.0
39037 285.0
39039 333.0
39043 257.0
39045 866.0
39051 257.0
39053 138.0
39055 351.0
39057 992.0
39059 166.0
39063 430.0
39065 168.0
39067 38.0
39069 175.0
39071 193.0
39073 191.0
39077 224.0
39081 201.0
39083 254.0
39085 1522.0
39089 887.0
39091 300.0
39093 999.0
39099 985.0
39103 870.0
39107 512.0
39109 740.0
39111 39.0
39113 3326.0
39117 182.0
39119 453.0
39123 173.0
39125 148.0
39127 121.0
39133 561.0
39135 301.0
39137 555.0
39139 538.0
39141 359.0
39143 283.0
39147 444.0
39149 313.0
39151 1426.0
39153 1975.0
39155 850.0
39157 623.0
391

48093 32.0
48121 2174.0
48125 9.0
48129 7.0
48133 47.0
48147 89.0
48161 19.0
48167 654.0
48185 58.0
48199 43.0
48209 419.0
48213 337.0
48217 120.0
48231 246.0
48237 33.0
48239 40.0
48257 310.0
48265 46.0
48269 0.0
48285 76.0
48287 19.0
48289 45.0
48293 20.0
48295 12.0
48303 4928.0
48331 32.0
48349 210.0
48351 2.0
48353 85.0
48365 31.0
48379 56.0
48393 5.0
48397 270.0
48401 47.0
48417 4.0
48423 1086.0
48429 36.0
48433 3.0
48447 1.0
48451 1565.0
48485 1160.0
48493 28.0
48497 190.0
48499 67.0
49003 469.0
49005 954.0
49007 156.0
49011 2696.0
49013 124.0
49015 67.0
49019 56.0
49021 161.0
49027 36.0
49029 84.0
49033 6.0
49041 154.0
49043 329.0
49045 519.0
49047 190.0
49051 283.0
49057 2229.0
50001 13.0
50007 139.0
50009 13.0
50021 15.0
51001 28.0
51003 89.0
51036 11.0
51037 34.0
51087 649.0
51091 3.0
51103 12.0
51115 7.0
51117 52.0
51119 3.0
51125 8.0
51131 7.0
51133 8.0
51145 30.0
51147 43.0
51149 154.0
51157 3.0
51159 15.0
51161 460.0
51163 29.0
51169 161.0
51173 139.0
51179 265.0
51185 15

13107 107.0
13135 2514.0
13165 10.0
13167 12.0
13179 106.0
13183 16.0
13235 20.0
13269 26.0
13289 15.0
13307 2.0
13313 751.0
15003 1006.0
j 15005
16053 375.0
16073 52.0
17031 47813.0
17077 365.0
20081 31.0
20209 1003.0
21043 95.0
21135 94.0
21181 16.0
21195 387.0
21231 112.0
22013 39.0
22031 57.0
22049 53.0
22069 131.0
22071 520.0
22091 18.0
24039 83.0
25013 1947.0
25025 3054.0
26163 6855.0
28007 88.0
28029 72.0
28031 86.0
28041 44.0
28065 62.0
28097 68.0
28099 96.0
28113 70.0
28115 218.0
29119 92.0
29215 161.0
29221 289.0
29510 1490.0
31037 154.0
j 32009
32023 253.0
32027 6.0
34001 954.0
35041 188.0
36061 166.0
37005 50.0
37007 56.0
37065 179.0
37093 151.0
37107 213.0
37163 327.0
37181 134.0
37187 26.0
39075 332.0
40091 96.0
41045 159.0
45025 98.0
45027 95.0
45049 39.0
45065 19.0
47029 190.0
47129 67.0
48055 67.0
48105 3.0
48115 144.0
48137 6.0
48165 126.0
48273 49.0
48291 61.0
48313 31.0
48329 743.0
48345 2.0
48409 58.0
48413 10.0
48439 10728.0
48443 4.0
48449 61.0
48457 11.0
48471 1

In [257]:
print (covid_cases_county_ny_times)

{1001.0: 225.0, 1003.0: 641.0, 1005.0: 57.0, 1007.0: 100.0, 1009.0: 391.0, 1011.0: 17.0, 1013.0: 43.0, 1015.0: 517.0, 1017.0: 128.0, 1019.0: 101.0, 1021.0: 114.0, 1023.0: 12.0, 1025.0: 81.0, 1027.0: 63.0, 1029.0: 60.0, 1031.0: 218.0, 1033.0: 455.0, 1035.0: 52.0, 1037.0: 29.0, 1039.0: 209.0, 1041.0: 48.0, 1043.0: 733.0, 1045.0: 241.0, 1047.0: 128.0, 1049.0: 451.0, 1051.0: 244.0, 1053.0: 89.0, 1055.0: 611.0, 1057.0: 95.0, 1059.0: 187.0, 1061.0: 111.0, 1063.0: 18.0, 1065.0: 62.0, 1067.0: 66.0, 1069.0: 358.0, 1071.0: 310.0, 1073.0: 2574.0, 1075.0: 49.0, 1077.0: 683.0, 1079.0: 143.0, 1081.0: 345.0, 1083.0: 487.0, 1085.0: 29.0, 1087.0: 78.0, 1089.0: 1078.0, 1091.0: 67.0, 1093.0: 117.0, 1095.0: 546.0, 1097.0: 966.0, 1099.0: 45.0, 1101.0: 784.0, 1103.0: 753.0, 1105.0: 27.0, 1107.0: 140.0, 1109.0: 88.0, 1111.0: 63.0, 1113.0: 48.0, 1115.0: 467.0, 1117.0: 934.0, 1119.0: 28.0, 1121.0: 245.0, 1123.0: 145.0, 1125.0: 767.0, 1127.0: 302.0, 1129.0: 83.0, 1131.0: 26.0, 1133.0: 125.0, 2013.0: 4.0, 2016.0

In [258]:
#Parameters
pro_c_s = [(i,county_of_states[i]) for i in location ]
cartesian_pro_county_state = gp.tuplelist(pro_c_s)


In [259]:
df = covid_last14_stats
df['fips'] = df['fips'].astype(int)

In [260]:
###############################################################################################
######################## END calculating different types of vulnerabilities ###################

Since we allocate CHW proportional to the county values of certain vulnaribilites within state, we need a few function to help us with the calculations. 



In [261]:
# This function return the value for the state for the given dictionary

# More specifically sum upt the values for the counties of each state

def total_state(dict_1):
    state_dict = {}
    for s in State:
        state_dict [s] = sum(float(dict_1[j]) for (j,s) in cartesian_pro_county_state.select('*', s) if j in dict_1)  
    return state_dict



In [262]:
# Calculte the population per state by summing up the population in each county in the state
State_pop = total_state(population_county)

In [263]:


# This function returns the ratio of the dict value for county and state of the county
def Proportional(county_level, state_level):
    
    prop = {}
       
    for (j,s) in cartesian_pro_county_state:
        if state_level[s] >= 1e-6 and j in county_level:
            prop[j] = (float(county_level[j])/float(state_level[s]))    
        else:
            prop[j] = 0
                
    return prop

In [264]:
print (ACI_total)
print (ACI_total[35039])

{1001: 9049, 1003: 30763, 1005: 7244, 1007: 4272, 1009: 9290, 1011: 2935, 1013: 4814, 1015: 25890, 1017: 7400, 1019: 5105, 1021: 9147, 1023: 3273, 1025: 6716, 1027: 3112, 1029: 3180, 1031: 9926, 1033: 9540, 1035: 2765, 1037: 2155, 1039: 6835, 1041: 3219, 1043: 15478, 1045: 10930, 1047: 13544, 1049: 17896, 1051: 12373, 1053: 6235, 1055: 21998, 1057: 4144, 1059: 6715, 1061: 6647, 1063: 3143, 1065: 4730, 1067: 3651, 1069: 22918, 1071: 11352, 1073: 126944, 1075: 3099, 1077: 15561, 1079: 6877, 1081: 23596, 1083: 14596, 1085: 3540, 1087: 5301, 1089: 48769, 1091: 5053, 1093: 6051, 1095: 20669, 1097: 90962, 1099: 5565, 1101: 52763, 1103: 22180, 1105: 3375, 1107: 4807, 1109: 6945, 1111: 4428, 1113: 14480, 1115: 12599, 1117: 20035, 1119: 4098, 1121: 19979, 1123: 9968, 1125: 34877, 1127: 13887, 1129: 4268, 1131: 4086, 1133: 5577, 2013: 444, 2016: 400, 2020: 49663, 2050: 8680, 2060: 141, 2068: 142, 2070: 1983, 2090: 12661, 2100: 606, 2105: 504, 2110: 4631, 2122: 11347, 2130: 2881, 2150: 2412, 2158

In [265]:
#ACI
ACI_State = total_state(ACI_total)

In [266]:
# Create dicts for the variables of SVI
E_POV = dict(zip(svi_county.FIPS, svi_county.EP_POV))
E_UNEMP = dict(zip(svi_county.FIPS, svi_county.EP_UNEMP))
E_PCI = dict(zip(svi_county.FIPS, svi_county.EP_PCI))
E_NOHSDP = dict(zip(svi_county.FIPS, svi_county.EP_NOHSDP))
E_AGE65 = dict(zip(svi_county.FIPS, svi_county.EP_AGE65))
E_AGE17 = dict(zip(svi_county.FIPS, svi_county.EP_AGE17))
E_DISABL = dict(zip(svi_county.FIPS, svi_county.EP_DISABL))
E_SNGPNT = dict(zip(svi_county.FIPS, svi_county.EP_SNGPNT))
E_MINRTY = dict(zip(svi_county.FIPS, svi_county.EP_MINRTY))
E_LIMENG = dict(zip(svi_county.FIPS, svi_county.EP_LIMENG))
E_MUNIT = dict(zip(svi_county.FIPS, svi_county.EP_MUNIT))
E_MOBILE = dict(zip(svi_county.FIPS, svi_county.EP_MOBILE))
E_CROWD = dict(zip(svi_county.FIPS, svi_county.EP_CROWD))
E_NOVEH = dict(zip(svi_county.FIPS, svi_county.EP_NOVEH))
E_GROUPQ = dict(zip(svi_county.FIPS, svi_county.EP_GROUPQ))


# Calculate the state value for the SVI variables
E_POV_State = total_state(E_POV)
E_UNEMP_State = total_state(E_UNEMP) 
E_PCI_State = total_state(E_PCI)
E_NOHSDP_State = total_state(E_NOHSDP) 
E_AGE65_State = total_state(E_AGE65)
E_AGE17_State = total_state(E_AGE17)
E_DISABL_State = total_state(E_DISABL)
E_SNGPNT_State = total_state(E_SNGPNT)
E_MINRTY_State = total_state(E_MINRTY)
E_LIMENG_State = total_state(E_LIMENG)
E_MUNIT_State = total_state(E_MUNIT)
E_MOBILE_State = total_state(E_MOBILE)
E_CROWD_State = total_state(E_CROWD)
E_NOVEH_State = total_state(E_NOVEH)
E_GROUPQ_State = total_state(E_GROUPQ)



# Calculate the proportinal values for the SVI variables
E_POV_Prop = Proportional(E_POV, E_POV_State )
E_UNEMP_Prop = Proportional(E_UNEMP, E_UNEMP_State ) 
E_PCI_Prop = Proportional(E_PCI, E_PCI_State )
E_NOHSDP_Prop = Proportional(E_NOHSDP, E_NOHSDP_State ) 
E_AGE65_Prop = Proportional(E_AGE65, E_AGE65_State )
E_AGE17_Prop = Proportional(E_AGE17, E_AGE17_State )
E_DISABL_Prop = Proportional(E_DISABL, E_DISABL_State )
E_SNGPNT_Prop = Proportional(E_SNGPNT, E_SNGPNT_State )
E_MINRTY_Prop = Proportional(E_MINRTY, E_MINRTY_State )
E_LIMENG_Prop = Proportional(E_LIMENG, E_LIMENG_State )
E_MUNIT_Prop = Proportional(E_MUNIT, E_MUNIT_State )
E_MOBILE_Prop = Proportional(E_MOBILE, E_MOBILE_State )
E_CROWD_Prop = Proportional(E_CROWD, E_CROWD_State )
E_NOVEH_Prop = Proportional(E_NOVEH, E_NOVEH_State )
E_GROUPQ_Prop = Proportional(E_GROUPQ, E_GROUPQ_State )

ACI_Prop = Proportional(ACI_total, ACI_State)


# SVI calculation 

We calculate the ratio of county value to state value by population for each SVI variables (we use EP-estimate percentage- values in the CDC data set), then we take the average of all 15 SVI variables. 

Let SVI variable set be K, where  

K = { Below Poverty, Unemployed, Income, No High School Diploma, Aged 65 or Older, Aged 17 or Younger, Civilian with a Disability, Single-Parent Households, Minority, Speaks English “Less than Well”, Multi-Unit Structures, Mobile Homes, Crowding, No Vehicle, Group Quarters }

We will use these variables in a county base and state base. While County base values are exactly same as the estimated values for these variables in the CDC website, to calculate the state base, we simply sum the county values for all of the counties in each state for each variable. Then we use the following formula to calculate the SVI value for each county.

Let $S$ is the set of states and $j$ is a county in the state $s$, where $s \in S$, $c^k_j$ SVI variable $k \in K$ value for county j, and $c_s$ SVI variable value for state s.

$SVI_j = \frac{1}{15}\sum_{k \in K} \frac{c^k_j}{c^k_s}$


In [267]:

from collections import Counter
# Sum all SVI variable values for each county
SVI_county_sum = dict(Counter(E_POV_Prop) + Counter(E_UNEMP_Prop) + Counter(E_PCI_Prop) + Counter(E_NOHSDP_Prop) + Counter(E_AGE65_Prop) + Counter(E_AGE17_Prop) + Counter(E_DISABL_Prop) + Counter(E_SNGPNT_Prop) + Counter(E_MINRTY_Prop) + Counter(E_LIMENG_Prop) + Counter(E_MUNIT_Prop) + Counter(E_MOBILE_Prop) + Counter(E_CROWD_Prop) + Counter(E_NOVEH_Prop) + Counter(E_GROUPQ_Prop))

# Divide the sum of all SVI variable values
SVI_county = {j: SVI_county_sum[j]/15 for j in SVI_county_sum }


# Proportional Allocation

We consider allocating 1 million CHW over the states proportional to Medicaid enrollment in each state. Further, we allocate CHW to counties in each state proportional to different county vulnerability criterias as follow.

- MEDICAID
- SVI
- YPLL
- UNEMPLOYMENT
- LAST 14 DAYS COVID CASES
- LAST 14 DAYS COVID CASES / POP
- COVID DEATHS / POP

To calculate the total number of allocated CHW to per county according to these vulnerability criterias, we define the following function called "Proportional_allocation", in which we multiply the CHW allocated to each state with the ratio of the chosen vulnerability criteria of the county to the chosen vulnerability criteria of the state, the function return a dictionary with the counties as keys and the number of CHW allocated to each county for the chosen vulnerability criteria as values. 

In [268]:
def Proportional_allocation(county_level, state_level, state_budget):
    prop_allocate = {}
       
    for (j,s) in cartesian_pro_county_state:
        if state_level[s] >= 1e-6 and j in county_level:
            #print (j,s, county_level[j],state_level[s], state_budget[s])
            prop_allocate[j,s] = (float(county_level[j])/float(state_level[s]))*float(state_budget[s])
        
        else:
            prop_allocate[j,s] = 0
            
    
    return prop_allocate

In [269]:
Medicaid_dem = Proportional_allocation(ACI_total, ACI_State, Medicaid_state)
Medicaid_demand = {m[0]: Medicaid_dem[m] for m in Medicaid_dem}
for s in State:
#    print (s)
#    print ( ACI_State[s])
    print (s,Medicaid_state[s])
#for m in Medicaid_demand:
#    print (m, Medicaid_demand[m])

NEW MEXICO 772102
ALABAMA 957116
ALASKA 231145
ARIZONA 1839932
ARKANSAS 830467
CALIFORNIA 11847711
COLORADO 1337805
CONNECTICUT 874974
DELAWARE 239009
FLORIDA 3892552
GEORGIA 1928703
IDAHO 340742
ILLINOIS 2987496
INDIANA 1602976
IOWA 699741
KANSAS 401103
KENTUCKY 1416013
LOUISIANA 1585024
MAINE 232455
MARYLAND 1372695
MASSACHUSETTS 1616404
MICHIGAN 2439425
MINNESOTA 1085778
MISSISSIPPI 632427
MISSOURI 923641
MONTANA 247333
NEBRASKA 254159
NEVADA 685073
NEW HAMPSHIRE 193436
NEW JERSEY 1759653
NEW YORK 6263164
NORTH CAROLINA 1851558
NORTH DAKOTA 96757
OHIO 2788134
OKLAHOMA 797220
OREGON 1053931
PENNSYLVANIA 3069309
RHODE ISLAND 305208
SOUTH CAROLINA 1048276
SOUTH DAKOTA 114059
TENNESSEE 1489536
TEXAS 4457644
UTAH 338812
VERMONT 161049
VIRGINIA 1497770
WASHINGTON 1780968
WEST VIRGINIA 521290
WISCONSIN 1112844
WYOMING 59302
HAWAII 351337
DISTRICT OF COLUMBIA 248591
PUERTO RICO 1622194


In [270]:
#print ('Med', Medicaid_demand[35039])
#print ('Med', Medicaid_demand[4017], 'Pop', population_county[4017], 'Med_capita', 100000*(Medicaid_demand[4017]/population_county[4017]))

In [271]:
#Further we create additional vulnerability values by considering SVI, YPLL, Unemployment, COVID, COVID_capita, COVID_death and COVID_death capita and the number of Medicaid enrolles in each county together

Covid_capita = {j: 100000*(COVID_14days[j]/population_county[j]) for j in location}

Covid_death_capita = {j: 100000*(County_covid_death[j]/population_county[j]) for j in location}

Medicaid_capita = {j: 100000*(Medicaid_demand[j]/population_county[j]) for j in location}

Unemployment_capita = dict(zip(df_unemp.FIPS, df_unemp.Unemployed_Rate))

#Three_month_lag_covid_death_capita = {j: 100000*(three_month_lag_death[j]/population_county[j]) for j in location}

#Two_month_lag_covid_death_capita = {j: 100000*(two_month_lag_death[j]/population_county[j]) for j in location}

#One_month_lag_covid_death_capita = {j: 100000*(one_month_lag_death[j]/population_county[j]) for j in location}

In [272]:
for j in location:
    print (j, County_covid_death[j], population_county[j], Covid_death_capita[j])

35039 17 38921 43.678219984070296
1001 32 55869 57.27684404589307
1009 34 57826 58.79708089786602
1013 41 19448 210.81859317153436
1015 77 113605 67.77870692311079
1017 48 33254 144.3435376195345
1031 12 52342 22.926139620190288
1033 37 55241 66.97923643670462
1039 32 37049 86.372101811115
1043 30 83768 35.813198357367966
1045 55 49172 111.85227365167168
1051 60 81209 73.88343656491276
1055 60 102268 58.66937849571713
1067 6 17205 34.873583260680036
1069 36 105882 34.00011333371111
1071 23 51626 44.55119513423469
1077 49 92729 52.842152940288365
1079 35 32924 106.30543068885919
1083 38 98915 38.41682252438963
1089 117 372909 31.374946702814896
1095 53 96774 54.76677620021907
1097 334 413210 80.8305704121391
1103 41 119679 34.258307639602606
1111 19 22722 83.61939970073057
1113 3 57961 5.175894135711944
1115 54 89512 60.32710698006971
1117 73 217702 33.532075957042196
1121 40 79978 50.01375378229013
2261 1 9202 10.867202782003913
4021 234 462789 50.56299955271192
5009 40 37432 106.86044

27143 3 14865 20.181634712411707
27147 5 36649 13.642937051488445
27151 3 9266 32.37642995898985
27153 10 24664 40.5449237755433
27157 1 21627 4.623849817357932
27161 10 18612 53.72877713303245
27163 83 262440 31.62627648224356
27167 4 6207 64.4433703882713
27173 8 9709 82.39777526006797
28003 20 36953 54.122804643736636
28033 85 184945 45.959609613668924
28045 33 47632 69.281155525697
28057 31 23390 132.53527148354
28059 103 143617 71.7185291434858
28081 87 85436 101.83061004728685
28089 96 106272 90.33423667570008
28093 37 35294 104.8336827789426
28095 76 35252 215.5906047883808
28109 64 55535 115.24263977671738
28121 89 155271 57.31913879604047
28129 16 15916 100.52777079668257
28159 22 17955 122.52854358117517
28161 23 12108 189.9570531879749
29003 8 17712 45.16711833785004
29007 11 25388 43.327556325823224
29011 4 11754 34.03096818104475
29013 8 16172 49.46821667078902
29021 53 87364 60.66572043404606
29025 1 9020 11.086474501108647
29031 60 78871 76.0735885179597
29033 4 8679 46.

39007 48 97241 49.36189467405723
39011 21 45656 45.996145084983354
39013 28 67006 41.787302629615255
39015 4 43432 9.209799226376866
39017 135 383134 35.23571387556312
39019 10 26914 37.15538381511481
39021 4 38885 10.286742960010287
39023 62 134083 46.2400155127794
39025 41 206428 19.861646675838546
39027 14 41968 33.358749523446434
39029 88 101883 86.37358538715978
39033 13 41494 31.329830818913578
39037 55 51113 107.60471895603857
39039 16 38087 42.00908446451545
39043 59 74266 79.4441601809711
39045 59 157574 37.44272532270553
39051 18 42126 42.72895598917533
39053 13 29898 43.48116930898388
39055 51 93649 54.45867014063151
39057 55 168937 32.556515150618274
39059 8 38875 20.578778135048232
39063 32 75783 42.225829011783645
39065 17 31365 54.200542005420054
39067 3 15040 19.946808510638295
39069 20 27006 74.05761682589055
39071 16 43161 37.07050346377517
39073 10 28264 35.380696292103025
39077 18 58266 30.892801977139328
39081 4 65325 6.123230003827019
39083 18 62322 28.88225666698

55081 8 46253 17.296175383218387
55083 23 37930 60.63801740047456
55087 90 187885 47.90164196183836
55089 31 89221 34.745183308862266
55091 1 7287 13.723068478111704
55093 9 42754 21.050661926369465
55095 5 43783 11.419957517758034
55097 30 70772 42.3896456225626
55101 131 196311 66.73085053817667
55103 10 17252 57.964293994899144
55105 59 163354 36.117878962253755
55107 5 14178 35.265904923120324
55109 17 90687 18.745795979578112
55111 10 64442 15.517829986654666
55115 40 40899 97.80190224699871
55117 43 115340 37.28108201838044
55119 10 20343 49.1569581674286
55121 7 29649 23.609565246719956
55123 4 30822 12.977743170462656
55127 42 103868 40.43593792120769
55131 56 136034 41.16617904347443
55135 87 50990 170.62169052755442
55137 6 24443 24.546905044388986
55141 18 72999 24.657872025644185
56003 9 11790 76.33587786259542
56007 4 14800 27.027027027027028
56009 5 13822 36.17421501953408
56011 1 7584 13.185654008438817
56013 21 39261 53.488194391380766
56015 6 13211 45.41669820604042
56

21187 3 10901 27.520410971470508
21197 0 12359 0.0
21221 0 14651 0.0
21225 6 14381 41.72171615325777
21233 5 12942 38.63390511512904
22005 100 126604 78.98644592587912
22011 43 37497 114.67584073392538
22023 3 6973 43.02308905779435
22053 45 31368 143.4583014537108
22057 131 97614 134.2020611797488
22073 173 153279 112.86608080689463
22079 165 129648 127.26767863754165
22081 25 8442 296.1383558398484
22089 65 53100 122.4105461393597
22099 65 53431 121.65222436413318
22111 54 22108 244.255473131898
22121 42 26465 158.70017003589646
22123 14 10830 129.27054478301017
22125 25 15568 160.5858170606372
23003 1 67055 1.491313101185594
23015 1 34634 2.8873361436738465
24003 284 579234 49.0302710130966
24005 688 827370 83.15505759212928
24011 9 33406 26.94126803568221
24017 102 163257 62.47817857733513
24027 130 325690 39.915256839325735
24041 6 37181 16.137274414351417
24045 54 103609 52.11902440907644
25001 186 212990 87.32804357012066
25005 784 565217 138.70778833616114
25015 158 160830 98.2

48485 74 132230 55.96309460788021
48493 28 51070 54.82670843939691
48497 20 69984 28.57796067672611
48499 44 45539 96.62047914973978
49003 8 56046 14.273989223138138
49005 12 128289 9.353880691251783
49007 3 20463 14.660606949127693
49011 50 355481 14.065449348910349
49013 0 19938 0.0
49015 3 10012 29.96404314822213
49019 1 9754 10.25220422390814
49021 4 54839 7.294079031346304
49027 1 13188 7.582650894752806
49029 0 12124 0.0
49033 0 2483 0.0
49041 1 21620 4.6253469010175765
49043 1 42145 2.372760707082691
49045 8 72259 11.071285237825045
49047 0 35734 0.0
49051 10 34091 29.3332551113197
49057 37 260213 14.219120489752626
50001 2 36777 5.4381814721157244
50007 39 163774 23.81330369899984
50009 0 6163 0.0
50021 1 58191 1.7184788025639703
51001 21 32316 64.98329001114
51003 24 109330 21.951888777096862
51036 5 6963 71.80812868016659
51037 1 11880 8.417508417508419
51087 243 330818 73.45428604247653
51091 0 2190 0.0
51103 1 10603 9.431293030274452
51115 0 8834 0.0
51117 35 30587 114.4276

12029 11 16826 65.37501485795792
12107 54 74521 72.46279572201125
13005 21 11164 188.1046219992834
13017 37 16700 221.5568862275449
13021 222 153159 144.94740759602766
13069 70 43273 161.76368636332126
13081 29 22372 129.62631861255142
13107 44 22646 194.2947981983573
13135 485 936250 51.802403204272366
13165 33 8676 380.35961272475794
13167 27 9643 279.995851913305
13179 28 61435 45.576625701961426
13183 5 19559 25.563679124699625
13235 24 11137 215.4978899164946
13269 13 8020 162.09476309226932
13289 11 8120 135.4679802955665
13307 2 2607 76.71653241273494
13313 70 104628 66.90369690713767
15003 173 974563 17.751546077575284
15005 0 86 0.0
16053 9 24412 36.86711453383582
16073 5 11823 42.2904508162057
17031 5881 5150233 114.18900853611866
17077 30 56750 52.863436123348016
20081 1 3968 25.201612903225804
20209 167 165429 100.949652116618
21043 4 26797 14.927044072097623
21135 17 13275 128.060263653484
21181 3 7269 41.27115146512588
21195 5 57876 8.63915958255581
21231 2 20333 9.836226

21127 1 15317 6.528693608408957
21133 2 21553 9.279450656521133
21153 1 12161 8.223007976317737
21175 0 13309 0.0
22029 17 19259 88.27041902487149
22041 52 20015 259.8051461403947
22097 140 82124 170.47391749062393
22107 3 4334 69.2201199815413
22117 72 46194 155.86439797376283
28069 15 9742 153.97249024840895
28125 16 4321 370.28465632955334
28151 105 43909 239.1309298777016
29069 13 29131 44.625999794033845
29133 6 13180 45.52352048558422
30005 8 6681 119.74255350995361
31173 4 7224 55.370985603543744
32015 4 5532 72.30657989877079
34013 2161 798975 270.4715416627554
35025 34 71070 47.840157591107356
35047 0 27277 0.0
37061 55 58741 93.63136480482116
37091 36 23677 152.04628964818178
37165 33 34823 94.7649541969388
37177 3 4016 74.7011952191235
38079 11 14176 77.59593679458239
45011 25 20866 119.81213457298956
45061 36 16828 213.92916567625386
46095 1 2061 48.5201358563804
47097 20 25633 78.02442164397456
48107 13 5737 226.59926791005753
48135 125 166223 75.20018288684479
48145 10 17

In [273]:
County_covid_death[9009], population_county[9009], Covid_death_capita[9009]

(1151, 854757, 134.65815430584365)

In [274]:
# Dictionary for the total Covid per capita for each state
Total_covid_cap = total_state(Covid_capita) 


# Dictionary for the total Covid death per capita for each state
Total_covid_death_cap = total_state(Covid_death_capita) 


# Dictionary for the total Medicaid per capita for each state
Total_medicaid_cap = total_state(Medicaid_capita)


# Dictionary for the total Unemployment per capita for each state
Total_unemployment_cap = total_state(Unemployment_capita)



In [275]:
# Dictionaries for the different vulnerability criteria values for states

# Dictionary for total Medicaid patient numbers for each state
Medicaid_demand_state = total_state(Medicaid_demand) 

# Dictionary for total positive COVID cases for last 14 days in each state
Covid_state = total_state(COVID_14days) 

# Dictionary for total SVI values for each state
SVI_state = total_state(SVI_county) 

# Dictionary for total YPLL values for each state
YPLL_state = total_state(YPLL) 

# Dictionary for total Unemployment numbers for each state
Unemployment_state = total_state(Unemployment)  

In [276]:
print(Medicaid_demand_state)

{'NEW MEXICO': 772102.0, 'ALABAMA': 957115.9999999999, 'ALASKA': 231145.00000000006, 'ARIZONA': 1839932.0, 'ARKANSAS': 830466.9999999999, 'CALIFORNIA': 11847710.999999998, 'COLORADO': 1337804.9999999998, 'CONNECTICUT': 874974.0, 'DELAWARE': 239009.0, 'FLORIDA': 3892552.0, 'GEORGIA': 1928703.000000001, 'IDAHO': 340741.9999999999, 'ILLINOIS': 2987495.999999999, 'INDIANA': 1602975.9999999995, 'IOWA': 699741.0000000003, 'KANSAS': 401103.00000000006, 'KENTUCKY': 1416012.9999999995, 'LOUISIANA': 1585023.9999999998, 'MAINE': 232455.00000000003, 'MARYLAND': 1372695.0, 'MASSACHUSETTS': 1616404.0, 'MICHIGAN': 2439425.0000000005, 'MINNESOTA': 1085777.9999999998, 'MISSISSIPPI': 632427.0000000002, 'MISSOURI': 923641.0000000002, 'MONTANA': 247333.0, 'NEBRASKA': 254158.99999999997, 'NEVADA': 685073.0000000001, 'NEW HAMPSHIRE': 193436.0, 'NEW JERSEY': 1759653.0000000002, 'NEW YORK': 6263164.0, 'NORTH CAROLINA': 1851558.0000000005, 'NORTH DAKOTA': 96757.00000000001, 'OHIO': 2788134.0, 'OKLAHOMA': 79722

# 1 million CHW allocation to states

We allocate 1 million CHWs to states proportional to total Medicaid enrolles in each state.

Let's $FedCHW$ represents the number of CHW will be allocated within states by the federal government, which is 1 million in our project. $TotMed$ represents the total Medicaid enrollee numbers over the US, $Med_s$ is the total Medicaid enrollee numbers in state $s \in S$, and $CHW_s$ is the total number of CHW allocated to state $ s\in S$. 

$CHW_s = FedCHW*\frac{Med_s}{TotMed}$



In [277]:
# We consider allocation of 1 million CHW all over the US
Federal_budget_CHW = 1000000

# First, we calculate the Total Medicaid enrolles all over the US
Total_federal_need = sum(Medicaid_demand_state[s] for s in State)

# Allocate the 1 million CHWs proportional to Medicaid enrolles in each state
Medicaid_budget_state = {s: (Medicaid_demand_state[s]/Total_federal_need)*Federal_budget_CHW  for s in State}

In [278]:
for s in State:
    print (s, Medicaid_demand_state[s], Total_federal_need, Federal_budget_CHW, (Medicaid_demand_state[s]/Total_federal_need)*Federal_budget_CHW)

NEW MEXICO 772102.0 76256043.0 1000000 10125.12542776446
ALABAMA 957115.9999999999 76256043.0 1000000 12551.3462585516
ALASKA 231145.00000000006 76256043.0 1000000 3031.1696084204114
ARIZONA 1839932.0 76256043.0 1000000 24128.343507149984
ARKANSAS 830466.9999999999 76256043.0 1000000 10890.5073923125
CALIFORNIA 11847710.999999998 76256043.0 1000000 155367.50313676777
COLORADO 1337804.9999999998 76256043.0 1000000 17543.593233653624
CONNECTICUT 874974.0 76256043.0 1000000 11474.159497103725
DELAWARE 239009.0 76256043.0 1000000 3134.295861640762
FLORIDA 3892552.0 76256043.0 1000000 51045.81678857897
GEORGIA 1928703.000000001 76256043.0 1000000 25292.461084035018
IDAHO 340741.9999999999 76256043.0 1000000 4468.393409818024
ILLINOIS 2987495.999999999 76256043.0 1000000 39177.17052273483
INDIANA 1602975.9999999995 76256043.0 1000000 21020.97010200227
IOWA 699741.0000000003 76256043.0 1000000 9176.203910816621
KANSAS 401103.00000000006 76256043.0 1000000 5259.950349115283
KENTUCKY 1416012.99

In [279]:
print (Covid_state)

{'NEW MEXICO': 14131.0, 'ALABAMA': 19497.0, 'ALASKA': 5759.0, 'ARIZONA': 22685.0, 'ARKANSAS': 16631.0, 'CALIFORNIA': 72609.0, 'COLORADO': 40895.0, 'CONNECTICUT': 14132.0, 'DELAWARE': 2787.0, 'FLORIDA': 63164.0, 'GEORGIA': 30024.0, 'IDAHO': 14342.0, 'ILLINOIS': 127985.0, 'INDIANA': 51754.0, 'IOWA': 44880.0, 'KANSAS': 27145.0, 'KENTUCKY': 24945.0, 'LOUISIANA': 8007.0, 'MAINE': 1734.0, 'MARYLAND': 14962.0, 'MASSACHUSETTS': 20611.0, 'MICHIGAN': 62132.0, 'MINNESOTA': 52228.0, 'MISSISSIPPI': 10813.0, 'MISSOURI': 41513.0, 'MONTANA': 11328.0, 'NEBRASKA': 21608.0, 'NEVADA': 15054.0, 'NEW HAMPSHIRE': 2294.0, 'NEW JERSEY': 28849.0, 'NEW YORK': 25036.0, 'NORTH CAROLINA': 31364.0, 'NORTH DAKOTA': 16247.0, 'OHIO': 58419.0, 'OKLAHOMA': 22117.0, 'OREGON': 8982.0, 'PENNSYLVANIA': 40057.0, 'RHODE ISLAND': 6079.0, 'SOUTH CAROLINA': 14434.0, 'SOUTH DAKOTA': 15708.0, 'TENNESSEE': 35406.0, 'TEXAS': 112163.0, 'UTAH': 29076.0, 'VERMONT': 394.0, 'VIRGINIA': 18375.0, 'WASHINGTON': 16141.0, 'WEST VIRGINIA': 6759

# Proportional allocation for different vulnerability values

Let V = {Medicaid, SVI, YPLL, Unemployment, COVID, COVID_capita, COVID_death and COVID_death capita, SVI and Medicaid, YPLL and Medicaid, Unemployment and Medicaid, COVID and Medicaid, COVID_capita and Medicaid, COVID_death and Medicaid and COVID_death capita and Medicaid}. We assume $v_j$ represent the vulnerability value for county $j \in J$, while $v_s$ represent the sum of the vulnerability values for each county in the state of county j.

$Prop_{v_j} = \frac{v_j}{v_s}*CHW_s$



In [280]:
# Calling proportional allocation function for different vulnerability criterias

# Proportional allocation according to cumulative Covid death in per capita in each county
Proportional_to_covid_death_cap = Proportional_allocation(Covid_death_capita, Total_covid_death_cap,Medicaid_budget_state)

# Propportional allocation according to Medicaid enrollee number in each county
Proportional_to_medicaid = Proportional_allocation(Medicaid_demand, Medicaid_demand_state,Medicaid_budget_state )

# Proportional allocation according to Medicaid enrolles per capita in each county
Proportional_to_medicaid_cap = Proportional_allocation(Medicaid_capita, Total_medicaid_cap, Medicaid_budget_state )

# Proportional allocation according to last 14 days positive COVID cases in each county
Proportional_to_covid = Proportional_allocation(COVID_14days, Covid_state, Medicaid_budget_state)

# Proportional allocation according to SVI score in each county
Proportional_to_SVI = Proportional_allocation(SVI_county, SVI_state, Medicaid_budget_state)

# Proportional allocation according to YPLL in each county
Proportional_to_YPLL = Proportional_allocation(YPLL, YPLL_state, Medicaid_budget_state)

# Proportional allocation according to Unemployment  in each county
Proportional_to_unemployment = Proportional_allocation(Unemployment, Unemployment_state, Medicaid_budget_state)

# Proportional allocation according to Medicaid enrolles per capita in each county
Proportional_to_unemployment_cap = Proportional_allocation(Unemployment_capita, Total_unemployment_cap, Medicaid_budget_state )

# Proportional allocation according to last 14 days positive COVID cases per capita in each county
Proportional_to_covid_capita = Proportional_allocation(Covid_capita, Total_covid_cap, Medicaid_budget_state)



# Normalize values for comparison
To be able compare the different vulnerability values for each county, we normalize all vulnerability values as follows. 

Let $m_{v_s} = \min \{v_j, \text{ for county j in state s }\}$  and 
$M_{v_s} = \max \{v_j, \text{ for county j in state s }\}$.

We calculate the normalize value for each vulnerability for each county by substracting the min vulnerability in the state of the county and dividing that by the differences between max and min value of the vulnerability values in the state. Mathematical formulation for the normalization is as follows.

$N_{v_j} = \frac{v_j - m_{v_s}}{M_{v_s} - m_{v_s}}$ 

for each $v \in V$, where V = {Medicaid, SVI, YPLL, Unemployment, COVID, COVID_capita, COVID_death and COVID_death capita, SVI and Medicaid, YPLL and Medicaid, Unemployment and Medicaid, COVID and Medicaid, COVID_capita and Medicaid, COVID_death and Medicaid and COVID_death capita and Medicaid}, j is a county in each state $s \in S$.


In [281]:
#Normalize function to normalize the vulnerability values to be able to compare them

def normalize(dict_1):
    
    result = {}
    min_data = {s: min(dict_1[j] for j in location if (j,s) in cartesian_pro_county_state) for s in State }
    max_data = {s: max(dict_1[j] for j in location if (j,s) in cartesian_pro_county_state) for s in State }
    
    for (j,s) in cartesian_pro_county_state:
        
        if (max_data[s] - min_data[s]) != 0 :
    
            result[j] = (dict_1[j] - min_data[s])/(max_data[s] - min_data[s])
        
        else:
            result[j] = 1
    
    return result 

# Percentile Rank

<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.percentileofscore.html"> The function scipy.stats.percentileofscore (a, score, kind='rank')   </a>
computes the percentile rank of a score relative to a list of scores. 
"rank": Average percentage ranking of score. In case of multiple matches, average the percentage rankings of all matching scores.

In [282]:
from scipy import stats

# Calculate percentile ranks

def percentile_ranks(data):
    x = {s: [] for s in State}

    for (j,s) in cartesian_pro_county_state:
         
        x[s].append(data[j])
    
    
    
    percentile_ranks = {i: stats.percentileofscore(x[s], data[i], 'rank') for (i,s) in cartesian_pro_county_state}

    return percentile_ranks

In [283]:
# Write timestamp 

time_stamp = time.strftime('%m-%d-%Y %H:%M:%S')
with open('Output/time_stamp.csv','w') as f:
    w = csv.writer(f)
    now = time.strftime('%m/%d/%Y %H:%M:%S')
    w.writerow(['time',now])
    

In [284]:
#print (Medicaid_demand)

In [285]:
#Write a function to order the dicts
def order_k(dict_1):
    dict_2 = {}
    for m in location:
        if m in dict_1.keys():
            dict_2[m] = dict_1[m]
        else:
            dict_2[m] = 0
    
    return dict_2
            

In [286]:


Medicaid_demand = order_k(Medicaid_demand)#{m: Medicaid_demand[m] for m in location}
COVID_14days = order_k(COVID_14days)#{m: COVID_14days[m] for m in location}
SVI_county = order_k(SVI_county)#{m: SVI_county[m] for m in location}
YPLL = order_k(YPLL)#{m: YPLL[m] for m in location}
Unemployment = order_k(Unemployment)#{m: Unemployment[m] for m in location}
Unemployment_capita = order_k(Unemployment_capita)#{m: Unemployment_capita[m] for m in location}

In [287]:
# Write file allocation with each strategies for each county 

Strategies = ["Medicaid_demand", "Medicaid_capita", "Covid", "SVI"
              , "YPLL","Unemployment", "Unemployment_capita", "Covid_capita",  "Covid_death_capita" ]

fieldnames = []  
fieldnames.append('County_FIPS')


SVI_values = {i:SVI_county[i] for i in location}
s_count = 1
for s in Strategies:   
    fieldnames.append('Proportional_allocation_to_' + s)
    fieldnames.append(s)
    fieldnames.append('Percentile_ranks_' + s)

    
        

writefile = 'Output/County_level_proportional_allocation_for_all_policies.csv'
with open( writefile, 'w' ) as f:
    writer = csv.writer(f)                
    writer.writerow(fieldnames)
    for row in zip(location
                   , Proportional_to_medicaid.values(),         Medicaid_demand.values(),      percentile_ranks(Medicaid_demand).values()
                   , Proportional_to_medicaid_cap.values(),     Medicaid_capita.values(),      percentile_ranks(Medicaid_capita).values()
                   , Proportional_to_covid.values(),            COVID_14days.values(),         percentile_ranks(COVID_14days).values()
                   , Proportional_to_SVI.values(),              SVI_county.values(),           percentile_ranks(SVI_county).values()
                   , Proportional_to_YPLL.values(),             YPLL.values(),                 percentile_ranks(YPLL).values()
                   , Proportional_to_unemployment.values(),     Unemployment.values(),         percentile_ranks(Unemployment).values()
                   , Proportional_to_unemployment_cap.values(), Unemployment_capita.values(),  percentile_ranks(Unemployment_capita).values()
                   , Proportional_to_covid_capita.values(),     Covid_capita.values(),         percentile_ranks(Covid_capita).values()
                   , Proportional_to_covid_death_cap.values(),  Covid_death_capita.values(),   percentile_ranks(Covid_death_capita).values() ):                    
       
        writer.writerow(row)

In [288]:
writefile = 'Output/State_level_allocation.csv'

cl = ['State', 'CHW_allocation']
with open( writefile, 'w' ) as f:
    writer = csv.writer(f)                
    writer.writerow(cl)
    for row in zip( State, Medicaid_demand_state.values()):
        writer.writerow(row)