In [1]:
# Suzan Iloglu, May 21,2020
# Import packages
import csv
import gurobipy as gp
from itertools import product
import geopandas as gpd
import pandas as pd
import numpy as np
import math
import time
import requests
import io
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
pd.options.display.max_columns =200
from IPython.display import Image


# MAPPING THE NEW POLITICS OF CARE: COMMUNITY HEALTH WORKERS
The project presents multiple options for how individual workers in such a Community Health Corps might be distributed within each state. It shows that what you choose to prioritize greatly impacts where care would be sent. We can define communities in greatest need in many ways: we can think about our current crisis and send people to where the COVID19 pandemic rages most fiercely; we can think of long term measures of social and economic inequality embedded in metrics like the Centers for Disease Control and Prevention’s Social Vulnerability Index; we can focus on the places with too many people dying too young and use the County Health Rankings Years-of-Potential-Life-Lost measure; we can think of joblessness and how the pandemic has thrown many into unemployment and target our resources in this way. 

The followings are our options to choose to define vulnerability:


- SOCIAL VULNERABILITY INDEX
- MEDICAID 
- UNEMPLOYMENT
- YEARS OF POTENTIAL LIFE LOST
- TOTAL COVID CASES
- COVID CASES BY POPULATION
- COVID DEATHS BY POPULATION

We will start with Social Vulnerability Index (SVI) from CDC website.

### I. Importing SVI data which includes the variables for calculating county SVI for each state
The CDC uses both a USA-wide and a state by state SVI scores. For our project given that funding is likely going to be managed at a state level, using a state by state SVI scores makes the most sense and will be most sensitive to regional socioeconomic differences. Even though the CDC SVI scores are calculated using percentile rankings, the data sets include raw data estimates for each variables. The following table shows the variablaes used in the method of calculating SVI scores. 




      American Community Survey (ACS), 2014-2018 (5-year) data for the following estimates:
<img src="Data/img/SVI_comp.png" width="500">


Note: Full documentation for 2018 data is available <a href="https://svi.cdc.gov/data-and-tools-download.html">here</a> 
This part of the code shows preliminary mapping of <a href = "https://svi.cdc.gov/">the CDC's Social Vulnerability Index</a>.

Later in the notebook, we will provide the formula to create the SVI value we use in our project. First, we import the data for the US mainland and Puerto Rico.

In [2]:
## import svi data downloaded from CDC website as cited above

## 48 state SVI scores by county
svi_counties_mainland = gpd.read_file("Data/SVI2018_US_COUNTY/SVI2018_US_county.shp")

## Puerto Rico SVI scores by county
svi_counties_puerto_rico = gpd.read_file("Data/PuertoRico_COUNTY/SVI2018_PuertoRico_county.shp")

## Merge 48 states and Puerto Rico SVI 
svi_counties = pd.concat([svi_counties_mainland,svi_counties_puerto_rico ], sort = False)


In [3]:
## Replacing -999 values with 0 for calculations
svi_county = svi_counties.fillna(0)
svi_county  = svi_county.replace(-999, 0)
svi_county['FIPS'] = svi_county['FIPS'].astype(int)

In [4]:
## Create the list for State
State = svi_county.STATE.unique().tolist()

In [5]:
# Create a seperate dictionary for the variables to calculate SVI

# Persons below poverty estimate, 2014-2018 ACS
E_POV = dict(zip(svi_county.FIPS, svi_county.E_POV))

# Civilian (age 16+) unemployed estimate, 2014-2018 ACS
E_UNEMP = dict(zip(svi_county.FIPS, svi_county.E_UNEMP))

# Per capita income estimate, 2014-2018 ACS
E_PCI = dict(zip(svi_county.FIPS, svi_county.E_PCI))

# Persons (age 25+) with no high school diploma estimate, 2014-2018 ACS
E_NOHSDP = dict(zip(svi_county.FIPS, svi_county.E_NOHSDP))

# Persons aged 65 and older estimate
E_AGE65 = dict(zip(svi_county.FIPS, svi_county.E_AGE65))

# Persons aged 17 and younger estimate
E_AGE17 = dict(zip(svi_county.FIPS, svi_county.E_AGE17))

# Population with a disability estimate
E_DISABL = dict(zip(svi_county.FIPS, svi_county.E_DISABL))

# Single parent households with children under 18 estimate
E_SNGPNT = dict(zip(svi_county.FIPS, svi_county.E_SNGPNT))

# Minority (all persons except white, nonHispanic) estimate, 2014-2018 ACS
E_MINRTY = dict(zip(svi_county.FIPS, svi_county.E_MINRTY))

# Persons (age 5+) who speak English "less than well" estimate, 2014-2018 ACS
E_LIMENG = dict(zip(svi_county.FIPS, svi_county.E_LIMENG))

# Housing in structures with 10 or more units estimate, 2014-2018 ACS
E_MUNIT = dict(zip(svi_county.FIPS, svi_county.E_MUNIT))

# Mobile homes estimate MOE, 2014-2018 ACS
E_MOBILE = dict(zip(svi_county.FIPS, svi_county.E_MOBILE))

# At household level (occupied housing units), more people than rooms estimate, 2014-2018 ACS
E_CROWD = dict(zip(svi_county.FIPS, svi_county.E_CROWD))

# Households with no vehicle available estimate, 2014-2018 ACS
E_NOVEH = dict(zip(svi_county.FIPS, svi_county.E_NOVEH))

# Persons in institutionalized group quarters estimate, 2014-2018 ACS
E_GROUPQ = dict(zip(svi_county.FIPS, svi_county.E_GROUPQ))

# Percentage of persons below poverty estimate
E_POV = dict(zip(svi_county.FIPS, svi_county.E_POV))

# Medicaid 
Medicaid is a means-tested health insurance program for low-income children, pregnant women, adults, seniors, and people with disabilities. Medicaid is jointly funded by federal and state governments and managed by states within federal standards and a wide range of state options. <a href="https://data.medicaid.gov/Enrollment/State-Medicaid-and-CHIP-Applications-Eligibility-D/n5ce-jxme"> Data Source for Medicaid Enrollment </a> 

In [6]:

import sodapy
from sodapy import Socrata

# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
client = Socrata("data.medicaid.gov", None)


# Returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("83yt-67it", limit=4000)


# Read the medicaid demand data
#df_mm = pd.read_csv("Data/2020_06_Preliminary_applications__eligibility_determinations__and_enrollment_data.csv")

# Convert to pandas DataFrame
df_mm = pd.DataFrame.from_records(results)
df_mm.head(5)
df_mm.columns




Index(['applications_for_financial_assistance_submitted_to_the_state_based_marketplace',
       'applications_for_financial_assistance_submitted_to_the_state_based_marketplace_footnotes',
       'final_report', 'geocoded_column',
       'individuals_determined_eligible_for_chip_at_application',
       'individuals_determined_eligible_for_chip_at_application_footnotes',
       'individuals_determined_eligible_for_medicaid_at_application',
       'individuals_determined_eligible_for_medicaid_at_application_footnotes',
       'latitude', 'longitude', 'medicaid_and_chip_child_enrollment',
       'medicaid_and_chip_child_enrollment_footnotes',
       'new_applications_submitted_to_medicaid_and_chip_agencies',
       'new_applications_submitted_to_medicaid_and_chip_agencies_footnotes',
       'preliminary_updated', 'report_date', 'state_abbreviation',
       'state_expanded_medicaid', 'state_name',
       'total_applications_for_financial_assistance_submitted_at_state_level',
       'total_a

In [7]:
df_mm['State Name'] = df_mm['state_name'].str.upper() 

In [8]:
Medicaid_state = dict(zip(df_mm['State Name'], df_mm['total_medicaid_and_chip_enrollment']))
Medicaid_state['PUERTO RICO'] = 1622194
print (Medicaid_state)

{'ALABAMA': '957116', 'ALASKA': '231145', 'ARIZONA': '1839932', 'ARKANSAS': '830467', 'CALIFORNIA': '11847711', 'COLORADO': '1337805', 'CONNECTICUT': '874974', 'DELAWARE': '239009', 'DISTRICT OF COLUMBIA': '248591', 'FLORIDA': '3892552', 'GEORGIA': '1928703', 'HAWAII': '351337', 'IDAHO': '340742', 'ILLINOIS': '2987496', 'INDIANA': '1602976', 'IOWA': '699741', 'KANSAS': '401103', 'KENTUCKY': '1416013', 'LOUISIANA': '1585024', 'MAINE': '232455', 'MARYLAND': '1372695', 'MASSACHUSETTS': '1616404', 'MICHIGAN': '2439425', 'MINNESOTA': '1085778', 'MISSISSIPPI': '632427', 'MISSOURI': '923641', 'MONTANA': '247333', 'NEBRASKA': '254159', 'NEVADA': '685073', 'NEW HAMPSHIRE': '193436', 'NEW JERSEY': '1759653', 'NEW MEXICO': '772102', 'NEW YORK': '6263164', 'NORTH CAROLINA': '1851558', 'NORTH DAKOTA': '96757', 'OHIO': '2788134', 'OKLAHOMA': '797220', 'OREGON': '1053931', 'PENNSYLVANIA': '3069309', 'RHODE ISLAND': '305208', 'SOUTH CAROLINA': '1048276', 'SOUTH DAKOTA': '114059', 'TENNESSEE': '1489536

In [9]:
df_mmm = pd.read_csv("Data/ACSST5Y2018.S2704_data_with_overlays_2020-08-01T140649.csv", header=[1])
df_mmm.head(1)
#df_mmm.dtypes

Unnamed: 0,id,Geographic Area Name,Estimate!!Total!!Civilian noninstitutionalized population,Margin of Error!!Total MOE!!Civilian noninstitutionalized population,Estimate!!Public Coverage!!Civilian noninstitutionalized population,Margin of Error!!Public Coverage MOE!!Civilian noninstitutionalized population,Estimate!!Percent Public Coverage!!Civilian noninstitutionalized population,Margin of Error!!Percent Public Coverage MOE!!Civilian noninstitutionalized population,Estimate!!Total!!Medicare coverage alone or in combination,Margin of Error!!Total MOE!!Medicare coverage alone or in combination,Estimate!!Public Coverage!!Medicare coverage alone or in combination,Margin of Error!!Public Coverage MOE!!Medicare coverage alone or in combination,Estimate!!Percent Public Coverage!!Medicare coverage alone or in combination,Margin of Error!!Percent Public Coverage MOE!!Medicare coverage alone or in combination,Estimate!!Total!!Medicare coverage alone or in combination!!Under 19,Margin of Error!!Total MOE!!Medicare coverage alone or in combination!!Under 19,Estimate!!Public Coverage!!Medicare coverage alone or in combination!!Under 19,Margin of Error!!Public Coverage MOE!!Medicare coverage alone or in combination!!Under 19,Estimate!!Percent Public Coverage!!Medicare coverage alone or in combination!!Under 19,Margin of Error!!Percent Public Coverage MOE!!Medicare coverage alone or in combination!!Under 19,Estimate!!Total!!Medicare coverage alone or in combination!!19 to 64 years,Margin of Error!!Total MOE!!Medicare coverage alone or in combination!!19 to 64 years,Estimate!!Public Coverage!!Medicare coverage alone or in combination!!19 to 64 years,Margin of Error!!Public Coverage MOE!!Medicare coverage alone or in combination!!19 to 64 years,Estimate!!Percent Public Coverage!!Medicare coverage alone or in combination!!19 to 64 years,Margin of Error!!Percent Public Coverage MOE!!Medicare coverage alone or in combination!!19 to 64 years,Estimate!!Total!!Medicare coverage alone or in combination!!65 years and over,Margin of Error!!Total MOE!!Medicare coverage alone or in combination!!65 years and over,Estimate!!Public Coverage!!Medicare coverage alone or in combination!!65 years and over,Margin of Error!!Public Coverage MOE!!Medicare coverage alone or in combination!!65 years and over,Estimate!!Percent Public Coverage!!Medicare coverage alone or in combination!!65 years and over,Margin of Error!!Percent Public Coverage MOE!!Medicare coverage alone or in combination!!65 years and over,Estimate!!Total!!Medicaid/means-tested public coverage alone or in combination,Margin of Error!!Total MOE!!Medicaid/means-tested public coverage alone or in combination,Estimate!!Public Coverage!!Medicaid/means-tested public coverage alone or in combination,Margin of Error!!Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination,Estimate!!Percent Public Coverage!!Medicaid/means-tested public coverage alone or in combination,Margin of Error!!Percent Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination,Estimate!!Total!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Margin of Error!!Total MOE!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Estimate!!Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Margin of Error!!Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Estimate!!Percent Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Margin of Error!!Percent Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Estimate!!Total!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Margin of Error!!Total MOE!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Estimate!!Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Margin of Error!!Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Estimate!!Percent Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Margin of Error!!Percent Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Estimate!!Total!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Margin of Error!!Total MOE!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Estimate!!Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Margin of Error!!Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Estimate!!Percent Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Margin of Error!!Percent Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Estimate!!Total!!VA health care coverage alone or in combination,Margin of Error!!Total MOE!!VA health care coverage alone or in combination,Estimate!!Public Coverage!!VA health care coverage alone or in combination,Margin of Error!!Public Coverage MOE!!VA health care coverage alone or in combination,Estimate!!Percent Public Coverage!!VA health care coverage alone or in combination,Margin of Error!!Percent Public Coverage MOE!!VA health care coverage alone or in combination,Estimate!!Total!!VA health care coverage alone or in combination!!Under 19,Margin of Error!!Total MOE!!VA health care coverage alone or in combination!!Under 19,Estimate!!Public Coverage!!VA health care coverage alone or in combination!!Under 19,Margin of Error!!Public Coverage MOE!!VA health care coverage alone or in combination!!Under 19,Estimate!!Percent Public Coverage!!VA health care coverage alone or in combination!!Under 19,Margin of Error!!Percent Public Coverage MOE!!VA health care coverage alone or in combination!!Under 19,Estimate!!Total!!VA health care coverage alone or in combination!!19 to 64 years,Margin of Error!!Total MOE!!VA health care coverage alone or in combination!!19 to 64 years,Estimate!!Public Coverage!!VA health care coverage alone or in combination!!19 to 64 years,Margin of Error!!Public Coverage MOE!!VA health care coverage alone or in combination!!19 to 64 years,Estimate!!Percent Public Coverage!!VA health care coverage alone or in combination!!19 to 64 years,Margin of Error!!Percent Public Coverage MOE!!VA health care coverage alone or in combination!!19 to 64 years,Estimate!!Total!!VA health care coverage alone or in combination!!65 years and over,Margin of Error!!Total MOE!!VA health care coverage alone or in combination!!65 years and over,Estimate!!Public Coverage!!VA health care coverage alone or in combination!!65 years and over,Margin of Error!!Public Coverage MOE!!VA health care coverage alone or in combination!!65 years and over,Estimate!!Percent Public Coverage!!VA health care coverage alone or in combination!!65 years and over,Margin of Error!!Percent Public Coverage MOE!!VA health care coverage alone or in combination!!65 years and over,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,"Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)","Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)","Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)","Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)","Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)","Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)",Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Estimate!!Total!!COVERAGE ALONE!!Public health insurance alone,Margin of Error!!Total MOE!!COVERAGE ALONE!!Public health insurance alone,Estimate!!Public Coverage!!COVERAGE ALONE!!Public health insurance alone,Margin of Error!!Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone,Estimate!!Percent Public Coverage!!COVERAGE ALONE!!Public health insurance alone,Margin of Error!!Percent Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone,Estimate!!Total!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Margin of Error!!Total MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Estimate!!Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Margin of Error!!Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Estimate!!Percent Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Margin of Error!!Percent Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Estimate!!Total!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Margin of Error!!Total MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Estimate!!Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Margin of Error!!Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Estimate!!Percent Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Margin of Error!!Percent Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Estimate!!Total!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone,Margin of Error!!Total MOE!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone,Estimate!!Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone,Margin of Error!!Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone,Estimate!!Percent Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone,Margin of Error!!Percent Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone
0,0500000US01001,"Autauga County, Alabama",54277,219,18191,965,33.5,1.8,(X),(X),10026,410,18.5,0.8,14134,125,36,59,0.3,0.4,32229,261,2155,369,6.7,1.1,7914,152,7835,155,99.0,0.7,(X),(X),9049,859,16.7,1.6,14134,125,5352,682,37.9,4.9,32229,261,2788,452,8.7,1.4,7914,152,909,183,11.5,2.4,(X),(X),1701,290,3.1,0.5,14134,125,8,13,0.1,0.1,32229,261,989,252,3.1,0.8,7914,152,704,156,8.9,2.0,12303,1299,7466,880,60.7,4.9,41915,1301,10666,650,25.4,1.5,17605,763,1001,233,5.7,1.4,3974,267,1595,316,40.1,7.6,10160,310,3801,489,37.4,5.1,4406,284,560,235,12.7,5.3,6224,268,550,209,8.8,3.3,7042,206,1095,224,15.5,3.2,7771,134,1361,253,17.5,3.3,6786,87,1363,239,20.1,3.5,4697,68,4649,73,99.0,0.7,3217,138,3217,138,100.0,1.0,(X),(X),9513,834,17.5,1.5,(X),(X),2884,414,5.3,0.8,(X),(X),6503,810,12.0,1.5,(X),(X),126,101,0.2,0.2


In [10]:
df_mmm['FIPS'] = df_mmm.id.astype(str).str[9:]
df_mmm.head(5)
df_mmm.FIPS.astype(int)
df_mmm['FIPS'] = pd.to_numeric(df_mmm['FIPS'])

In [11]:
ACI_total  = dict(zip(df_mmm['FIPS'], df_mmm["Estimate!!Public Coverage!!Medicaid/means-tested public coverage alone or in combination"]))

In [12]:

#df_m = pd.read_csv("Data/Medicaid_Demand.csv")

# Unemployment 
The unemployment rate is calculated by the U.S. Bureau of Labor Statistics as the percentage of the civilian labor force who are without jobs and have actively sought work within the past four weeks. <a href="https://www.bls.gov/lau/laufaq.htm#Q01"> Data Source for Unemployment  </a> 

In [13]:
from io import StringIO
import datetime 
from datetime import date
from dateutil.relativedelta import relativedelta

back = date.today() + relativedelta(months= -3)
three_months_ago = back.strftime('%b-%y') 


url = 'https://www.bls.gov/web/metro/laucntycur14.txt'
s = requests.get(url).text



df_unemp = pd.read_csv(StringIO(s), sep='|',  skiprows=7, skipfooter=6, engine='python', names = ['LAUS Area Code', 'FIPS State', 'FIPS County', 'Area Title', 'Period', 'Civilian Labor Force','Employed','Unemployed_Level','Unemployed_Rate'])

df_unemp['Period'] = df_unemp['Period'].astype(str)

df_unemp['FIPS'] = df_unemp['LAUS Area Code'].str[3:8]

df_unemp = df_unemp[df_unemp['Period'].str.contains(str(three_months_ago))]

df_unemp.head(5)
#df_unemp.dtypes

Unnamed: 0,LAUS Area Code,FIPS State,FIPS County,Area Title,Period,Civilian Labor Force,Employed,Unemployed_Level,Unemployed_Rate,FIPS
38627,CN0100100000000,1,1,"Autauga County, AL",Aug-20,25446,24272,1174,4.6,1001
38628,CN0100300000000,1,3,"Baldwin County, AL",Aug-20,98267,93297,4970,5.1,1003
38629,CN0100500000000,1,5,"Barbour County, AL",Aug-20,9476,8806,670,7.1,1005
38630,CN0100700000000,1,7,"Bibb County, AL",Aug-20,8676,8141,535,6.2,1007
38631,CN0100900000000,1,9,"Blount County, AL",Aug-20,24792,23889,903,3.6,1009


In [14]:
#df_unemp[[ 'FIPS', 'Area Title', 'Period','Unemployed_Level','Unemployed_Rate']].to_csv('Data/County_employment.csv', index=False)


In [15]:


df_unemp.replace({'-', 0})

df_unemp['Unemployed_Level'] = df_unemp['Unemployed_Level'].str.replace(',', '')

df_unemp['Unemployed_Rate'] = df_unemp['Unemployed_Rate'].astype(str)

df_unemp['Unemployed_Level'] = df_unemp['Unemployed_Level'].str.strip()

df_unemp['Unemployed_Rate'] = df_unemp['Unemployed_Rate'].str.strip()

df_unemp['FIPS'] = pd.to_numeric(df_unemp['FIPS'])

df_unemp['Unemployed_Level'] = pd.to_numeric(df_unemp['Unemployed_Level'])

df_unemp['Unemployed_Rate'] = pd.to_numeric(df_unemp['Unemployed_Rate'])




# Fill NA with 0
#df_unemp = df_unemp.fillna(0)
df_unemp.tail(5)
#df_unemp.dtypes


Unnamed: 0,LAUS Area Code,FIPS State,FIPS County,Area Title,Period,Civilian Labor Force,Employed,Unemployed_Level,Unemployed_Rate,FIPS
41841,CN7214500000000,72,145,"Vega Baja Municipio, PR",Aug-20,12806,11317,1489,11.6,72145
41842,CN7214700000000,72,147,"Vieques Municipio, PR",Aug-20,2487,2230,257,10.3,72147
41843,CN7214900000000,72,149,"Villalba Municipio, PR",Aug-20,6722,6096,626,9.3,72149
41844,CN7215100000000,72,151,"Yabucoa Municipio, PR",Aug-20,8064,7286,778,9.6,72151
41845,CN7215300000000,72,153,"Yauco Municipio, PR",Aug-20,9159,8203,956,10.4,72153


In [16]:
 
## Read the Unemployment data using cvs data 
#df_unemp = pd.read_csv("Data/Unemployment.csv")

# Fill NA with 0
#df_unemp = df_unemp.fillna(0)

#df_unemp.head(5)

# Years of Potential Life Lost (YPLL)

Years of Potential Life Lost (YPLL) measures the rate of premature deaths by region. YPLL is calculated as the sum of the estimated number of years that individuals would have lived if they had not died before the age of 75 per 100,000 people. <a href="https://www.countyhealthrankings.org/sites/default/files/media/document/2020%20County%20Health%20Rankings%20Data%20-%20v2.xlsx"> Data Source for YPLL.  </a> More information about YPLL can be dounf in this <a href="https://www.countyhealthrankings.org/explore-health-rankings/measures-data-sources/county-health-rankings-model/health-outcomes/length-of-life/premature-death-ypll"> link. </a> 


In [17]:
# Read the YPLL data
df_y = pd.read_csv("Data/YPLL.csv")

# Fill NA with the mean of the data
df_y = df_y.fillna(df_y.mean())


# Population

In [18]:
# Read the Population data
df_pop = pd.read_csv("Data/County_pop_2019.csv")

# Fill NA with 0
df_pop = df_pop.fillna(0)


In [19]:
# Create a dictionary for the county and population
population_county = df_pop.set_index('FIPS')['pop'].to_dict()

# Create a dictionary for the county and YPLL
YPLL = dict(zip(df_y.FIPS, df_y.YPLL))

# Create a dictionary for the county and Unemployment
Unemployment = dict(zip(df_unemp.FIPS, df_unemp.Unemployed_Level))


# Create a dictionary for the county and Community Health Workers (CHW) demand
# Note that we assume a CHW can serve 55 Medicaid patient so the demand for CHW will be

#Medicaid_demand = dict(zip(df_m.FIPS, df_m.Med_Demand))


In [20]:
#for m in Medicaid_demand:
#    print (m, Medicaid_demand[m])

# COVID-19 Cases & COVID-19 Cases per Capita

What are COVID-19 Cases and COVID-19 Cases per Capita?

COVID-19 cases is an absolute metric of the total number of COVID-19 cases in a county over the last fourteen days.  COVID-19 cases per 100,000 is a relative metric calculated by dividing the number of COVID-19 cases by the estimated county population and multiplying by 100,000.  Cases include both confirmed cases, based on viral testing, and probable cases, based on specific criteria for symptoms and epidemiological exposure. We use NY Times Covid data. 


In [21]:
#### Data with the most recent date in NY Times dataset:

today = time.strftime('%Y-%m-%d')
covid_data_update_date = today#'2020-07-21'#today #or enter a specific date such as '2020-07-06'


In [22]:
## 14 day period defined
data_date_dt = pd.to_datetime(covid_data_update_date,infer_datetime_format = True)

N = 14

date_N_days_ago = data_date_dt - timedelta(days = N)

date_N1_days_ago = data_date_dt - timedelta(days = N+1)

In [23]:

# URL for mainland US data
url = "http://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"
s = requests.get(url).content
covid = pd.read_csv(io.StringIO(s.decode('utf-8')))

In [24]:
covid.tail(50)

Unnamed: 0,date,county,state,fips,cases,deaths
780917,2020-11-29,Ozaukee,Wisconsin,55089.0,5255,40.0
780918,2020-11-29,Pepin,Wisconsin,55091.0,478,2.0
780919,2020-11-29,Pierce,Wisconsin,55093.0,2483,21.0
780920,2020-11-29,Polk,Wisconsin,55095.0,2159,14.0
780921,2020-11-29,Portage,Wisconsin,55097.0,4866,38.0
780922,2020-11-29,Price,Wisconsin,55099.0,754,4.0
780923,2020-11-29,Racine,Wisconsin,55101.0,15397,169.0
780924,2020-11-29,Richland,Wisconsin,55103.0,883,13.0
780925,2020-11-29,Rock,Wisconsin,55105.0,9795,84.0
780926,2020-11-29,Rusk,Wisconsin,55107.0,894,7.0


Note: Since NY data is seperately available, we first read the NY data for all 5 different borough then combine with the rest of US data.

In [25]:
# URL for NY
url = "https://raw.githubusercontent.com/nychealth/coronavirus-data/master/trends/data-by-day.csv"
#"https://raw.githubusercontent.com/nychealth/coronavirus-data/master/data-by-day.csv"

ny = requests.get(url).content
covid_ny = pd.read_csv(io.StringIO(ny.decode('utf-8')))


covid_ny.tail(5)

Unnamed: 0,date_of_interest,CASE_COUNT,HOSPITALIZED_COUNT,DEATH_COUNT,DEATH_COUNT_PROBABLE,CASE_COUNT_7DAY_AVG,HOSP_COUNT_7DAY_AVG,DEATH_COUNT_7DAY_AVG,BX_CASE_COUNT,BX_HOSPITALIZED_COUNT,BX_DEATH_COUNT,BX_CASE_COUNT_7DAY_AVG,BX_HOSPITALIZED_COUNT_7DAY_AVG,BX_DEATH_COUNT_7DAY_AVG,BK_CASE_COUNT,BK_HOSPITALIZED_COUNT,BK_DEATH_COUNT,BK_CASE_COUNT_7DAY_AVG,BK_HOSPITALIZED_COUNT_7DAY_AVG,BK_DEATH_COUNT_7DAY_AVG,MN_CASE_COUNT,MN_HOSPITALIZED_COUNT,MN_DEATH_COUNT,MN_CASE_COUNT_7DAY_AVG,MN_HOSPITALIZED_COUNT_7DAY_AVG,MN_DEATH_COUNT_7DAY_AVG,QN_CASE_COUNT,QN_HOSPITALIZED_COUNT,QN_DEATH_COUNT,QN_CASE_COUNT_7DAY_AVG,QN_HOSPITALIZED_COUNT_7DAY_AVG,QN_DEATH_COUNT_7DAY_AVG,SI_CASE_COUNT,SI_HOSPITALIZED_COUNT,SI_DEATH_COUNT,SI_CASE_COUNT_7DAY_AVG,SI_HOSPITALIZED_COUNT_7DAY_AVG,SI_DEATH_COUNT_7DAY_AVG,INCOMPLETE
267,11/22/2020,1002,98,8,1,1500,102,8,151,16,3,276,20,1,297,30,1,406,26,3,148,12,0,258,12,1,246,23,3,384,29,2,160,17,1,175,14,2,6000
268,11/23/2020,1818,128,9,4,1507,104,8,310,31,0,277,21,1,466,28,4,405,27,3,315,16,1,259,13,1,500,31,3,386,28,2,227,22,1,181,16,1,6000
269,11/24/2020,1486,103,12,5,1486,107,9,257,16,2,267,21,1,418,25,0,398,26,3,290,13,4,262,13,1,386,35,3,384,30,2,135,14,3,174,17,2,6000
270,11/25/2020,1396,93,7,4,1451,106,9,218,22,2,257,20,2,388,23,2,391,25,3,225,9,0,250,13,1,424,21,1,385,30,2,141,18,2,168,18,2,6000
271,11/26/2020,398,81,1,5,1260,103,8,72,9,0,218,19,1,118,16,1,341,24,2,67,18,0,215,14,1,102,20,0,338,28,2,39,18,0,148,18,2,6000


In [26]:
Kings = covid_ny[['date_of_interest', 'BK_CASE_COUNT', 'BK_DEATH_COUNT']]
Kings.rename(columns = {'BK_CASE_COUNT': 'cases', 'date_of_interest': 'date', 'BK_DEATH_COUNT': 'deaths'} , inplace=True)
Kings['county'] = 'Kings'
Kings['state'] = 'New York'
Kings['fips'] = 36047.0
#Kings.head(5)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the d

In [27]:
Bronx = covid_ny[['date_of_interest', 'BX_CASE_COUNT', 'BX_DEATH_COUNT']]
Bronx.rename(columns = {'BX_CASE_COUNT': 'cases', 'date_of_interest': 'date', 'BX_DEATH_COUNT': 'deaths'} , inplace=True)
Bronx['state'] = 'New York'
Bronx['county'] = 'Bronx'
Bronx['fips'] = 36005.0
#Bronx.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [28]:
Manhattan = covid_ny[['date_of_interest', 'MN_CASE_COUNT', 'MN_DEATH_COUNT']]
Manhattan.rename(columns = {'MN_CASE_COUNT': 'cases', 'date_of_interest': 'date', 'MN_DEATH_COUNT': 'deaths'} , inplace=True)
Manhattan['state'] = 'New York'
Manhattan['county'] = 'Manhattan'
Manhattan['fips'] = 36061.0
#Manhattan.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [29]:
Queens = covid_ny[['date_of_interest', 'QN_CASE_COUNT', 'QN_DEATH_COUNT']]
Queens.rename(columns = {'QN_CASE_COUNT': 'cases', 'date_of_interest': 'date', 'QN_DEATH_COUNT': 'deaths'} , inplace=True)
Queens['state'] = 'New York'
Queens['county'] = 'Queens'
Queens['fips'] = 36081.0
#Queens.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [30]:
Richmond = covid_ny[['date_of_interest', 'SI_CASE_COUNT', 'SI_DEATH_COUNT']]
Richmond.rename(columns = {'SI_CASE_COUNT': 'cases', 'date_of_interest': 'date', 'SI_DEATH_COUNT': 'deaths'} , inplace=True)
Richmond['state'] = 'New York'
Richmond['county'] = 'Richmond'
Richmond['fips'] = 36085.0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [31]:
covid.head(5)


Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0.0
1,2020-01-22,Snohomish,Washington,53061.0,1,0.0
2,2020-01-23,Snohomish,Washington,53061.0,1,0.0
3,2020-01-24,Cook,Illinois,17031.0,1,0.0
4,2020-01-24,Snohomish,Washington,53061.0,1,0.0


In [32]:
Bronx.tail(5)

Unnamed: 0,date,cases,deaths,state,county,fips
267,11/22/2020,151,3,New York,Bronx,36005.0
268,11/23/2020,310,0,New York,Bronx,36005.0
269,11/24/2020,257,2,New York,Bronx,36005.0
270,11/25/2020,218,2,New York,Bronx,36005.0
271,11/26/2020,72,0,New York,Bronx,36005.0


In [33]:
#Kings['deaths'] = Kings.groupby(by=['fips'])['deaths_d'].sum()
#Bronx['deaths'] = Bronx.groupby(by=['fips'])['deaths_d'].sum()
#Manhattan['deaths'] = Manhattan.groupby(by=['fips'])['deaths_d'].sum()
#Queens['deaths'] = Queens.groupby(by=['fips'])['deaths_d'].sum()
#Richmond['deaths'] = Richmond.groupby(by=['fips'])['deaths_d'].sum()

In [34]:
Kings.head(5)

Unnamed: 0,date,cases,deaths,county,state,fips
0,02/29/2020,0,0,Kings,New York,36047.0
1,03/01/2020,0,0,Kings,New York,36047.0
2,03/02/2020,0,0,Kings,New York,36047.0
3,03/03/2020,0,0,Kings,New York,36047.0
4,03/04/2020,1,0,Kings,New York,36047.0


In [35]:
#Kings = Kings.drop(['deaths_d'], axis=1)
#Bronx = Bronx.drop(['deaths_d'], axis=1)
#Manhattan = Manhattan.drop(['deaths_d'], axis=1)
#Queens = Queens.drop(['deaths_d'], axis=1)
#Richmond = Richmond.drop(['deaths_d'], axis=1)

In [36]:
Kings.tail(5)

Unnamed: 0,date,cases,deaths,county,state,fips
267,11/22/2020,297,1,Kings,New York,36047.0
268,11/23/2020,466,4,Kings,New York,36047.0
269,11/24/2020,418,0,Kings,New York,36047.0
270,11/25/2020,388,2,Kings,New York,36047.0
271,11/26/2020,118,1,Kings,New York,36047.0


In [37]:
covid['dt'] = pd.to_datetime(covid['date'], infer_datetime_format=True)
Kings['dt'] = pd.to_datetime(Kings['date'], infer_datetime_format=True)
Bronx['dt'] = pd.to_datetime(Bronx['date'], infer_datetime_format=True)
Manhattan['dt'] = pd.to_datetime(Manhattan['date'], infer_datetime_format=True)
Queens['dt'] = pd.to_datetime(Queens['date'], infer_datetime_format=True)
Richmond['dt'] = pd.to_datetime(Richmond['date'], infer_datetime_format=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the cavea

In [38]:

Kings_c = Kings[(Kings['dt']>date_N1_days_ago) & (Kings['dt']<= data_date_dt)].copy()
Queens_c = Queens[(Queens['dt']>date_N1_days_ago) & (Queens['dt']<= data_date_dt)].copy()
Bronx_c = Bronx[(Bronx['dt']>date_N1_days_ago) & (Bronx['dt']<= data_date_dt)].copy()
Manhattan_c = Manhattan[(Manhattan['dt']>date_N1_days_ago) & (Manhattan['dt']<= data_date_dt)].copy()
Richmond_c = Richmond[(Richmond['dt']>date_N1_days_ago) & (Richmond['dt']<= data_date_dt)].copy()

In [39]:
#Kings['cases'] = Kings.loc[(Kings['dt']<= data_date_dt)].groupby(['dt'])['d_cases'].sum()
#Kings['cases'] = Kings.apply(lambda x: x[(Kings['dt']<= data_date_dt)]['d_cases'].sum())

In [40]:
Kings.head(50)

Unnamed: 0,date,cases,deaths,county,state,fips,dt
0,02/29/2020,0,0,Kings,New York,36047.0,2020-02-29
1,03/01/2020,0,0,Kings,New York,36047.0,2020-03-01
2,03/02/2020,0,0,Kings,New York,36047.0,2020-03-02
3,03/03/2020,0,0,Kings,New York,36047.0,2020-03-03
4,03/04/2020,1,0,Kings,New York,36047.0,2020-03-04
5,03/05/2020,3,0,Kings,New York,36047.0,2020-03-05
6,03/06/2020,1,0,Kings,New York,36047.0,2020-03-06
7,03/07/2020,2,0,Kings,New York,36047.0,2020-03-07
8,03/08/2020,5,0,Kings,New York,36047.0,2020-03-08
9,03/09/2020,17,0,Kings,New York,36047.0,2020-03-09


In [41]:
K_c = Kings_c.groupby(['fips']).sum().reset_index()
Q_c = Queens_c.groupby(['fips']).sum().reset_index()
B_c = Bronx_c.groupby(['fips']).sum().reset_index()
M_c = Manhattan_c.groupby(['fips']).sum().reset_index()
R_c = Richmond_c.groupby(['fips']).sum().reset_index()
K_c.head(19)

Unnamed: 0,fips,cases,deaths
0,36047.0,4232,26


In [42]:
covid_death = covid[(covid['dt'] >= data_date_dt- timedelta(days = 1))]
Kings_death = Kings.groupby(by=['fips'])['deaths'].sum().reset_index()
Bronx_death = Bronx.groupby(by=['fips'])['deaths'].sum().reset_index()
Manhattan_death = Manhattan.groupby(by=['fips'])['deaths'].sum().reset_index()
Queens_death = Queens.groupby(by=['fips'])['deaths'].sum().reset_index()
Richmond_death = Richmond.groupby(by=['fips'])['deaths'].sum().reset_index()

In [43]:
# We merge the NY data with the rest of the US data
covid = pd.concat([covid, Kings, Bronx, Manhattan, Queens, Richmond], sort = False)


In [44]:
Queens.tail(14)

Unnamed: 0,date,cases,deaths,state,county,fips,dt
258,11/13/2020,426,1,New York,Queens,36081.0,2020-11-13
259,11/14/2020,319,2,New York,Queens,36081.0,2020-11-14
260,11/15/2020,230,1,New York,Queens,36081.0,2020-11-15
261,11/16/2020,490,1,New York,Queens,36081.0,2020-11-16
262,11/17/2020,402,3,New York,Queens,36081.0,2020-11-17
263,11/18/2020,414,4,New York,Queens,36081.0,2020-11-18
264,11/19/2020,430,0,New York,Queens,36081.0,2020-11-19
265,11/20/2020,428,2,New York,Queens,36081.0,2020-11-20
266,11/21/2020,281,0,New York,Queens,36081.0,2020-11-21
267,11/22/2020,246,3,New York,Queens,36081.0,2020-11-22


In [45]:

covid.tail(50)

Unnamed: 0,date,county,state,fips,cases,deaths,dt
222,10/08/2020,Richmond,New York,36085.0,33,0.0,2020-10-08
223,10/09/2020,Richmond,New York,36085.0,30,0.0,2020-10-09
224,10/10/2020,Richmond,New York,36085.0,34,1.0,2020-10-10
225,10/11/2020,Richmond,New York,36085.0,14,0.0,2020-10-11
226,10/12/2020,Richmond,New York,36085.0,37,1.0,2020-10-12
227,10/13/2020,Richmond,New York,36085.0,45,0.0,2020-10-13
228,10/14/2020,Richmond,New York,36085.0,33,0.0,2020-10-14
229,10/15/2020,Richmond,New York,36085.0,40,0.0,2020-10-15
230,10/16/2020,Richmond,New York,36085.0,36,0.0,2020-10-16
231,10/17/2020,Richmond,New York,36085.0,39,0.0,2020-10-17


In [46]:
print (data_date_dt)

2020-11-30 00:00:00


In [47]:

covid_death = covid_death[['fips', 'deaths']]
covid_death.head(2)

Unnamed: 0,fips,deaths
777720,1001.0,42.0
777721,1003.0,98.0


In [48]:
Kings_death.head(2)

Unnamed: 0,fips,deaths
0,36047.0,5841


In [49]:
# We merge the NY data with the rest of the US data
covid_death = pd.concat([covid_death, Kings_death, Bronx_death, Manhattan_death, Queens_death, Richmond_death], sort = False)

covid_death = covid_death.rename(columns={"deaths": "total_deaths"})


# Create a dictionary for the cumulative COVID deaths in each county
County_covid_death = dict(zip(covid_death.fips, covid_death.total_deaths))

covid_death.tail(250)

Unnamed: 0,fips,total_deaths
780722,51115.0,1.0
780723,51117.0,36.0
780724,51119.0,11.0
780725,51121.0,15.0
780726,51125.0,3.0
780727,51127.0,4.0
780728,51700.0,55.0
780729,51710.0,90.0
780730,51131.0,31.0
780731,51133.0,8.0


In [50]:
covid_death.shape

(3252, 2)

In [51]:
print (County_covid_death)

{1001.0: 42.0, 1003.0: 98.0, 1005.0: 11.0, 1007.0: 17.0, 1009.0: 40.0, 1011.0: 19.0, 1013.0: 42.0, 1015.0: 121.0, 1017.0: 50.0, 1019.0: 24.0, 1021.0: 38.0, 1023.0: 13.0, 1025.0: 19.0, 1027.0: 24.0, 1029.0: 14.0, 1031.0: 15.0, 1033.0: 42.0, 1035.0: 14.0, 1037.0: 4.0, 1039.0: 34.0, 1041.0: 30.0, 1043.0: 42.0, 1045.0: 54.0, 1047.0: 32.0, 1049.0: 36.0, 1051.0: 64.0, 1053.0: 31.0, 1055.0: 66.0, 1057.0: 16.0, 1059.0: 33.0, 1061.0: 8.0, 1063.0: 18.0, 1065.0: 31.0, 1067.0: 6.0, 1069.0: 38.0, 1071.0: 23.0, 1073.0: 500.0, 1075.0: 8.0, 1077.0: 54.0, 1079.0: 36.0, 1081.0: 66.0, 1083.0: 45.0, 1085.0: 29.0, 1087.0: 22.0, 1089.0: 148.0, 1091.0: 24.0, 1093.0: 36.0, 1095.0: 55.0, 1097.0: 362.0, 1099.0: 11.0, 1101.0: 236.0, 1103.0: 50.0, 1105.0: 6.0, 1107.0: 18.0, 1109.0: 14.0, 1111.0: 21.0, 1113.0: 3.0, 1117.0: 77.0, 1115.0: 55.0, 1119.0: 22.0, 1121.0: 54.0, 1123.0: 91.0, 1125.0: 154.0, 1127.0: 111.0, 1129.0: 19.0, 1131.0: 18.0, 1133.0: 23.0, 2013.0: 0.0, 2016.0: 0.0, 2020.0: 67.0, 2050.0: 5.0, 2060.0:

In [52]:
## subset last last 15 days
covid_last15 = covid[(covid['dt']>date_N1_days_ago) & (covid['dt']<= data_date_dt)].copy()
covid_last15['dt_time_delta'] = covid_last15['dt']-data_date_dt

In [53]:
## calculate new daily cases

## sort values by county and date
covid_last15.sort_values(by=['fips','dt'],inplace=True)
## remove data with 'unknown' counties
covid_last15 = covid_last15[covid_last15['fips'].notnull()].copy()

## calculate daily difference in number of cases
covid_last15['new_cases'] = covid_last15.groupby('fips')['cases'].transform(lambda x: x.diff())
## set negative new cases to zero, this can occuer due to the disperacy in the data

covid_last15.loc[covid_last15.new_cases < 1e-6, 'new_cases'] = 0
covid_last15.sort_index(inplace = True)

In [54]:
covid_last15[covid_last15.county == 'New Haven'].head(50)

Unnamed: 0,date,county,state,fips,cases,deaths,dt,dt_time_delta,new_cases
735828,2020-11-16,New Haven,Connecticut,9009.0,23113,1167.0,2020-11-16,-14 days,
739073,2020-11-17,New Haven,Connecticut,9009.0,23593,1168.0,2020-11-17,-13 days,480.0
742320,2020-11-18,New Haven,Connecticut,9009.0,24115,1172.0,2020-11-18,-12 days,522.0
745567,2020-11-19,New Haven,Connecticut,9009.0,24733,1180.0,2020-11-19,-11 days,618.0
748814,2020-11-20,New Haven,Connecticut,9009.0,25248,1190.0,2020-11-20,-10 days,515.0
752061,2020-11-21,New Haven,Connecticut,9009.0,25248,1190.0,2020-11-21,-9 days,0.0
755308,2020-11-22,New Haven,Connecticut,9009.0,25248,1190.0,2020-11-22,-8 days,0.0
758555,2020-11-23,New Haven,Connecticut,9009.0,26531,1202.0,2020-11-23,-7 days,1283.0
761801,2020-11-24,New Haven,Connecticut,9009.0,26643,1207.0,2020-11-24,-6 days,112.0
765047,2020-11-25,New Haven,Connecticut,9009.0,27092,1222.0,2020-11-25,-5 days,449.0


In [55]:
## select just last 14 days now that we have daily new cases with 15th day as baseline
covid_last14 = covid_last15[(covid_last15['dt'] > date_N_days_ago) & (covid_last15['dt'] <= data_date_dt)].copy()


In [56]:
## group by FIPS to get case load and follow up demand values for each county
covid_last14_stats = covid_last14.groupby(['fips'])['new_cases'].sum().reset_index(name ='total_cases')

covid_last14_stats[covid_last14_stats.fips == 9009].head(5)

Unnamed: 0,fips,total_cases
310,9009.0,4812.0


In [57]:
covid.tail(50)

Unnamed: 0,date,county,state,fips,cases,deaths,dt
222,10/08/2020,Richmond,New York,36085.0,33,0.0,2020-10-08
223,10/09/2020,Richmond,New York,36085.0,30,0.0,2020-10-09
224,10/10/2020,Richmond,New York,36085.0,34,1.0,2020-10-10
225,10/11/2020,Richmond,New York,36085.0,14,0.0,2020-10-11
226,10/12/2020,Richmond,New York,36085.0,37,1.0,2020-10-12
227,10/13/2020,Richmond,New York,36085.0,45,0.0,2020-10-13
228,10/14/2020,Richmond,New York,36085.0,33,0.0,2020-10-14
229,10/15/2020,Richmond,New York,36085.0,40,0.0,2020-10-15
230,10/16/2020,Richmond,New York,36085.0,36,0.0,2020-10-16
231,10/17/2020,Richmond,New York,36085.0,39,0.0,2020-10-17


In [58]:
## group by FIPS to get case load and follow up demand values for each county
#covid_death = covid.groupby(['fips'])['deaths'].sum().reset_index(name ='total_deaths')
#covid_death.head(250)

In [59]:
## group by FIPS to get case load each month and follow up demand values for each county
covid['month'] = pd.to_datetime(covid['date'])
covid['year'] = pd.to_datetime(covid['date'])


In [60]:
#covid_last14_stats_montly = covid.groupby(['fips', covid.month.dt.month, covid.year.dt.year])['deaths'].sum().reset_index() 

#covid_last14_stats_montly['cumulative_death'] = covid_last14_stats_montly.groupby(['fips'])['deaths'].cumsum(axis = 0) 
#covid_last14_stats_montly = covid_last14_stats_montly.groupby(['fips', covid_last14_stats_montly.month,  covid_last14_stats_montly.year])['deaths'].cumsum()

#covid_last14_stats_montly.head(10)

In [61]:
# adding population information from CDC svi dataset
covid_last14_stats = covid_last14_stats.reset_index()
covid_last14_stats['fips'] = covid_last14_stats['fips'].astype(int)
svi_county['FIPS'] = svi_county['FIPS'].astype(int)
covid_last14_stats = pd.merge(left = covid_last14_stats, right = svi_county[['E_TOTPOP','FIPS', 'STATE']], how = 'right', right_on = 'FIPS', left_on = 'fips' )
covid_last14_stats.fillna(0 , inplace=True)

In [62]:

covid_last14_stats.tail(5)

Unnamed: 0,index,fips,total_cases,E_TOTPOP,FIPS,STATE
3215,3217.0,72151.0,41.0,34149,72151,PUERTO RICO
3216,3218.0,72153.0,47.0,36439,72153,PUERTO RICO
3217,0.0,0.0,0.0,2132,2105,ALASKA
3218,0.0,0.0,0.0,689,2282,ALASKA
3219,0.0,0.0,0.0,75,15005,HAWAII


In [63]:
now = pd.to_datetime("now")

print (now)
m_now = now.month
print (m_now)

y_now = now.year

2020-11-30 09:11:48.643454
11


In [64]:
#one_month_lag_cumulative_death =  covid_last14_stats_montly[(covid_last14_stats_montly['month'] == m_now-1) & (covid_last14_stats_montly['year'] == y_now)]

# Create a dictionary for the last month COVID deaths in each county
#one_month_lag_death = dict(zip(one_month_lag_cumulative_death.fips, one_month_lag_cumulative_death.cumulative_death))

#one_month_lag_cumulative_death.head(5)


In [65]:
#two_month_lag_cumulative_death =  covid_last14_stats_montly[(covid_last14_stats_montly['month'] == m_now-2) & (covid_last14_stats_montly['year'] == y_now)]

# Create a dictionary for the last month COVID deaths in each county
#two_month_lag_death = dict(zip(two_month_lag_cumulative_death.fips, two_month_lag_cumulative_death.cumulative_death))


#two_month_lag_cumulative_death.head(5)

In [66]:
#three_month_lag_cumulative_death =  covid_last14_stats_montly[(covid_last14_stats_montly['month'] == m_now - 3) & (covid_last14_stats_montly['year'] == y_now)]


# Create a dictionary for the 3 last month COVID deaths in each county
#three_month_lag_death = dict(zip(three_month_lag_cumulative_death.fips, three_month_lag_cumulative_death.cumulative_death))

#three_month_lag_cumulative_death.head(5)

#print (three_month_lag_death)

In [67]:

# Create a dictionary for the states of the given the county FIPS
county_of_states = dict(zip(svi_county.FIPS, svi_county.STATE))

# Create a dictionary for the name of the given the county FIPS
county_name = dict(zip(svi_county.FIPS, svi_county.COUNTY))

# Create the list for county FIPS, we consider counties as analogy to the center for community health workers
location = svi_county.FIPS.tolist() #[k for k in SVI_county] #[9001, 9003, 9005, 9007, 9009, 9011, 9013, 9015]#[k for k in SVI_county]




In [68]:
for j in County_covid_death:
       
    print (j, County_covid_death[j])

1001.0 42.0
1003.0 98.0
1005.0 11.0
1007.0 17.0
1009.0 40.0
1011.0 19.0
1013.0 42.0
1015.0 121.0
1017.0 50.0
1019.0 24.0
1021.0 38.0
1023.0 13.0
1025.0 19.0
1027.0 24.0
1029.0 14.0
1031.0 15.0
1033.0 42.0
1035.0 14.0
1037.0 4.0
1039.0 34.0
1041.0 30.0
1043.0 42.0
1045.0 54.0
1047.0 32.0
1049.0 36.0
1051.0 64.0
1053.0 31.0
1055.0 66.0
1057.0 16.0
1059.0 33.0
1061.0 8.0
1063.0 18.0
1065.0 31.0
1067.0 6.0
1069.0 38.0
1071.0 23.0
1073.0 500.0
1075.0 8.0
1077.0 54.0
1079.0 36.0
1081.0 66.0
1083.0 45.0
1085.0 29.0
1087.0 22.0
1089.0 148.0
1091.0 24.0
1093.0 36.0
1095.0 55.0
1097.0 362.0
1099.0 11.0
1101.0 236.0
1103.0 50.0
1105.0 6.0
1107.0 18.0
1109.0 14.0
1111.0 21.0
1113.0 3.0
1117.0 77.0
1115.0 55.0
1119.0 22.0
1121.0 54.0
1123.0 91.0
1125.0 154.0
1127.0 111.0
1129.0 19.0
1131.0 18.0
1133.0 23.0
2013.0 0.0
2016.0 0.0
2020.0 67.0
2050.0 5.0
2060.0 0.0
2068.0 0.0
2070.0 1.0
2090.0 17.0
2100.0 0.0
2110.0 3.0
2122.0 7.0
2130.0 0.0
2150.0 0.0
2158.0 1.0
2164.0 0.0
2170.0 6.0
2180.0 0.0
2185.0

26155.0 42.0
26147.0 83.0
26149.0 33.0
26157.0 54.0
nan 15.0
26159.0 43.0
26161.0 141.0
26163.0 3216.0
26165.0 13.0
27001.0 30.0
27003.0 228.0
27005.0 15.0
27007.0 17.0
27009.0 51.0
27011.0 2.0
27013.0 16.0
27015.0 15.0
27017.0 16.0
27019.0 13.0
27021.0 10.0
27023.0 7.0
27025.0 11.0
27027.0 57.0
27029.0 8.0
27031.0 0.0
27033.0 4.0
27035.0 34.0
27037.0 190.0
27039.0 0.0
27041.0 37.0
27043.0 2.0
27045.0 0.0
27047.0 6.0
27049.0 28.0
27051.0 6.0
27053.0 1115.0
27055.0 4.0
27057.0 25.0
27059.0 17.0
27061.0 23.0
27063.0 1.0
27065.0 13.0
27067.0 22.0
27069.0 8.0
27071.0 6.0
27073.0 3.0
27075.0 6.0
27077.0 1.0
27079.0 11.0
27081.0 1.0
27083.0 11.0
27087.0 4.0
27089.0 9.0
27091.0 20.0
27085.0 12.0
27093.0 9.0
27095.0 31.0
27097.0 29.0
27099.0 23.0
27101.0 3.0
27103.0 25.0
27105.0 30.0
27107.0 7.0
27109.0 34.0
27111.0 22.0
27113.0 8.0
27115.0 8.0
27117.0 18.0
27119.0 24.0
27121.0 1.0
27123.0 505.0
27125.0 3.0
27127.0 19.0
27129.0 27.0
27131.0 36.0
27133.0 9.0
27135.0 4.0
27139.0 55.0
27141.0 41.

47039.0 12.0
47043.0 34.0
47045.0 51.0
47047.0 38.0
47049.0 20.0
47051.0 34.0
47053.0 65.0
47055.0 49.0
47057.0 18.0
47059.0 72.0
47061.0 14.0
47063.0 61.0
47065.0 153.0
47067.0 3.0
47069.0 37.0
47071.0 25.0
47073.0 34.0
47075.0 30.0
47077.0 34.0
47079.0 19.0
47081.0 19.0
47083.0 20.0
47085.0 8.0
47087.0 10.0
47089.0 33.0
47091.0 21.0
47093.0 163.0
47095.0 6.0
47097.0 23.0
47099.0 34.0
47101.0 14.0
47103.0 16.0
47105.0 19.0
47111.0 28.0
47113.0 110.0
47115.0 19.0
47117.0 20.0
47119.0 67.0
47107.0 55.0
47109.0 33.0
47121.0 13.0
47123.0 44.0
47125.0 83.0
47127.0 3.0
47129.0 6.0
47131.0 51.0
47133.0 35.0
47135.0 21.0
47137.0 13.0
47139.0 15.0
47141.0 82.0
47143.0 31.0
47145.0 29.0
47147.0 53.0
47149.0 161.0
47151.0 7.0
47153.0 5.0
47155.0 31.0
47157.0 660.0
47159.0 19.0
47161.0 15.0
47163.0 97.0
47165.0 137.0
47167.0 39.0
47169.0 12.0
47171.0 26.0
47173.0 5.0
nan 7.0
47175.0 2.0
47177.0 20.0
47179.0 106.0
47181.0 10.0
47183.0 37.0
47185.0 18.0
47187.0 80.0
47189.0 78.0
48001.0 49.0
48003.

In [69]:
K_c.head()

Unnamed: 0,fips,cases,deaths
0,36047.0,4232,26


In [70]:
K = dict(zip(K_c.fips, K_c.cases))
Q = dict(zip(Q_c.fips, Q_c.cases))
B = dict(zip(B_c.fips, B_c.cases))
M = dict(zip(M_c.fips, M_c.cases))
R  = dict(zip(R_c.fips, R_c.cases))

In [71]:
print (K)

{36047.0: 4232}


In [72]:
# Create a dictionary for the county and covid cases
covid_cases_county_ny_times = dict(zip(covid_last14_stats.fips, covid_last14_stats.total_cases))
COVID_14days = {}

for j in location:
    if j in covid_cases_county_ny_times:
        COVID_14days[j] = covid_cases_county_ny_times[j] 
        print(j, covid_cases_county_ny_times[j])
        

        
    else:
        COVID_14days[j] = 0
        
        
    if j not in County_covid_death:
        print ('j',j)
        County_covid_death[j] = 0

#print (three_month_lag_death)
#print (two_month_lag_covid_death)
#print (one_month_lag_covid_death)
#for j in location:
#    if j not in three_month_lag_death:        
#        three_month_lag_death[j] = 0
        
#    if j not in two_month_lag_death:        
#        two_month_lag_death[j] = 0
        
#    if j not in one_month_lag_death:        
#        one_month_lag_death[j] = 0
        
    #if j not in County_covid_death:
    #    County_covid_death[j] = 0

35039 519.0
1001 270.0
1009 372.0
1013 78.0
1015 700.0
1017 200.0
1031 285.0
1033 389.0
1039 160.0
1043 730.0
1045 159.0
1051 408.0
1055 969.0
1067 51.0
1069 532.0
1071 543.0
1077 773.0
1079 216.0
1083 613.0
1089 1987.0
1095 954.0
1097 1339.0
1103 1285.0
1111 84.0
1113 147.0
1115 507.0
1117 1379.0
1121 355.0
2261 38.0
4021 2913.0
5009 379.0
5011 97.0
5033 389.0
5037 167.0
5045 653.0
5047 127.0
5051 589.0
5053 134.0
5063 216.0
5083 199.0
5085 408.0
5087 89.0
5115 466.0
5117 40.0
5121 131.0
5125 935.0
5131 836.0
5145 669.0
6007 432.0
6017 732.0
6023 163.0
6027 31.0
6061 1100.0
6079 879.0
6089 1584.0
6093 221.0
8019 45.0
8039 190.0
8047 26.0
8051 49.0
8057 8.0
8065 74.0
8067 694.0
8071 117.0
8075 478.0
8077 2037.0
8083 279.0
8085 447.0
8093 65.0
8099 240.0
8101 3208.0
8103 82.0
8119 229.0
8121 66.0
9005 1009.0
9007 763.0
9011 961.0
9015 517.0
10001 875.0
12019 1008.0
12033 1485.0
12089 395.0
12101 2117.0
12109 1158.0
12113 837.0
13015 637.0
13039 210.0
13045 691.0
13047 356.0
13057 1495.0

37055 103.0
37057 861.0
37059 263.0
37069 295.0
37071 1227.0
37077 243.0
37087 312.0
37097 984.0
37101 846.0
37109 495.0
37115 130.0
37121 113.0
37123 114.0
37125 351.0
37127 462.0
37139 102.0
37141 266.0
37145 147.0
37151 715.0
37157 525.0
37159 789.0
37161 437.0
37167 281.0
37169 252.0
37171 417.0
37197 278.0
38001 41.0
38003 281.0
38007 4.0
38009 99.0
38011 17.0
38021 57.0
38025 44.0
38027 50.0
38031 138.0
38033 20.0
38041 50.0
38043 53.0
38049 81.0
38055 136.0
38057 147.0
38059 637.0
38065 18.0
38067 132.0
38069 100.0
38073 92.0
38075 82.0
38081 95.0
38091 40.0
38095 42.0
38097 159.0
38099 257.0
39003 1411.0
39005 423.0
39007 915.0
39011 595.0
39013 559.0
39015 388.0
39017 3670.0
39019 208.0
39021 324.0
39023 1407.0
39025 1946.0
39027 298.0
39029 993.0
39033 456.0
39037 825.0
39039 522.0
39043 747.0
39045 1561.0
39051 411.0
39053 318.0
39055 821.0
39057 1527.0
39059 348.0
39063 773.0
39065 435.0
39067 118.0
39069 246.0
39071 303.0
39073 230.0
39077 580.0
39081 651.0
39083 572.0
390

17071 60.0
17085 177.0
17087 133.0
17089 5662.0
17093 1356.0
17095 662.0
17101 462.0
17103 540.0
17107 500.0
17113 1730.0
17121 637.0
17135 230.0
17137 506.0
17139 187.0
17145 324.0
17157 453.0
17169 97.0
17177 430.0
17183 988.0
18011 621.0
18017 321.0
18039 3326.0
18053 871.0
18057 3662.0
18075 296.0
18077 485.0
18121 202.0
18123 122.0
18133 320.0
18153 374.0
18163 1888.0
18167 1525.0
19009 33.0
19013 1620.0
19019 232.0
19025 172.0
19053 47.0
19057 536.0
19059 273.0
19063 135.0
19103 1236.0
19113 2539.0
19115 143.0
19119 224.0
19127 465.0
19137 106.0
19153 5346.0
19157 165.0
19167 441.0
19191 220.0
19193 1071.0
20003 52.0
20005 138.0
20021 193.0
20023 24.0
20033 51.0
20039 10.0
20043 159.0
20045 735.0
20047 45.0
20053 195.0
20063 46.0
20065 37.0
20073 42.0
20089 53.0
20091 4750.0
20129 59.0
20135 74.0
20137 75.0
20145 132.0
20149 102.0
20153 58.0
20157 147.0
20171 104.0
20177 1754.0
20183 72.0
20187 24.0
20203 48.0
20205 67.0
20207 17.0
21001 187.0
21019 719.0
21021 266.0
21027 95.0
2

51620 17.0
51660 215.0
51683 153.0
51685 49.0
51690 136.0
51700 455.0
51740 172.0
51760 662.0
53025 810.0
54013 33.0
54109 222.0
1065 101.0
2013 17.0
2185 73.0
4009 490.0
4012 184.0
5035 347.0
5095 38.0
5099 64.0
6039 605.0
6071 18090.0
6075 1693.0
6077 2008.0
6099 2725.0
12013 66.0
12077 40.0
12079 87.0
12086 24002.0
12093 149.0
12123 70.0
12125 33.0
13019 94.0
13063 809.0
13095 114.0
13099 54.0
13109 23.0
13215 357.0
13261 57.0
13267 100.0
13301 16.0
13303 127.0
16065 719.0
20057 373.0
21013 175.0
21045 147.0
21065 100.0
21075 36.0
21109 77.0
21121 176.0
21129 400.0
21203 62.0
24510 3300.0
26085 58.0
28013 91.0
29223 86.0
30035 67.0
31043 216.0
31047 263.0
34011 588.0
34017 4383.0
34031 4841.0
34039 3907.0
35005 1516.0
35019 131.0
35023 37.0
35033 15.0
35045 1441.0
35051 144.0
35053 211.0
36047 202.0
36081 320.0
36087 1598.0
37083 248.0
38005 99.0
40001 154.0
40139 271.0
41049 64.0
45009 31.0
45033 99.0
45053 69.0
45075 226.0
45089 76.0
47007 63.0
47061 78.0
48003 159.0
48013 119.0
4

In [73]:
for j in location: 
    if j in K:
        COVID_14days[j] = K[j]
        print (j, COVID_14days[j], K[j])

    if j in Q:
        COVID_14days[j] = Q[j]
        print (j, COVID_14days[j])
        
    if j in B:
        COVID_14days[j] = B[j]
        print (j, COVID_14days[j])
        
    if j in M:
        COVID_14days[j] = M[j]
        print (j, COVID_14days[j])

    if j in R:
        COVID_14days[j] = R[j]
        print (j, COVID_14days[j])

36085 1764
36061 2704
36047 4232 4232
36081 4103
36005 2792


In [74]:
print (covid_cases_county_ny_times)

{1001.0: 270.0, 1003.0: 1124.0, 1005.0: 45.0, 1007.0: 195.0, 1009.0: 372.0, 1011.0: 23.0, 1013.0: 78.0, 1015.0: 700.0, 1017.0: 200.0, 1019.0: 117.0, 1021.0: 166.0, 1023.0: 15.0, 1025.0: 87.0, 1027.0: 70.0, 1029.0: 96.0, 1031.0: 285.0, 1033.0: 389.0, 1035.0: 38.0, 1037.0: 76.0, 1039.0: 160.0, 1041.0: 35.0, 1043.0: 730.0, 1045.0: 159.0, 1047.0: 199.0, 1049.0: 671.0, 1051.0: 408.0, 1053.0: 104.0, 1055.0: 969.0, 1057.0: 173.0, 1059.0: 198.0, 1061.0: 123.0, 1063.0: 50.0, 1065.0: 101.0, 1067.0: 51.0, 1069.0: 532.0, 1071.0: 543.0, 1073.0: 4578.0, 1075.0: 99.0, 1077.0: 773.0, 1079.0: 216.0, 1081.0: 545.0, 1083.0: 613.0, 1085.0: 36.0, 1087.0: 90.0, 1089.0: 1987.0, 1091.0: 91.0, 1093.0: 129.0, 1095.0: 954.0, 1097.0: 1339.0, 1099.0: 58.0, 1101.0: 736.0, 1103.0: 1285.0, 1105.0: 48.0, 1107.0: 165.0, 1109.0: 90.0, 1111.0: 84.0, 1113.0: 147.0, 1115.0: 507.0, 1117.0: 1379.0, 1119.0: 48.0, 1121.0: 355.0, 1123.0: 159.0, 1125.0: 1281.0, 1127.0: 382.0, 1129.0: 42.0, 1131.0: 32.0, 1133.0: 201.0, 2013.0: 17

In [75]:
#Parameters
pro_c_s = [(i,county_of_states[i]) for i in location ]
cartesian_pro_county_state = gp.tuplelist(pro_c_s)


In [76]:
df = covid_last14_stats
df['fips'] = df['fips'].astype(int)

In [77]:
###############################################################################################
######################## END calculating different types of vulnerabilities ###################

Since we allocate CHW proportional to the county values of certain vulnaribilites within state, we need a few function to help us with the calculations. 



In [78]:
# This function return the value for the state for the given dictionary

# More specifically sum upt the values for the counties of each state

def total_state(dict_1):
    state_dict = {}
    for s in State:
        state_dict [s] = sum(float(dict_1[j]) for (j,s) in cartesian_pro_county_state.select('*', s) if j in dict_1)  
    return state_dict



In [79]:
# Calculte the population per state by summing up the population in each county in the state
State_pop = total_state(population_county)

In [80]:


# This function returns the ratio of the dict value for county and state of the county
def Proportional(county_level, state_level):
    
    prop = {}
       
    for (j,s) in cartesian_pro_county_state:
        if state_level[s] >= 1e-6 and j in county_level:
            prop[j] = (float(county_level[j])/float(state_level[s]))    
        else:
            prop[j] = 0
                
    return prop

In [81]:
print (ACI_total)
print (ACI_total[35039])

{1001: 9049, 1003: 30763, 1005: 7244, 1007: 4272, 1009: 9290, 1011: 2935, 1013: 4814, 1015: 25890, 1017: 7400, 1019: 5105, 1021: 9147, 1023: 3273, 1025: 6716, 1027: 3112, 1029: 3180, 1031: 9926, 1033: 9540, 1035: 2765, 1037: 2155, 1039: 6835, 1041: 3219, 1043: 15478, 1045: 10930, 1047: 13544, 1049: 17896, 1051: 12373, 1053: 6235, 1055: 21998, 1057: 4144, 1059: 6715, 1061: 6647, 1063: 3143, 1065: 4730, 1067: 3651, 1069: 22918, 1071: 11352, 1073: 126944, 1075: 3099, 1077: 15561, 1079: 6877, 1081: 23596, 1083: 14596, 1085: 3540, 1087: 5301, 1089: 48769, 1091: 5053, 1093: 6051, 1095: 20669, 1097: 90962, 1099: 5565, 1101: 52763, 1103: 22180, 1105: 3375, 1107: 4807, 1109: 6945, 1111: 4428, 1113: 14480, 1115: 12599, 1117: 20035, 1119: 4098, 1121: 19979, 1123: 9968, 1125: 34877, 1127: 13887, 1129: 4268, 1131: 4086, 1133: 5577, 2013: 444, 2016: 400, 2020: 49663, 2050: 8680, 2060: 141, 2068: 142, 2070: 1983, 2090: 12661, 2100: 606, 2105: 504, 2110: 4631, 2122: 11347, 2130: 2881, 2150: 2412, 2158

In [82]:
#ACI
ACI_State = total_state(ACI_total)

In [83]:
# Create dicts for the variables of SVI
E_POV = dict(zip(svi_county.FIPS, svi_county.EP_POV))
E_UNEMP = dict(zip(svi_county.FIPS, svi_county.EP_UNEMP))
E_PCI = dict(zip(svi_county.FIPS, svi_county.EP_PCI))
E_NOHSDP = dict(zip(svi_county.FIPS, svi_county.EP_NOHSDP))
E_AGE65 = dict(zip(svi_county.FIPS, svi_county.EP_AGE65))
E_AGE17 = dict(zip(svi_county.FIPS, svi_county.EP_AGE17))
E_DISABL = dict(zip(svi_county.FIPS, svi_county.EP_DISABL))
E_SNGPNT = dict(zip(svi_county.FIPS, svi_county.EP_SNGPNT))
E_MINRTY = dict(zip(svi_county.FIPS, svi_county.EP_MINRTY))
E_LIMENG = dict(zip(svi_county.FIPS, svi_county.EP_LIMENG))
E_MUNIT = dict(zip(svi_county.FIPS, svi_county.EP_MUNIT))
E_MOBILE = dict(zip(svi_county.FIPS, svi_county.EP_MOBILE))
E_CROWD = dict(zip(svi_county.FIPS, svi_county.EP_CROWD))
E_NOVEH = dict(zip(svi_county.FIPS, svi_county.EP_NOVEH))
E_GROUPQ = dict(zip(svi_county.FIPS, svi_county.EP_GROUPQ))


# Calculate the state value for the SVI variables
E_POV_State = total_state(E_POV)
E_UNEMP_State = total_state(E_UNEMP) 
E_PCI_State = total_state(E_PCI)
E_NOHSDP_State = total_state(E_NOHSDP) 
E_AGE65_State = total_state(E_AGE65)
E_AGE17_State = total_state(E_AGE17)
E_DISABL_State = total_state(E_DISABL)
E_SNGPNT_State = total_state(E_SNGPNT)
E_MINRTY_State = total_state(E_MINRTY)
E_LIMENG_State = total_state(E_LIMENG)
E_MUNIT_State = total_state(E_MUNIT)
E_MOBILE_State = total_state(E_MOBILE)
E_CROWD_State = total_state(E_CROWD)
E_NOVEH_State = total_state(E_NOVEH)
E_GROUPQ_State = total_state(E_GROUPQ)



# Calculate the proportinal values for the SVI variables
E_POV_Prop = Proportional(E_POV, E_POV_State )
E_UNEMP_Prop = Proportional(E_UNEMP, E_UNEMP_State ) 
E_PCI_Prop = Proportional(E_PCI, E_PCI_State )
E_NOHSDP_Prop = Proportional(E_NOHSDP, E_NOHSDP_State ) 
E_AGE65_Prop = Proportional(E_AGE65, E_AGE65_State )
E_AGE17_Prop = Proportional(E_AGE17, E_AGE17_State )
E_DISABL_Prop = Proportional(E_DISABL, E_DISABL_State )
E_SNGPNT_Prop = Proportional(E_SNGPNT, E_SNGPNT_State )
E_MINRTY_Prop = Proportional(E_MINRTY, E_MINRTY_State )
E_LIMENG_Prop = Proportional(E_LIMENG, E_LIMENG_State )
E_MUNIT_Prop = Proportional(E_MUNIT, E_MUNIT_State )
E_MOBILE_Prop = Proportional(E_MOBILE, E_MOBILE_State )
E_CROWD_Prop = Proportional(E_CROWD, E_CROWD_State )
E_NOVEH_Prop = Proportional(E_NOVEH, E_NOVEH_State )
E_GROUPQ_Prop = Proportional(E_GROUPQ, E_GROUPQ_State )

ACI_Prop = Proportional(ACI_total, ACI_State)


# SVI calculation 

We calculate the ratio of county value to state value by population for each SVI variables (we use EP-estimate percentage- values in the CDC data set), then we take the average of all 15 SVI variables. 

Let SVI variable set be K, where  

K = { Below Poverty, Unemployed, Income, No High School Diploma, Aged 65 or Older, Aged 17 or Younger, Civilian with a Disability, Single-Parent Households, Minority, Speaks English “Less than Well”, Multi-Unit Structures, Mobile Homes, Crowding, No Vehicle, Group Quarters }

We will use these variables in a county base and state base. While County base values are exactly same as the estimated values for these variables in the CDC website, to calculate the state base, we simply sum the county values for all of the counties in each state for each variable. Then we use the following formula to calculate the SVI value for each county.

Let $S$ is the set of states and $j$ is a county in the state $s$, where $s \in S$, $c^k_j$ SVI variable $k \in K$ value for county j, and $c_s$ SVI variable value for state s.

$SVI_j = \frac{1}{15}\sum_{k \in K} \frac{c^k_j}{c^k_s}$


In [84]:

from collections import Counter
# Sum all SVI variable values for each county
SVI_county_sum = dict(Counter(E_POV_Prop) + Counter(E_UNEMP_Prop) + Counter(E_PCI_Prop) + Counter(E_NOHSDP_Prop) + Counter(E_AGE65_Prop) + Counter(E_AGE17_Prop) + Counter(E_DISABL_Prop) + Counter(E_SNGPNT_Prop) + Counter(E_MINRTY_Prop) + Counter(E_LIMENG_Prop) + Counter(E_MUNIT_Prop) + Counter(E_MOBILE_Prop) + Counter(E_CROWD_Prop) + Counter(E_NOVEH_Prop) + Counter(E_GROUPQ_Prop))

# Divide the sum of all SVI variable values
SVI_county = {j: SVI_county_sum[j]/15 for j in SVI_county_sum }


# Proportional Allocation

We consider allocating 1 million CHW over the states proportional to Medicaid enrollment in each state. Further, we allocate CHW to counties in each state proportional to different county vulnerability criterias as follow.

- MEDICAID
- SVI
- YPLL
- UNEMPLOYMENT
- LAST 14 DAYS COVID CASES
- LAST 14 DAYS COVID CASES / POP
- COVID DEATHS / POP

To calculate the total number of allocated CHW to per county according to these vulnerability criterias, we define the following function called "Proportional_allocation", in which we multiply the CHW allocated to each state with the ratio of the chosen vulnerability criteria of the county to the chosen vulnerability criteria of the state, the function return a dictionary with the counties as keys and the number of CHW allocated to each county for the chosen vulnerability criteria as values. 

In [85]:
def Proportional_allocation(county_level, state_level, state_budget):
    prop_allocate = {}
       
    for (j,s) in cartesian_pro_county_state:
        if state_level[s] >= 1e-6 and j in county_level:
            #print (j,s, county_level[j],state_level[s], state_budget[s])
            prop_allocate[j,s] = (float(county_level[j])/float(state_level[s]))*float(state_budget[s])
        
        else:
            prop_allocate[j,s] = 0
            
    
    return prop_allocate

In [86]:
Medicaid_dem = Proportional_allocation(ACI_total, ACI_State, Medicaid_state)
Medicaid_demand = {m[0]: Medicaid_dem[m] for m in Medicaid_dem}
for s in State:
#    print (s)
#    print ( ACI_State[s])
    print (s,Medicaid_state[s])
#for m in Medicaid_demand:
#    print (m, Medicaid_demand[m])

NEW MEXICO 772102
ALABAMA 957116
ALASKA 231145
ARIZONA 1839932
ARKANSAS 830467
CALIFORNIA 11847711
COLORADO 1337805
CONNECTICUT 874974
DELAWARE 239009
FLORIDA 3892552
GEORGIA 1928703
IDAHO 340742
ILLINOIS 2987496
INDIANA 1602976
IOWA 699741
KANSAS 401103
KENTUCKY 1416013
LOUISIANA 1585024
MAINE 232455
MARYLAND 1372695
MASSACHUSETTS 1616404
MICHIGAN 2439425
MINNESOTA 1085778
MISSISSIPPI 632427
MISSOURI 923641
MONTANA 247333
NEBRASKA 254159
NEVADA 685073
NEW HAMPSHIRE 193436
NEW JERSEY 1759653
NEW YORK 6263164
NORTH CAROLINA 1851558
NORTH DAKOTA 96757
OHIO 2788134
OKLAHOMA 797220
OREGON 1053931
PENNSYLVANIA 3069309
RHODE ISLAND 305208
SOUTH CAROLINA 1048276
SOUTH DAKOTA 114059
TENNESSEE 1489536
TEXAS 4457644
UTAH 338812
VERMONT 161049
VIRGINIA 1497770
WASHINGTON 1780968
WEST VIRGINIA 521290
WISCONSIN 1112844
WYOMING 59302
HAWAII 351337
DISTRICT OF COLUMBIA 248591
PUERTO RICO 1622194


In [87]:
#print ('Med', Medicaid_demand[35039])
#print ('Med', Medicaid_demand[4017], 'Pop', population_county[4017], 'Med_capita', 100000*(Medicaid_demand[4017]/population_county[4017]))

In [88]:
#Further we create additional vulnerability values by considering SVI, YPLL, Unemployment, COVID, COVID_capita, COVID_death and COVID_death capita and the number of Medicaid enrolles in each county together

Covid_capita = {j: 100000*(COVID_14days[j]/population_county[j]) for j in location}

Covid_death_capita = {j: 100000*(County_covid_death[j]/population_county[j]) for j in location}

Medicaid_capita = {j: 100000*(Medicaid_demand[j]/population_county[j]) for j in location}

Unemployment_capita = dict(zip(df_unemp.FIPS, df_unemp.Unemployed_Rate))

#Three_month_lag_covid_death_capita = {j: 100000*(three_month_lag_death[j]/population_county[j]) for j in location}

#Two_month_lag_covid_death_capita = {j: 100000*(two_month_lag_death[j]/population_county[j]) for j in location}

#One_month_lag_covid_death_capita = {j: 100000*(one_month_lag_death[j]/population_county[j]) for j in location}

In [89]:
for j in location:
    print (j, County_covid_death[j], population_county[j], Covid_death_capita[j])

35039 22.0 38921 56.52475527350274
1001 42.0 55869 75.17585781023465
1009 40.0 57826 69.1730363504306
1013 42.0 19448 215.9605100781571
1015 121.0 113605 106.5093965934598
1017 50.0 33254 150.3578516870151
1031 15.0 52342 28.657674525237862
1033 42.0 55241 76.03048460382686
1039 34.0 37049 91.7703581743097
1043 42.0 83768 50.138477700315164
1045 54.0 49172 109.81859594891401
1051 64.0 81209 78.8089990025736
1055 66.0 102268 64.53631634528885
1067 6.0 17205 34.873583260680036
1069 38.0 105882 35.88900851891728
1071 23.0 51626 44.55119513423469
1077 54.0 92729 58.23420936276677
1079 36.0 32924 109.34272870854088
1083 45.0 98915 45.493605620987715
1089 148.0 372909 39.68796676937269
1095 55.0 96774 56.83344700022733
1097 362.0 413210 87.60678589579149
1103 50.0 119679 41.77842395073488
1111 21.0 22722 92.42144177449168
1113 3.0 57961 5.175894135711944
1115 55.0 89512 61.44427562784878
1117 77.0 217702 35.369449982085605
1121 54.0 79978 67.51856760609168
2261 2.0 9202 21.734405564007826
40

29097 60.0 121328 49.45272319662403
29099 103.0 225081 45.76130370844274
29103 1.0 3959 25.25890376357666
29105 30.0 35723 83.97950899980404
29107 29.0 32708 88.6633239574416
29109 37.0 38355 96.46721418328771
29113 9.0 59013 15.250876925423213
29115 9.0 11920 75.50335570469798
29121 3.0 15117 19.845207382417147
29125 3.0 8697 34.494653328734046
29127 16.0 28530 56.08131791097091
29129 1.0 3617 27.647221454243848
29131 24.0 25619 93.68047152504
29137 3.0 8644 34.706154558074964
29139 5.0 11551 43.286295558826076
29141 12.0 20627 58.17617685557764
29145 38.0 58236 65.25173432241226
29151 3.0 13615 22.034520749173705
29157 12.0 19136 62.70903010033445
29159 32.0 42339 75.58043411511845
29161 42.0 44573 94.22744710923654
29167 23.0 32149 71.54188310678404
29171 1.0 4696 21.294718909710394
29173 1.0 10309 9.700261907071491
29177 4.0 23018 17.377704405248068
29183 224.0 402022 55.71834377223137
29186 2.0 17894 11.176930814798256
29189 976.0 994205 98.16888871007488
29195 12.0 22761 52.72176

39089 77.0 176862 43.536768780178896
39091 18.0 45672 39.4114555964267
39093 106.0 309833 34.21197871111212
39099 301.0 228683 131.62325140041017
39103 57.0 179746 31.71141499671759
39107 50.0 41172 121.44175653356649
39109 68.0 106987 63.55912400572032
39111 21.0 13654 153.80108393144866
39113 243.0 531687 45.70358124234747
39117 2.0 35328 5.661231884057971
39119 10.0 86215 11.598909702487965
39123 30.0 40525 74.02837754472549
39125 12.0 18672 64.26735218508998
39127 13.0 36134 35.97719599269386
39133 74.0 162466 45.54799157977669
39135 21.0 40882 51.36734993395626
39137 49.0 33861 144.7092525324119
39139 39.0 121154 32.190435313732934
39141 63.0 76666 82.17462760545745
39143 29.0 58518 49.557401141529105
39147 19.0 55178 34.43401355612745
39149 17.0 48590 34.98662276188516
39151 221.0 370606 59.63206208210337
39153 337.0 541013 62.2905549404543
39155 149.0 197974 75.2624081950155
39157 67.0 91987 72.83637905356191
39159 8.0 58988 13.562080423136909
39161 18.0 28275 63.6604774535809
3

56019 4.0 8445 47.365304914150386
56021 23.0 99500 23.115577889447238
56025 46.0 79858 57.60224398306995
56029 6.0 29194 20.55216825375077
56031 7.0 8393 83.40283569641367
56033 10.0 30485 32.80301787764474
56035 1.0 9831 10.171905197843556
56043 8.0 7805 102.49839846252402
56045 1.0 6927 14.436263894903998
1003 98.0 223234 43.9001227411595
1019 24.0 26196 91.61704076958314
1021 38.0 44428 85.53164670928243
1027 24.0 13235 181.33736305251227
1041 30.0 13772 217.83328492593665
1049 36.0 71513 50.34049753191727
1057 16.0 16302 98.1474665685192
1061 8.0 26271 30.45182901298009
1073 500.0 658573 75.92172773557373
1075 8.0 13805 57.95001810938066
1081 66.0 164542 40.111339354085885
1093 36.0 29709 121.17540139351712
1109 14.0 33114 42.27819049344688
1123 91.0 40367 225.4316644784106
1125 154.0 209355 73.5592653626615
2068 0.0 2097 0.0
2090 17.0 96849 17.5530981218185
2110 3.0 31974 9.382623381497467
2122 7.0 58708 11.923417592150985
2195 0.0 3266 0.0
2220 0.0 8493 0.0
2230 0.0 1183 0.0
2275

22023 3.0 6973 43.02308905779435
22053 51.0 31368 162.58607498087224
22057 139.0 97614 142.3976069006495
22073 212.0 153279 138.3098793702986
22079 188.0 129648 145.00802172035048
22081 26.0 8442 307.9838900734423
22089 70.0 53100 131.82674199623352
22099 71.0 53431 132.88166045928395
22111 57.0 22108 257.8252216392256
22121 43.0 26465 162.47874551294163
22123 20.0 10830 184.67220683287164
22125 25.0 15568 160.5858170606372
23003 1.0 67055 1.491313101185594
23015 2.0 34634 5.774672287347693
24003 298.0 579234 51.44725620388306
24005 732.0 827370 88.47311360092824
24011 10.0 33406 29.934742261869122
24017 105.0 163257 64.31577206490381
24027 145.0 325690 44.520863397709476
24041 7.0 37181 18.826820150076653
24045 59.0 103609 56.94486000250944
25001 192.0 212990 90.14507723367295
25005 853.0 565217 150.91548909533861
25015 162.0 160830 100.72747621712368
25021 1178.0 706775 166.67256198931767
25027 1274.0 830622 153.3790340251041
26003 2.0 9108 21.958717610891526
26009 5.0 23324 21.43714

6087 28.0 273213 10.248414240903617
6097 157.0 494336 31.759774728120146
6103 25.0 65084 38.411898469669964
6111 175.0 846006 20.68543249102252
8033 0.0 2055 0.0
8037 11.0 55127 19.953924574165107
8045 6.0 60061 9.989843658946738
8053 0.0 820 0.0
8055 5.0 6897 72.4952878062926
8073 0.0 5701 0.0
8087 52.0 29068 178.89087656529517
8109 2.0 6824 29.30832356389214
8111 0.0 728 0.0
8117 4.0 31011 12.89864886653123
8125 2.0 10019 19.962072063080146
9001 1491.0 943332 158.05676050425515
9009 1238.0 854757 144.83648569125492
12003 32.0 29210 109.55152345087299
12007 19.0 28201 67.37349739370944
12023 94.0 71686 131.12741678988922
12055 146.0 106221 137.44928027414542
12057 929.0 1471968 63.112785060544795
12061 136.0 159923 85.04092594561132
12065 11.0 14246 77.21465674575319
12081 371.0 403253 92.0017953989183
12085 172.0 161000 106.83229813664597
12087 27.0 74228 36.37441396777496
12099 1678.0 1496770 112.10807271658304
12115 389.0 433742 89.68465124428808
12129 14.0 33739 41.49500577966152


51021 5.0 6280 79.61783439490446
51025 7.0 16231 43.127348900252606
51059 623.0 1147532 54.29042501647013
51107 142.0 413538 34.33783594252523
51111 4.0 12196 32.797638570022954
51135 22.0 15232 144.4327731092437
51141 28.0 17608 159.01862789641072
51167 9.0 26586 33.852403520649965
51175 40.0 17631 226.873121206965
51510 77.0 159428 48.29766414933387
51530 1.0 6478 15.436863229391788
51600 9.0 24019 37.47033598401266
51630 6.0 29036 20.664003306240527
51650 35.0 134510 26.02037023269645
51775 22.0 25301 86.95308485830598
51830 8.0 14954 53.49739200213989
51840 4.0 28078 14.246028919438707
53013 2.0 3985 50.18820577164366
53017 12.0 43429 27.631306270003915
53019 1.0 7627 13.111315064901008
53047 15.0 42243 35.50884170158369
53049 3.0 22471 13.350540696898223
53051 1.0 13724 7.286505392013991
53057 27.0 129205 20.89702410897411
53069 0.0 4488 0.0
54055 37.0 58758 62.97014874570271
54075 0.0 8247 0.0
54089 10.0 12573 79.53551260637876
54099 15.0 39402 38.06913354652048
54105 1.0 5821 17

36047 5841.0 2559903 228.1727081065181
36081 6097.0 2253858 270.5139365479103
36087 527.0 325789 161.76113987887865
37083 36.0 50010 71.9856028794241
38005 14.0 6832 204.91803278688525
40001 13.0 22194 58.57438947463278
40139 12.0 19983 60.05104338687885
41049 7.0 11603 60.32922520037921
45009 37.0 14066 263.04564197355324
45033 52.0 30479 170.60927195774138
45053 23.0 30073 76.48056396102817
45075 133.0 86175 154.33710472874964
45089 51.0 30368 167.93993677555324
47007 5.0 15064 33.191715347849176
47061 14.0 13427 104.26752066731214
48003 13.0 18705 69.50013365410318
48013 41.0 51153 80.15170175747268
48017 6.0 7000 85.71428571428571
48025 40.0 32565 122.83126055581145
48069 8.0 7530 106.24169986719787
48079 5.0 2853 175.2541184717841
48095 2.0 2726 73.36757153338225
48101 4.0 1398 286.1230329041488
48103 7.0 4797 145.9245361684386
48113 1536.0 2635516 58.280807249889584
48117 38.0 18546 204.895934433301
48153 17.0 5712 297.6190476190476
48169 14.0 6229 224.75517739605073
48177 24.0 2

In [90]:
County_covid_death[9009], population_county[9009], Covid_death_capita[9009]

(1238.0, 854757, 144.83648569125492)

In [91]:
# Dictionary for the total Covid per capita for each state
Total_covid_cap = total_state(Covid_capita) 


# Dictionary for the total Covid death per capita for each state
Total_covid_death_cap = total_state(Covid_death_capita) 


# Dictionary for the total Medicaid per capita for each state
Total_medicaid_cap = total_state(Medicaid_capita)


# Dictionary for the total Unemployment per capita for each state
Total_unemployment_cap = total_state(Unemployment_capita)



In [92]:
# Dictionaries for the different vulnerability criteria values for states

# Dictionary for total Medicaid patient numbers for each state
Medicaid_demand_state = total_state(Medicaid_demand) 

# Dictionary for total positive COVID cases for last 14 days in each state
Covid_state = total_state(COVID_14days) 

# Dictionary for total SVI values for each state
SVI_state = total_state(SVI_county) 

# Dictionary for total YPLL values for each state
YPLL_state = total_state(YPLL) 

# Dictionary for total Unemployment numbers for each state
Unemployment_state = total_state(Unemployment)  

In [93]:
print(Medicaid_demand_state)

{'NEW MEXICO': 772102.0, 'ALABAMA': 957115.9999999999, 'ALASKA': 231145.00000000006, 'ARIZONA': 1839932.0, 'ARKANSAS': 830466.9999999999, 'CALIFORNIA': 11847710.999999998, 'COLORADO': 1337804.9999999998, 'CONNECTICUT': 874974.0, 'DELAWARE': 239009.0, 'FLORIDA': 3892552.0, 'GEORGIA': 1928703.000000001, 'IDAHO': 340741.9999999999, 'ILLINOIS': 2987495.999999999, 'INDIANA': 1602975.9999999995, 'IOWA': 699741.0000000003, 'KANSAS': 401103.00000000006, 'KENTUCKY': 1416012.9999999995, 'LOUISIANA': 1585023.9999999998, 'MAINE': 232455.00000000003, 'MARYLAND': 1372695.0, 'MASSACHUSETTS': 1616404.0, 'MICHIGAN': 2439425.0000000005, 'MINNESOTA': 1085777.9999999998, 'MISSISSIPPI': 632427.0000000002, 'MISSOURI': 923641.0000000002, 'MONTANA': 247333.0, 'NEBRASKA': 254158.99999999997, 'NEVADA': 685073.0000000001, 'NEW HAMPSHIRE': 193436.0, 'NEW JERSEY': 1759653.0000000002, 'NEW YORK': 6263164.0, 'NORTH CAROLINA': 1851558.0000000005, 'NORTH DAKOTA': 96757.00000000001, 'OHIO': 2788134.0, 'OKLAHOMA': 79722

# 1 million CHW allocation to states

We allocate 1 million CHWs to states proportional to total Medicaid enrolles in each state.

Let's $FedCHW$ represents the number of CHW will be allocated within states by the federal government, which is 1 million in our project. $TotMed$ represents the total Medicaid enrollee numbers over the US, $Med_s$ is the total Medicaid enrollee numbers in state $s \in S$, and $CHW_s$ is the total number of CHW allocated to state $ s\in S$. 

$CHW_s = FedCHW*\frac{Med_s}{TotMed}$



In [94]:
# We consider allocation of 1 million CHW all over the US
Federal_budget_CHW = 1000000

# First, we calculate the Total Medicaid enrolles all over the US
Total_federal_need = sum(Medicaid_demand_state[s] for s in State)

# Allocate the 1 million CHWs proportional to Medicaid enrolles in each state
Medicaid_budget_state = {s: (Medicaid_demand_state[s]/Total_federal_need)*Federal_budget_CHW  for s in State}

In [95]:
for s in State:
    print (s, Medicaid_demand_state[s], Total_federal_need, Federal_budget_CHW, (Medicaid_demand_state[s]/Total_federal_need)*Federal_budget_CHW)

NEW MEXICO 772102.0 76256043.0 1000000 10125.12542776446
ALABAMA 957115.9999999999 76256043.0 1000000 12551.3462585516
ALASKA 231145.00000000006 76256043.0 1000000 3031.1696084204114
ARIZONA 1839932.0 76256043.0 1000000 24128.343507149984
ARKANSAS 830466.9999999999 76256043.0 1000000 10890.5073923125
CALIFORNIA 11847710.999999998 76256043.0 1000000 155367.50313676777
COLORADO 1337804.9999999998 76256043.0 1000000 17543.593233653624
CONNECTICUT 874974.0 76256043.0 1000000 11474.159497103725
DELAWARE 239009.0 76256043.0 1000000 3134.295861640762
FLORIDA 3892552.0 76256043.0 1000000 51045.81678857897
GEORGIA 1928703.000000001 76256043.0 1000000 25292.461084035018
IDAHO 340741.9999999999 76256043.0 1000000 4468.393409818024
ILLINOIS 2987495.999999999 76256043.0 1000000 39177.17052273483
INDIANA 1602975.9999999995 76256043.0 1000000 21020.97010200227
IOWA 699741.0000000003 76256043.0 1000000 9176.203910816621
KANSAS 401103.00000000006 76256043.0 1000000 5259.950349115283
KENTUCKY 1416012.99

In [96]:
print (Covid_state)

{'NEW MEXICO': 29963.0, 'ALABAMA': 28001.0, 'ALASKA': 7568.0, 'ARIZONA': 48817.0, 'ARKANSAS': 21247.0, 'CALIFORNIA': 172162.0, 'COLORADO': 61177.0, 'CONNECTICUT': 19087.0, 'DELAWARE': 6035.0, 'FLORIDA': 102157.0, 'GEORGIA': 41837.0, 'IDAHO': 16858.0, 'ILLINOIS': 135015.0, 'INDIANA': 76709.0, 'IOWA': 38970.0, 'KANSAS': 30724.0, 'KENTUCKY': 37901.0, 'LOUISIANA': 27200.0, 'MAINE': 2394.0, 'MARYLAND': 28695.0, 'MASSACHUSETTS': 32473.0, 'MICHIGAN': 86162.0, 'MINNESOTA': 81799.0, 'MISSISSIPPI': 16899.0, 'MISSOURI': 49194.0, 'MONTANA': 13788.0, 'NEBRASKA': 24900.0, 'NEVADA': 28407.0, 'NEW HAMPSHIRE': 5275.0, 'NEW JERSEY': 52428.0, 'NEW YORK': 66932.0, 'NORTH CAROLINA': 46814.0, 'NORTH DAKOTA': 13781.0, 'OHIO': 109068.0, 'OKLAHOMA': 38743.0, 'OREGON': 16473.0, 'PENNSYLVANIA': 86130.0, 'RHODE ISLAND': 11308.0, 'SOUTH CAROLINA': 19517.0, 'SOUTH DAKOTA': 13628.0, 'TENNESSEE': 45570.0, 'TEXAS': 145641.0, 'UTAH': 37679.0, 'VERMONT': 1095.0, 'VIRGINIA': 31337.0, 'WASHINGTON': 29738.0, 'WEST VIRGINIA

# Proportional allocation for different vulnerability values

Let V = {Medicaid, SVI, YPLL, Unemployment, COVID, COVID_capita, COVID_death and COVID_death capita, SVI and Medicaid, YPLL and Medicaid, Unemployment and Medicaid, COVID and Medicaid, COVID_capita and Medicaid, COVID_death and Medicaid and COVID_death capita and Medicaid}. We assume $v_j$ represent the vulnerability value for county $j \in J$, while $v_s$ represent the sum of the vulnerability values for each county in the state of county j.

$Prop_{v_j} = \frac{v_j}{v_s}*CHW_s$



In [97]:
# Calling proportional allocation function for different vulnerability criterias

# Proportional allocation according to cumulative Covid death in per capita in each county
Proportional_to_covid_death_cap = Proportional_allocation(Covid_death_capita, Total_covid_death_cap,Medicaid_budget_state)

# Propportional allocation according to Medicaid enrollee number in each county
Proportional_to_medicaid = Proportional_allocation(Medicaid_demand, Medicaid_demand_state,Medicaid_budget_state )

# Proportional allocation according to Medicaid enrolles per capita in each county
Proportional_to_medicaid_cap = Proportional_allocation(Medicaid_capita, Total_medicaid_cap, Medicaid_budget_state )

# Proportional allocation according to last 14 days positive COVID cases in each county
Proportional_to_covid = Proportional_allocation(COVID_14days, Covid_state, Medicaid_budget_state)

# Proportional allocation according to SVI score in each county
Proportional_to_SVI = Proportional_allocation(SVI_county, SVI_state, Medicaid_budget_state)

# Proportional allocation according to YPLL in each county
Proportional_to_YPLL = Proportional_allocation(YPLL, YPLL_state, Medicaid_budget_state)

# Proportional allocation according to Unemployment  in each county
Proportional_to_unemployment = Proportional_allocation(Unemployment, Unemployment_state, Medicaid_budget_state)

# Proportional allocation according to Medicaid enrolles per capita in each county
Proportional_to_unemployment_cap = Proportional_allocation(Unemployment_capita, Total_unemployment_cap, Medicaid_budget_state )

# Proportional allocation according to last 14 days positive COVID cases per capita in each county
Proportional_to_covid_capita = Proportional_allocation(Covid_capita, Total_covid_cap, Medicaid_budget_state)



# Normalize values for comparison
To be able compare the different vulnerability values for each county, we normalize all vulnerability values as follows. 

Let $m_{v_s} = \min \{v_j, \text{ for county j in state s }\}$  and 
$M_{v_s} = \max \{v_j, \text{ for county j in state s }\}$.

We calculate the normalize value for each vulnerability for each county by substracting the min vulnerability in the state of the county and dividing that by the differences between max and min value of the vulnerability values in the state. Mathematical formulation for the normalization is as follows.

$N_{v_j} = \frac{v_j - m_{v_s}}{M_{v_s} - m_{v_s}}$ 

for each $v \in V$, where V = {Medicaid, SVI, YPLL, Unemployment, COVID, COVID_capita, COVID_death and COVID_death capita, SVI and Medicaid, YPLL and Medicaid, Unemployment and Medicaid, COVID and Medicaid, COVID_capita and Medicaid, COVID_death and Medicaid and COVID_death capita and Medicaid}, j is a county in each state $s \in S$.


In [98]:
#Normalize function to normalize the vulnerability values to be able to compare them

def normalize(dict_1):
    
    result = {}
    min_data = {s: min(dict_1[j] for j in location if (j,s) in cartesian_pro_county_state) for s in State }
    max_data = {s: max(dict_1[j] for j in location if (j,s) in cartesian_pro_county_state) for s in State }
    
    for (j,s) in cartesian_pro_county_state:
        
        if (max_data[s] - min_data[s]) != 0 :
    
            result[j] = (dict_1[j] - min_data[s])/(max_data[s] - min_data[s])
        
        else:
            result[j] = 1
    
    return result 

# Percentile Rank

<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.percentileofscore.html"> The function scipy.stats.percentileofscore (a, score, kind='rank')   </a>
computes the percentile rank of a score relative to a list of scores. 
"rank": Average percentage ranking of score. In case of multiple matches, average the percentage rankings of all matching scores.

In [99]:
from scipy import stats

# Calculate percentile ranks

def percentile_ranks(data):
    x = {s: [] for s in State}

    for (j,s) in cartesian_pro_county_state:
         
        x[s].append(data[j])
    
    
    
    percentile_ranks = {i: stats.percentileofscore(x[s], data[i], 'rank') for (i,s) in cartesian_pro_county_state}

    return percentile_ranks

In [100]:
# Write timestamp 

time_stamp = time.strftime('%m-%d-%Y %H:%M:%S')
with open('Output/time_stamp.csv','w') as f:
    w = csv.writer(f)
    now = time.strftime('%m/%d/%Y %H:%M:%S')
    w.writerow(['time',now])
    

In [101]:
#print (Medicaid_demand)

In [102]:
#Write a function to order the dicts
def order_k(dict_1):
    dict_2 = {}
    for m in location:
        if m in dict_1.keys():
            dict_2[m] = dict_1[m]
        else:
            dict_2[m] = 0
    
    return dict_2
            

In [103]:


Medicaid_demand = order_k(Medicaid_demand)#{m: Medicaid_demand[m] for m in location}
COVID_14days = order_k(COVID_14days)#{m: COVID_14days[m] for m in location}
SVI_county = order_k(SVI_county)#{m: SVI_county[m] for m in location}
YPLL = order_k(YPLL)#{m: YPLL[m] for m in location}
Unemployment = order_k(Unemployment)#{m: Unemployment[m] for m in location}
Unemployment_capita = order_k(Unemployment_capita)#{m: Unemployment_capita[m] for m in location}

In [104]:
# Write file allocation with each strategies for each county 

Strategies = ["Medicaid_demand", "Medicaid_capita", "Covid", "SVI"
              , "YPLL","Unemployment", "Unemployment_capita", "Covid_capita",  "Covid_death_capita" ]

fieldnames = []  
fieldnames.append('County_FIPS')


SVI_values = {i:SVI_county[i] for i in location}
s_count = 1
for s in Strategies:   
    fieldnames.append('Proportional_allocation_to_' + s)
    fieldnames.append(s)
    fieldnames.append('Percentile_ranks_' + s)

    
        

writefile = 'Output/County_level_proportional_allocation_for_all_policies.csv'
with open( writefile, 'w' ) as f:
    writer = csv.writer(f)                
    writer.writerow(fieldnames)
    for row in zip(location
                   , Proportional_to_medicaid.values(),         Medicaid_demand.values(),      percentile_ranks(Medicaid_demand).values()
                   , Proportional_to_medicaid_cap.values(),     Medicaid_capita.values(),      percentile_ranks(Medicaid_capita).values()
                   , Proportional_to_covid.values(),            COVID_14days.values(),         percentile_ranks(COVID_14days).values()
                   , Proportional_to_SVI.values(),              SVI_county.values(),           percentile_ranks(SVI_county).values()
                   , Proportional_to_YPLL.values(),             YPLL.values(),                 percentile_ranks(YPLL).values()
                   , Proportional_to_unemployment.values(),     Unemployment.values(),         percentile_ranks(Unemployment).values()
                   , Proportional_to_unemployment_cap.values(), Unemployment_capita.values(),  percentile_ranks(Unemployment_capita).values()
                   , Proportional_to_covid_capita.values(),     Covid_capita.values(),         percentile_ranks(Covid_capita).values()
                   , Proportional_to_covid_death_cap.values(),  Covid_death_capita.values(),   percentile_ranks(Covid_death_capita).values() ):                    
       
        writer.writerow(row)

In [105]:
writefile = 'Output/State_level_allocation.csv'

cl = ['State', 'CHW_allocation']
with open( writefile, 'w' ) as f:
    writer = csv.writer(f)                
    writer.writerow(cl)
    for row in zip( State, Medicaid_demand_state.values()):
        writer.writerow(row)