In [1]:
# Suzan Iloglu, May 21,2020
# Import packages
import csv
import gurobipy as gp
from itertools import product
import geopandas as gpd
import pandas as pd
import numpy as np
import math
import time
import requests
import io
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
pd.options.display.max_columns =200
from IPython.display import Image


# MAPPING THE NEW POLITICS OF CARE: COMMUNITY HEALTH WORKERS
The project presents multiple options for how individual workers in such a Community Health Corps might be distributed within each state. It shows that what you choose to prioritize greatly impacts where care would be sent. We can define communities in greatest need in many ways: we can think about our current crisis and send people to where the COVID19 pandemic rages most fiercely; we can think of long term measures of social and economic inequality embedded in metrics like the Centers for Disease Control and Prevention’s Social Vulnerability Index; we can focus on the places with too many people dying too young and use the County Health Rankings Years-of-Potential-Life-Lost measure; we can think of joblessness and how the pandemic has thrown many into unemployment and target our resources in this way. 

The followings are our options to choose to define vulnerability:


- SOCIAL VULNERABILITY INDEX
- MEDICAID 
- UNEMPLOYMENT
- YEARS OF POTENTIAL LIFE LOST
- TOTAL COVID CASES
- COVID CASES BY POPULATION
- COVID DEATHS BY POPULATION

We will start with Social Vulnerability Index (SVI) from CDC website.

### I. Importing SVI data which includes the variables for calculating county SVI for each state
The CDC uses both a USA-wide and a state by state SVI scores. For our project given that funding is likely going to be managed at a state level, using a state by state SVI scores makes the most sense and will be most sensitive to regional socioeconomic differences. Even though the CDC SVI scores are calculated using percentile rankings, the data sets include raw data estimates for each variables. The following table shows the variablaes used in the method of calculating SVI scores. 




      American Community Survey (ACS), 2014-2018 (5-year) data for the following estimates:
<img src="Data/img/SVI_comp.png" width="500">


Note: Full documentation for 2018 data is available <a href="https://svi.cdc.gov/data-and-tools-download.html">here</a> 
This part of the code shows preliminary mapping of <a href = "https://svi.cdc.gov/">the CDC's Social Vulnerability Index</a>.

Later in the notebook, we will provide the formula to create the SVI value we use in our project. First, we import the data for the US mainland and Puerto Rico.

In [2]:
## import svi data downloaded from CDC website as cited above

## 48 state SVI scores by county
svi_counties_mainland = gpd.read_file("Data/SVI2018_US_COUNTY/SVI2018_US_county.shp")

## Puerto Rico SVI scores by county
svi_counties_puerto_rico = gpd.read_file("Data/PuertoRico_COUNTY/SVI2018_PuertoRico_county.shp")

## Merge 48 states and Puerto Rico SVI 
svi_counties = pd.concat([svi_counties_mainland,svi_counties_puerto_rico ], sort = False)


In [3]:
## Replacing -999 values with 0 for calculations
svi_county = svi_counties.fillna(0)
svi_county  = svi_county.replace(-999, 0)
svi_county['FIPS'] = svi_county['FIPS'].astype(int)

In [4]:
## Create the list for State
State = svi_county.STATE.unique().tolist()

In [5]:
# Create a seperate dictionary for the variables to calculate SVI

# Persons below poverty estimate, 2014-2018 ACS
E_POV = dict(zip(svi_county.FIPS, svi_county.E_POV))

# Civilian (age 16+) unemployed estimate, 2014-2018 ACS
E_UNEMP = dict(zip(svi_county.FIPS, svi_county.E_UNEMP))

# Per capita income estimate, 2014-2018 ACS
E_PCI = dict(zip(svi_county.FIPS, svi_county.E_PCI))

# Persons (age 25+) with no high school diploma estimate, 2014-2018 ACS
E_NOHSDP = dict(zip(svi_county.FIPS, svi_county.E_NOHSDP))

# Persons aged 65 and older estimate
E_AGE65 = dict(zip(svi_county.FIPS, svi_county.E_AGE65))

# Persons aged 17 and younger estimate
E_AGE17 = dict(zip(svi_county.FIPS, svi_county.E_AGE17))

# Population with a disability estimate
E_DISABL = dict(zip(svi_county.FIPS, svi_county.E_DISABL))

# Single parent households with children under 18 estimate
E_SNGPNT = dict(zip(svi_county.FIPS, svi_county.E_SNGPNT))

# Minority (all persons except white, nonHispanic) estimate, 2014-2018 ACS
E_MINRTY = dict(zip(svi_county.FIPS, svi_county.E_MINRTY))

# Persons (age 5+) who speak English "less than well" estimate, 2014-2018 ACS
E_LIMENG = dict(zip(svi_county.FIPS, svi_county.E_LIMENG))

# Housing in structures with 10 or more units estimate, 2014-2018 ACS
E_MUNIT = dict(zip(svi_county.FIPS, svi_county.E_MUNIT))

# Mobile homes estimate MOE, 2014-2018 ACS
E_MOBILE = dict(zip(svi_county.FIPS, svi_county.E_MOBILE))

# At household level (occupied housing units), more people than rooms estimate, 2014-2018 ACS
E_CROWD = dict(zip(svi_county.FIPS, svi_county.E_CROWD))

# Households with no vehicle available estimate, 2014-2018 ACS
E_NOVEH = dict(zip(svi_county.FIPS, svi_county.E_NOVEH))

# Persons in institutionalized group quarters estimate, 2014-2018 ACS
E_GROUPQ = dict(zip(svi_county.FIPS, svi_county.E_GROUPQ))

# Percentage of persons below poverty estimate
E_POV = dict(zip(svi_county.FIPS, svi_county.E_POV))

# Medicaid 
Medicaid is a means-tested health insurance program for low-income children, pregnant women, adults, seniors, and people with disabilities. Medicaid is jointly funded by federal and state governments and managed by states within federal standards and a wide range of state options. <a href="https://data.medicaid.gov/Enrollment/State-Medicaid-and-CHIP-Applications-Eligibility-D/n5ce-jxme"> Data Source for Medicaid Enrollment </a> 

In [6]:

import sodapy
from sodapy import Socrata

# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
client = Socrata("data.medicaid.gov", None)


# Returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("83yt-67it", limit=4000)


# Read the medicaid demand data
#df_mm = pd.read_csv("Data/2020_06_Preliminary_applications__eligibility_determinations__and_enrollment_data.csv")

# Convert to pandas DataFrame
df_mm = pd.DataFrame.from_records(results)
df_mm.head(5)
df_mm.columns




Index(['applications_for_financial_assistance_submitted_to_the_state_based_marketplace',
       'applications_for_financial_assistance_submitted_to_the_state_based_marketplace_footnotes',
       'final_report', 'geocoded_column',
       'individuals_determined_eligible_for_chip_at_application',
       'individuals_determined_eligible_for_chip_at_application_footnotes',
       'individuals_determined_eligible_for_medicaid_at_application',
       'individuals_determined_eligible_for_medicaid_at_application_footnotes',
       'latitude', 'longitude', 'medicaid_and_chip_child_enrollment',
       'medicaid_and_chip_child_enrollment_footnotes',
       'new_applications_submitted_to_medicaid_and_chip_agencies',
       'new_applications_submitted_to_medicaid_and_chip_agencies_footnotes',
       'preliminary_updated', 'report_date', 'state_abbreviation',
       'state_expanded_medicaid', 'state_name',
       'total_applications_for_financial_assistance_submitted_at_state_level',
       'total_a

In [7]:
df_mm['State Name'] = df_mm['state_name'].str.upper() 

In [8]:
Medicaid_state = dict(zip(df_mm['State Name'], df_mm['total_medicaid_and_chip_enrollment']))
Medicaid_state['PUERTO RICO'] = 1622194
print (Medicaid_state)

{'ALABAMA': '957116', 'ALASKA': '231145', 'ARIZONA': '1839932', 'ARKANSAS': '830467', 'CALIFORNIA': '11847711', 'COLORADO': '1337805', 'CONNECTICUT': '874974', 'DELAWARE': '239009', 'DISTRICT OF COLUMBIA': '248591', 'FLORIDA': '3892552', 'GEORGIA': '1928703', 'HAWAII': '351337', 'IDAHO': '340742', 'ILLINOIS': '2987496', 'INDIANA': '1602976', 'IOWA': '699741', 'KANSAS': '401103', 'KENTUCKY': '1416013', 'LOUISIANA': '1585024', 'MAINE': '232455', 'MARYLAND': '1372695', 'MASSACHUSETTS': '1616404', 'MICHIGAN': '2439425', 'MINNESOTA': '1085778', 'MISSISSIPPI': '632427', 'MISSOURI': '923641', 'MONTANA': '247333', 'NEBRASKA': '254159', 'NEVADA': '685073', 'NEW HAMPSHIRE': '193436', 'NEW JERSEY': '1759653', 'NEW MEXICO': '772102', 'NEW YORK': '6263164', 'NORTH CAROLINA': '1851558', 'NORTH DAKOTA': '96757', 'OHIO': '2788134', 'OKLAHOMA': '797220', 'OREGON': '1053931', 'PENNSYLVANIA': '3069309', 'RHODE ISLAND': '305208', 'SOUTH CAROLINA': '1048276', 'SOUTH DAKOTA': '114059', 'TENNESSEE': '1489536

In [9]:
df_mmm = pd.read_csv("Data/ACSST5Y2018.S2704_data_with_overlays_2020-08-01T140649.csv", header=[1])
df_mmm.head(1)
#df_mmm.dtypes

Unnamed: 0,id,Geographic Area Name,Estimate!!Total!!Civilian noninstitutionalized population,Margin of Error!!Total MOE!!Civilian noninstitutionalized population,Estimate!!Public Coverage!!Civilian noninstitutionalized population,Margin of Error!!Public Coverage MOE!!Civilian noninstitutionalized population,Estimate!!Percent Public Coverage!!Civilian noninstitutionalized population,Margin of Error!!Percent Public Coverage MOE!!Civilian noninstitutionalized population,Estimate!!Total!!Medicare coverage alone or in combination,Margin of Error!!Total MOE!!Medicare coverage alone or in combination,Estimate!!Public Coverage!!Medicare coverage alone or in combination,Margin of Error!!Public Coverage MOE!!Medicare coverage alone or in combination,Estimate!!Percent Public Coverage!!Medicare coverage alone or in combination,Margin of Error!!Percent Public Coverage MOE!!Medicare coverage alone or in combination,Estimate!!Total!!Medicare coverage alone or in combination!!Under 19,Margin of Error!!Total MOE!!Medicare coverage alone or in combination!!Under 19,Estimate!!Public Coverage!!Medicare coverage alone or in combination!!Under 19,Margin of Error!!Public Coverage MOE!!Medicare coverage alone or in combination!!Under 19,Estimate!!Percent Public Coverage!!Medicare coverage alone or in combination!!Under 19,Margin of Error!!Percent Public Coverage MOE!!Medicare coverage alone or in combination!!Under 19,Estimate!!Total!!Medicare coverage alone or in combination!!19 to 64 years,Margin of Error!!Total MOE!!Medicare coverage alone or in combination!!19 to 64 years,Estimate!!Public Coverage!!Medicare coverage alone or in combination!!19 to 64 years,Margin of Error!!Public Coverage MOE!!Medicare coverage alone or in combination!!19 to 64 years,Estimate!!Percent Public Coverage!!Medicare coverage alone or in combination!!19 to 64 years,Margin of Error!!Percent Public Coverage MOE!!Medicare coverage alone or in combination!!19 to 64 years,Estimate!!Total!!Medicare coverage alone or in combination!!65 years and over,Margin of Error!!Total MOE!!Medicare coverage alone or in combination!!65 years and over,Estimate!!Public Coverage!!Medicare coverage alone or in combination!!65 years and over,Margin of Error!!Public Coverage MOE!!Medicare coverage alone or in combination!!65 years and over,Estimate!!Percent Public Coverage!!Medicare coverage alone or in combination!!65 years and over,Margin of Error!!Percent Public Coverage MOE!!Medicare coverage alone or in combination!!65 years and over,Estimate!!Total!!Medicaid/means-tested public coverage alone or in combination,Margin of Error!!Total MOE!!Medicaid/means-tested public coverage alone or in combination,Estimate!!Public Coverage!!Medicaid/means-tested public coverage alone or in combination,Margin of Error!!Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination,Estimate!!Percent Public Coverage!!Medicaid/means-tested public coverage alone or in combination,Margin of Error!!Percent Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination,Estimate!!Total!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Margin of Error!!Total MOE!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Estimate!!Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Margin of Error!!Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Estimate!!Percent Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Margin of Error!!Percent Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Estimate!!Total!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Margin of Error!!Total MOE!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Estimate!!Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Margin of Error!!Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Estimate!!Percent Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Margin of Error!!Percent Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Estimate!!Total!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Margin of Error!!Total MOE!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Estimate!!Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Margin of Error!!Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Estimate!!Percent Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Margin of Error!!Percent Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Estimate!!Total!!VA health care coverage alone or in combination,Margin of Error!!Total MOE!!VA health care coverage alone or in combination,Estimate!!Public Coverage!!VA health care coverage alone or in combination,Margin of Error!!Public Coverage MOE!!VA health care coverage alone or in combination,Estimate!!Percent Public Coverage!!VA health care coverage alone or in combination,Margin of Error!!Percent Public Coverage MOE!!VA health care coverage alone or in combination,Estimate!!Total!!VA health care coverage alone or in combination!!Under 19,Margin of Error!!Total MOE!!VA health care coverage alone or in combination!!Under 19,Estimate!!Public Coverage!!VA health care coverage alone or in combination!!Under 19,Margin of Error!!Public Coverage MOE!!VA health care coverage alone or in combination!!Under 19,Estimate!!Percent Public Coverage!!VA health care coverage alone or in combination!!Under 19,Margin of Error!!Percent Public Coverage MOE!!VA health care coverage alone or in combination!!Under 19,Estimate!!Total!!VA health care coverage alone or in combination!!19 to 64 years,Margin of Error!!Total MOE!!VA health care coverage alone or in combination!!19 to 64 years,Estimate!!Public Coverage!!VA health care coverage alone or in combination!!19 to 64 years,Margin of Error!!Public Coverage MOE!!VA health care coverage alone or in combination!!19 to 64 years,Estimate!!Percent Public Coverage!!VA health care coverage alone or in combination!!19 to 64 years,Margin of Error!!Percent Public Coverage MOE!!VA health care coverage alone or in combination!!19 to 64 years,Estimate!!Total!!VA health care coverage alone or in combination!!65 years and over,Margin of Error!!Total MOE!!VA health care coverage alone or in combination!!65 years and over,Estimate!!Public Coverage!!VA health care coverage alone or in combination!!65 years and over,Margin of Error!!Public Coverage MOE!!VA health care coverage alone or in combination!!65 years and over,Estimate!!Percent Public Coverage!!VA health care coverage alone or in combination!!65 years and over,Margin of Error!!Percent Public Coverage MOE!!VA health care coverage alone or in combination!!65 years and over,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,"Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)","Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)","Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)","Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)","Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)","Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)",Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Estimate!!Total!!COVERAGE ALONE!!Public health insurance alone,Margin of Error!!Total MOE!!COVERAGE ALONE!!Public health insurance alone,Estimate!!Public Coverage!!COVERAGE ALONE!!Public health insurance alone,Margin of Error!!Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone,Estimate!!Percent Public Coverage!!COVERAGE ALONE!!Public health insurance alone,Margin of Error!!Percent Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone,Estimate!!Total!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Margin of Error!!Total MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Estimate!!Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Margin of Error!!Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Estimate!!Percent Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Margin of Error!!Percent Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Estimate!!Total!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Margin of Error!!Total MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Estimate!!Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Margin of Error!!Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Estimate!!Percent Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Margin of Error!!Percent Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Estimate!!Total!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone,Margin of Error!!Total MOE!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone,Estimate!!Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone,Margin of Error!!Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone,Estimate!!Percent Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone,Margin of Error!!Percent Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone
0,0500000US01001,"Autauga County, Alabama",54277,219,18191,965,33.5,1.8,(X),(X),10026,410,18.5,0.8,14134,125,36,59,0.3,0.4,32229,261,2155,369,6.7,1.1,7914,152,7835,155,99.0,0.7,(X),(X),9049,859,16.7,1.6,14134,125,5352,682,37.9,4.9,32229,261,2788,452,8.7,1.4,7914,152,909,183,11.5,2.4,(X),(X),1701,290,3.1,0.5,14134,125,8,13,0.1,0.1,32229,261,989,252,3.1,0.8,7914,152,704,156,8.9,2.0,12303,1299,7466,880,60.7,4.9,41915,1301,10666,650,25.4,1.5,17605,763,1001,233,5.7,1.4,3974,267,1595,316,40.1,7.6,10160,310,3801,489,37.4,5.1,4406,284,560,235,12.7,5.3,6224,268,550,209,8.8,3.3,7042,206,1095,224,15.5,3.2,7771,134,1361,253,17.5,3.3,6786,87,1363,239,20.1,3.5,4697,68,4649,73,99.0,0.7,3217,138,3217,138,100.0,1.0,(X),(X),9513,834,17.5,1.5,(X),(X),2884,414,5.3,0.8,(X),(X),6503,810,12.0,1.5,(X),(X),126,101,0.2,0.2


In [10]:
df_mmm['FIPS'] = df_mmm.id.astype(str).str[9:]
df_mmm.head(5)
df_mmm.FIPS.astype(int)
df_mmm['FIPS'] = pd.to_numeric(df_mmm['FIPS'])

In [11]:
ACI_total  = dict(zip(df_mmm['FIPS'], df_mmm["Estimate!!Public Coverage!!Medicaid/means-tested public coverage alone or in combination"]))

In [12]:

#df_m = pd.read_csv("Data/Medicaid_Demand.csv")

# Unemployment 
The unemployment rate is calculated by the U.S. Bureau of Labor Statistics as the percentage of the civilian labor force who are without jobs and have actively sought work within the past four weeks. <a href="https://www.bls.gov/lau/laufaq.htm#Q01"> Data Source for Unemployment  </a> 

In [13]:
from io import StringIO
import datetime 
from datetime import date
from dateutil.relativedelta import relativedelta

back = date.today() + relativedelta(months= -3)
three_months_ago = back.strftime('%b-%y') 


url = 'https://www.bls.gov/web/metro/laucntycur14.txt'
s = requests.get(url).text



df_unemp = pd.read_csv(StringIO(s), sep='|',  skiprows=7, skipfooter=6, engine='python', names = ['LAUS Area Code', 'FIPS State', 'FIPS County', 'Area Title', 'Period', 'Civilian Labor Force','Employed','Unemployed_Level','Unemployed_Rate'])

df_unemp['Period'] = df_unemp['Period'].astype(str)

df_unemp['FIPS'] = df_unemp['LAUS Area Code'].str[3:8]

df_unemp = df_unemp[df_unemp['Period'].str.contains(str(three_months_ago))]

df_unemp.head(5)
#df_unemp.dtypes

Unnamed: 0,LAUS Area Code,FIPS State,FIPS County,Area Title,Period,Civilian Labor Force,Employed,Unemployed_Level,Unemployed_Rate,FIPS
38627,CN0100100000000,1,1,"Autauga County, AL",Sep-20,25403,24049,1354,5.3,1001
38628,CN0100300000000,1,3,"Baldwin County, AL",Sep-20,97719,92203,5516,5.6,1003
38629,CN0100500000000,1,5,"Barbour County, AL",Sep-20,9595,8750,845,8.8,1005
38630,CN0100700000000,1,7,"Bibb County, AL",Sep-20,8655,8073,582,6.7,1007
38631,CN0100900000000,1,9,"Blount County, AL",Sep-20,24703,23688,1015,4.1,1009


In [14]:
#df_unemp[[ 'FIPS', 'Area Title', 'Period','Unemployed_Level','Unemployed_Rate']].to_csv('Data/County_employment.csv', index=False)


In [15]:


df_unemp.replace({'-', 0})

df_unemp['Unemployed_Level'] = df_unemp['Unemployed_Level'].str.replace(',', '')

df_unemp['Unemployed_Rate'] = df_unemp['Unemployed_Rate'].astype(str)

df_unemp['Unemployed_Level'] = df_unemp['Unemployed_Level'].str.strip()

df_unemp['Unemployed_Rate'] = df_unemp['Unemployed_Rate'].str.strip()

df_unemp['FIPS'] = pd.to_numeric(df_unemp['FIPS'])

df_unemp['Unemployed_Level'] = pd.to_numeric(df_unemp['Unemployed_Level'])

df_unemp['Unemployed_Rate'] = pd.to_numeric(df_unemp['Unemployed_Rate'])




# Fill NA with 0
#df_unemp = df_unemp.fillna(0)
df_unemp.tail(5)
#df_unemp.dtypes


Unnamed: 0,LAUS Area Code,FIPS State,FIPS County,Area Title,Period,Civilian Labor Force,Employed,Unemployed_Level,Unemployed_Rate,FIPS
41841,CN7214500000000,72,145,"Vega Baja Municipio, PR",Sep-20,12794,11165,1629,12.7,72145
41842,CN7214700000000,72,147,"Vieques Municipio, PR",Sep-20,2503,2211,292,11.7,72147
41843,CN7214900000000,72,149,"Villalba Municipio, PR",Sep-20,6721,6064,657,9.8,72149
41844,CN7215100000000,72,151,"Yabucoa Municipio, PR",Sep-20,8043,7198,845,10.5,72151
41845,CN7215300000000,72,153,"Yauco Municipio, PR",Sep-20,9251,8199,1052,11.4,72153


In [16]:
 
## Read the Unemployment data using cvs data 
#df_unemp = pd.read_csv("Data/Unemployment.csv")

# Fill NA with 0
#df_unemp = df_unemp.fillna(0)

#df_unemp.head(5)

# Years of Potential Life Lost (YPLL)

Years of Potential Life Lost (YPLL) measures the rate of premature deaths by region. YPLL is calculated as the sum of the estimated number of years that individuals would have lived if they had not died before the age of 75 per 100,000 people. <a href="https://www.countyhealthrankings.org/sites/default/files/media/document/2020%20County%20Health%20Rankings%20Data%20-%20v2.xlsx"> Data Source for YPLL.  </a> More information about YPLL can be dounf in this <a href="https://www.countyhealthrankings.org/explore-health-rankings/measures-data-sources/county-health-rankings-model/health-outcomes/length-of-life/premature-death-ypll"> link. </a> 


In [17]:
# Read the YPLL data
df_y = pd.read_csv("Data/YPLL.csv")

# Fill NA with the mean of the data
df_y = df_y.fillna(df_y.mean())


# Population

In [18]:
# Read the Population data
df_pop = pd.read_csv("Data/County_pop_2019.csv")

# Fill NA with 0
df_pop = df_pop.fillna(0)


In [19]:
# Create a dictionary for the county and population
population_county = df_pop.set_index('FIPS')['pop'].to_dict()

# Create a dictionary for the county and YPLL
YPLL = dict(zip(df_y.FIPS, df_y.YPLL))

# Create a dictionary for the county and Unemployment
Unemployment = dict(zip(df_unemp.FIPS, df_unemp.Unemployed_Level))


# Create a dictionary for the county and Community Health Workers (CHW) demand
# Note that we assume a CHW can serve 55 Medicaid patient so the demand for CHW will be

#Medicaid_demand = dict(zip(df_m.FIPS, df_m.Med_Demand))


In [20]:
#for m in Medicaid_demand:
#    print (m, Medicaid_demand[m])

# COVID-19 Cases & COVID-19 Cases per Capita

What are COVID-19 Cases and COVID-19 Cases per Capita?

COVID-19 cases is an absolute metric of the total number of COVID-19 cases in a county over the last fourteen days.  COVID-19 cases per 100,000 is a relative metric calculated by dividing the number of COVID-19 cases by the estimated county population and multiplying by 100,000.  Cases include both confirmed cases, based on viral testing, and probable cases, based on specific criteria for symptoms and epidemiological exposure. We use NY Times Covid data. 


In [21]:
#### Data with the most recent date in NY Times dataset:

today = time.strftime('%Y-%m-%d')
covid_data_update_date = today#'2020-07-21'#today #or enter a specific date such as '2020-07-06'


In [22]:
## 14 day period defined
data_date_dt = pd.to_datetime(covid_data_update_date,infer_datetime_format = True)

N = 14

date_N_days_ago = data_date_dt - timedelta(days = N)

date_N1_days_ago = data_date_dt - timedelta(days = N+1)

In [23]:

# URL for mainland US data
url = "http://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"
s = requests.get(url).content
covid = pd.read_csv(io.StringIO(s.decode('utf-8')))

In [24]:
covid.tail(50)

Unnamed: 0,date,county,state,fips,cases,deaths
797141,2020-12-04,Ozaukee,Wisconsin,55089.0,5639,44.0
797142,2020-12-04,Pepin,Wisconsin,55091.0,518,2.0
797143,2020-12-04,Pierce,Wisconsin,55093.0,2728,23.0
797144,2020-12-04,Polk,Wisconsin,55095.0,2350,17.0
797145,2020-12-04,Portage,Wisconsin,55097.0,5075,40.0
797146,2020-12-04,Price,Wisconsin,55099.0,835,5.0
797147,2020-12-04,Racine,Wisconsin,55101.0,16074,194.0
797148,2020-12-04,Richland,Wisconsin,55103.0,942,13.0
797149,2020-12-04,Rock,Wisconsin,55105.0,10348,100.0
797150,2020-12-04,Rusk,Wisconsin,55107.0,954,7.0


Note: Since NY data is seperately available, we first read the NY data for all 5 different borough then combine with the rest of US data.

In [25]:
# URL for NY
url = "https://raw.githubusercontent.com/nychealth/coronavirus-data/master/trends/data-by-day.csv"
#"https://raw.githubusercontent.com/nychealth/coronavirus-data/master/data-by-day.csv"

ny = requests.get(url).content
covid_ny = pd.read_csv(io.StringIO(ny.decode('utf-8')))


covid_ny.tail(5)

Unnamed: 0,date_of_interest,CASE_COUNT,HOSPITALIZED_COUNT,DEATH_COUNT,DEATH_COUNT_PROBABLE,CASE_COUNT_7DAY_AVG,HOSP_COUNT_7DAY_AVG,DEATH_COUNT_7DAY_AVG,BX_CASE_COUNT,BX_HOSPITALIZED_COUNT,BX_DEATH_COUNT,BX_CASE_COUNT_7DAY_AVG,BX_HOSPITALIZED_COUNT_7DAY_AVG,BX_DEATH_COUNT_7DAY_AVG,BK_CASE_COUNT,BK_HOSPITALIZED_COUNT,BK_DEATH_COUNT,BK_CASE_COUNT_7DAY_AVG,BK_HOSPITALIZED_COUNT_7DAY_AVG,BK_DEATH_COUNT_7DAY_AVG,MN_CASE_COUNT,MN_HOSPITALIZED_COUNT,MN_DEATH_COUNT,MN_CASE_COUNT_7DAY_AVG,MN_HOSPITALIZED_COUNT_7DAY_AVG,MN_DEATH_COUNT_7DAY_AVG,QN_CASE_COUNT,QN_HOSPITALIZED_COUNT,QN_DEATH_COUNT,QN_CASE_COUNT_7DAY_AVG,QN_HOSPITALIZED_COUNT_7DAY_AVG,QN_DEATH_COUNT_7DAY_AVG,SI_CASE_COUNT,SI_HOSPITALIZED_COUNT,SI_DEATH_COUNT,SI_CASE_COUNT_7DAY_AVG,SI_HOSPITALIZED_COUNT_7DAY_AVG,SI_DEATH_COUNT_7DAY_AVG,INCOMPLETE
273,11/28/2020,1687,114,11,2,1732,121,12,310,22,3,304,23,2,418,26,3,464,31,3,274,19,2,290,16,2,485,36,2,461,34,3,200,11,1,214,18,2,6000
274,11/29/2020,1596,129,11,1,1800,125,12,209,28,3,308,24,2,474,33,2,484,31,3,202,16,0,295,17,2,452,31,3,488,34,3,259,21,3,226,18,3,6000
275,11/30/2020,2444,146,16,3,1830,126,12,389,44,6,304,26,3,777,27,3,515,30,3,347,25,1,288,19,2,624,40,5,493,35,3,307,10,1,229,17,3,6000
276,12/01/2020,2325,150,16,4,1881,132,12,416,40,1,314,30,3,636,32,3,530,30,3,355,18,2,288,19,1,654,44,4,514,36,3,264,16,6,236,17,3,6000
277,12/02/2020,1399,83,14,7,1761,128,12,200,21,2,285,29,2,405,17,6,502,28,3,211,10,1,267,19,1,406,21,3,486,35,3,176,14,2,221,16,3,6000


In [26]:
Kings = covid_ny[['date_of_interest', 'BK_CASE_COUNT', 'BK_DEATH_COUNT']]
Kings.rename(columns = {'BK_CASE_COUNT': 'cases', 'date_of_interest': 'date', 'BK_DEATH_COUNT': 'deaths'} , inplace=True)
Kings['county'] = 'Kings'
Kings['state'] = 'New York'
Kings['fips'] = 36047.0
#Kings.head(5)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the d

In [27]:
Bronx = covid_ny[['date_of_interest', 'BX_CASE_COUNT', 'BX_DEATH_COUNT']]
Bronx.rename(columns = {'BX_CASE_COUNT': 'cases', 'date_of_interest': 'date', 'BX_DEATH_COUNT': 'deaths'} , inplace=True)
Bronx['state'] = 'New York'
Bronx['county'] = 'Bronx'
Bronx['fips'] = 36005.0
#Bronx.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [28]:
Manhattan = covid_ny[['date_of_interest', 'MN_CASE_COUNT', 'MN_DEATH_COUNT']]
Manhattan.rename(columns = {'MN_CASE_COUNT': 'cases', 'date_of_interest': 'date', 'MN_DEATH_COUNT': 'deaths'} , inplace=True)
Manhattan['state'] = 'New York'
Manhattan['county'] = 'Manhattan'
Manhattan['fips'] = 36061.0
#Manhattan.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [29]:
Queens = covid_ny[['date_of_interest', 'QN_CASE_COUNT', 'QN_DEATH_COUNT']]
Queens.rename(columns = {'QN_CASE_COUNT': 'cases', 'date_of_interest': 'date', 'QN_DEATH_COUNT': 'deaths'} , inplace=True)
Queens['state'] = 'New York'
Queens['county'] = 'Queens'
Queens['fips'] = 36081.0
#Queens.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [30]:
Richmond = covid_ny[['date_of_interest', 'SI_CASE_COUNT', 'SI_DEATH_COUNT']]
Richmond.rename(columns = {'SI_CASE_COUNT': 'cases', 'date_of_interest': 'date', 'SI_DEATH_COUNT': 'deaths'} , inplace=True)
Richmond['state'] = 'New York'
Richmond['county'] = 'Richmond'
Richmond['fips'] = 36085.0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [31]:
covid.head(5)


Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0.0
1,2020-01-22,Snohomish,Washington,53061.0,1,0.0
2,2020-01-23,Snohomish,Washington,53061.0,1,0.0
3,2020-01-24,Cook,Illinois,17031.0,1,0.0
4,2020-01-24,Snohomish,Washington,53061.0,1,0.0


In [32]:
Bronx.tail(5)

Unnamed: 0,date,cases,deaths,state,county,fips
273,11/28/2020,310,3,New York,Bronx,36005.0
274,11/29/2020,209,3,New York,Bronx,36005.0
275,11/30/2020,389,6,New York,Bronx,36005.0
276,12/01/2020,416,1,New York,Bronx,36005.0
277,12/02/2020,200,2,New York,Bronx,36005.0


In [33]:
#Kings['deaths'] = Kings.groupby(by=['fips'])['deaths_d'].sum()
#Bronx['deaths'] = Bronx.groupby(by=['fips'])['deaths_d'].sum()
#Manhattan['deaths'] = Manhattan.groupby(by=['fips'])['deaths_d'].sum()
#Queens['deaths'] = Queens.groupby(by=['fips'])['deaths_d'].sum()
#Richmond['deaths'] = Richmond.groupby(by=['fips'])['deaths_d'].sum()

In [34]:
Kings.head(5)

Unnamed: 0,date,cases,deaths,county,state,fips
0,02/29/2020,0,0,Kings,New York,36047.0
1,03/01/2020,0,0,Kings,New York,36047.0
2,03/02/2020,0,0,Kings,New York,36047.0
3,03/03/2020,0,0,Kings,New York,36047.0
4,03/04/2020,1,0,Kings,New York,36047.0


In [35]:
#Kings = Kings.drop(['deaths_d'], axis=1)
#Bronx = Bronx.drop(['deaths_d'], axis=1)
#Manhattan = Manhattan.drop(['deaths_d'], axis=1)
#Queens = Queens.drop(['deaths_d'], axis=1)
#Richmond = Richmond.drop(['deaths_d'], axis=1)

In [36]:
Kings.tail(5)

Unnamed: 0,date,cases,deaths,county,state,fips
273,11/28/2020,418,3,Kings,New York,36047.0
274,11/29/2020,474,2,Kings,New York,36047.0
275,11/30/2020,777,3,Kings,New York,36047.0
276,12/01/2020,636,3,Kings,New York,36047.0
277,12/02/2020,405,6,Kings,New York,36047.0


In [37]:
covid['dt'] = pd.to_datetime(covid['date'], infer_datetime_format=True)
Kings['dt'] = pd.to_datetime(Kings['date'], infer_datetime_format=True)
Bronx['dt'] = pd.to_datetime(Bronx['date'], infer_datetime_format=True)
Manhattan['dt'] = pd.to_datetime(Manhattan['date'], infer_datetime_format=True)
Queens['dt'] = pd.to_datetime(Queens['date'], infer_datetime_format=True)
Richmond['dt'] = pd.to_datetime(Richmond['date'], infer_datetime_format=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the cavea

In [38]:

Kings_c = Kings[(Kings['dt']>date_N1_days_ago) & (Kings['dt']<= data_date_dt)].copy()
Queens_c = Queens[(Queens['dt']>date_N1_days_ago) & (Queens['dt']<= data_date_dt)].copy()
Bronx_c = Bronx[(Bronx['dt']>date_N1_days_ago) & (Bronx['dt']<= data_date_dt)].copy()
Manhattan_c = Manhattan[(Manhattan['dt']>date_N1_days_ago) & (Manhattan['dt']<= data_date_dt)].copy()
Richmond_c = Richmond[(Richmond['dt']>date_N1_days_ago) & (Richmond['dt']<= data_date_dt)].copy()

In [39]:
#Kings['cases'] = Kings.loc[(Kings['dt']<= data_date_dt)].groupby(['dt'])['d_cases'].sum()
#Kings['cases'] = Kings.apply(lambda x: x[(Kings['dt']<= data_date_dt)]['d_cases'].sum())

In [40]:
Kings.head(50)

Unnamed: 0,date,cases,deaths,county,state,fips,dt
0,02/29/2020,0,0,Kings,New York,36047.0,2020-02-29
1,03/01/2020,0,0,Kings,New York,36047.0,2020-03-01
2,03/02/2020,0,0,Kings,New York,36047.0,2020-03-02
3,03/03/2020,0,0,Kings,New York,36047.0,2020-03-03
4,03/04/2020,1,0,Kings,New York,36047.0,2020-03-04
5,03/05/2020,3,0,Kings,New York,36047.0,2020-03-05
6,03/06/2020,1,0,Kings,New York,36047.0,2020-03-06
7,03/07/2020,2,0,Kings,New York,36047.0,2020-03-07
8,03/08/2020,5,0,Kings,New York,36047.0,2020-03-08
9,03/09/2020,16,0,Kings,New York,36047.0,2020-03-09


In [41]:
K_c = Kings_c.groupby(['fips']).sum().reset_index()
Q_c = Queens_c.groupby(['fips']).sum().reset_index()
B_c = Bronx_c.groupby(['fips']).sum().reset_index()
M_c = Manhattan_c.groupby(['fips']).sum().reset_index()
R_c = Richmond_c.groupby(['fips']).sum().reset_index()
K_c.head(19)

Unnamed: 0,fips,cases,deaths
0,36047.0,5888,39


In [42]:
covid_death = covid[(covid['dt'] >= data_date_dt- timedelta(days = 1))]
Kings_death = Kings.groupby(by=['fips'])['deaths'].sum().reset_index()
Bronx_death = Bronx.groupby(by=['fips'])['deaths'].sum().reset_index()
Manhattan_death = Manhattan.groupby(by=['fips'])['deaths'].sum().reset_index()
Queens_death = Queens.groupby(by=['fips'])['deaths'].sum().reset_index()
Richmond_death = Richmond.groupby(by=['fips'])['deaths'].sum().reset_index()

In [43]:
# We merge the NY data with the rest of the US data
covid = pd.concat([covid, Kings, Bronx, Manhattan, Queens, Richmond], sort = False)


In [44]:
Queens.tail(14)

Unnamed: 0,date,cases,deaths,state,county,fips,dt
264,11/19/2020,442,1,New York,Queens,36081.0,2020-11-19
265,11/20/2020,442,2,New York,Queens,36081.0,2020-11-20
266,11/21/2020,308,0,New York,Queens,36081.0,2020-11-21
267,11/22/2020,268,3,New York,Queens,36081.0,2020-11-22
268,11/23/2020,585,4,New York,Queens,36081.0,2020-11-23
269,11/24/2020,506,5,New York,Queens,36081.0,2020-11-24
270,11/25/2020,603,1,New York,Queens,36081.0,2020-11-25
271,11/26/2020,171,0,New York,Queens,36081.0,2020-11-26
272,11/27/2020,612,3,New York,Queens,36081.0,2020-11-27
273,11/28/2020,485,2,New York,Queens,36081.0,2020-11-28


In [45]:

covid.tail(50)

Unnamed: 0,date,county,state,fips,cases,deaths,dt
228,10/14/2020,Richmond,New York,36085.0,33,0.0,2020-10-14
229,10/15/2020,Richmond,New York,36085.0,40,0.0,2020-10-15
230,10/16/2020,Richmond,New York,36085.0,36,0.0,2020-10-16
231,10/17/2020,Richmond,New York,36085.0,39,0.0,2020-10-17
232,10/18/2020,Richmond,New York,36085.0,33,1.0,2020-10-18
233,10/19/2020,Richmond,New York,36085.0,35,1.0,2020-10-19
234,10/20/2020,Richmond,New York,36085.0,38,0.0,2020-10-20
235,10/21/2020,Richmond,New York,36085.0,52,1.0,2020-10-21
236,10/22/2020,Richmond,New York,36085.0,76,0.0,2020-10-22
237,10/23/2020,Richmond,New York,36085.0,47,1.0,2020-10-23


In [46]:
print (data_date_dt)

2020-12-05 00:00:00


In [47]:

covid_death = covid_death[['fips', 'deaths']]
covid_death.head(2)

Unnamed: 0,fips,deaths
793946,1001.0,42.0
793947,1003.0,138.0


In [48]:
Kings_death.head(2)

Unnamed: 0,fips,deaths
0,36047.0,5862


In [49]:
# We merge the NY data with the rest of the US data
covid_death = pd.concat([covid_death, Kings_death, Bronx_death, Manhattan_death, Queens_death, Richmond_death], sort = False)

covid_death = covid_death.rename(columns={"deaths": "total_deaths"})


# Create a dictionary for the cumulative COVID deaths in each county
County_covid_death = dict(zip(covid_death.fips, covid_death.total_deaths))

covid_death.tail(250)

Unnamed: 0,fips,total_deaths
796946,51115.0,1.0
796947,51117.0,37.0
796948,51119.0,11.0
796949,51121.0,17.0
796950,51125.0,3.0
796951,51127.0,4.0
796952,51700.0,55.0
796953,51710.0,92.0
796954,51131.0,31.0
796955,51133.0,8.0


In [50]:
covid_death.shape

(3250, 2)

In [51]:
print (County_covid_death)

{1001.0: 42.0, 1003.0: 138.0, 1005.0: 29.0, 1007.0: 38.0, 1009.0: 46.0, 1011.0: 20.0, 1013.0: 42.0, 1015.0: 126.0, 1017.0: 54.0, 1019.0: 24.0, 1021.0: 45.0, 1023.0: 20.0, 1025.0: 22.0, 1027.0: 27.0, 1029.0: 14.0, 1031.0: 23.0, 1033.0: 48.0, 1035.0: 15.0, 1037.0: 4.0, 1039.0: 34.0, 1041.0: 30.0, 1043.0: 50.0, 1045.0: 55.0, 1047.0: 32.0, 1049.0: 43.0, 1051.0: 70.0, 1053.0: 32.0, 1055.0: 70.0, 1057.0: 16.0, 1059.0: 34.0, 1061.0: 9.0, 1063.0: 18.0, 1065.0: 32.0, 1067.0: 7.0, 1069.0: 40.0, 1071.0: 23.0, 1073.0: 515.0, 1075.0: 8.0, 1077.0: 57.0, 1079.0: 36.0, 1081.0: 67.0, 1083.0: 46.0, 1085.0: 30.0, 1087.0: 22.0, 1089.0: 153.0, 1091.0: 24.0, 1093.0: 36.0, 1095.0: 58.0, 1097.0: 372.0, 1099.0: 11.0, 1101.0: 246.0, 1103.0: 55.0, 1105.0: 7.0, 1107.0: 20.0, 1109.0: 15.0, 1111.0: 22.0, 1113.0: 3.0, 1117.0: 78.0, 1115.0: 57.0, 1119.0: 22.0, 1121.0: 57.0, 1123.0: 92.0, 1125.0: 173.0, 1127.0: 112.0, 1129.0: 22.0, 1131.0: 18.0, 1133.0: 25.0, 2013.0: 0.0, 2016.0: 0.0, 2020.0: 77.0, 2050.0: 7.0, 2060.0

In [52]:
## subset last last 15 days
covid_last15 = covid[(covid['dt']>date_N1_days_ago) & (covid['dt']<= data_date_dt)].copy()
covid_last15['dt_time_delta'] = covid_last15['dt']-data_date_dt

In [53]:
## calculate new daily cases

## sort values by county and date
covid_last15.sort_values(by=['fips','dt'],inplace=True)
## remove data with 'unknown' counties
covid_last15 = covid_last15[covid_last15['fips'].notnull()].copy()

## calculate daily difference in number of cases
covid_last15['new_cases'] = covid_last15.groupby('fips')['cases'].transform(lambda x: x.diff())
## set negative new cases to zero, this can occuer due to the disperacy in the data

covid_last15.loc[covid_last15.new_cases < 1e-6, 'new_cases'] = 0
covid_last15.sort_index(inplace = True)

In [54]:
covid_last15[covid_last15.county == 'New Haven'].head(50)

Unnamed: 0,date,county,state,fips,cases,deaths,dt,dt_time_delta,new_cases
752061,2020-11-21,New Haven,Connecticut,9009.0,25248,1190.0,2020-11-21,-14 days,
755308,2020-11-22,New Haven,Connecticut,9009.0,25248,1190.0,2020-11-22,-13 days,0.0
758555,2020-11-23,New Haven,Connecticut,9009.0,26531,1202.0,2020-11-23,-12 days,1283.0
761801,2020-11-24,New Haven,Connecticut,9009.0,26643,1207.0,2020-11-24,-11 days,112.0
765047,2020-11-25,New Haven,Connecticut,9009.0,27092,1222.0,2020-11-25,-10 days,449.0
768294,2020-11-26,New Haven,Connecticut,9009.0,27092,1222.0,2020-11-26,-9 days,0.0
771540,2020-11-27,New Haven,Connecticut,9009.0,27925,1238.0,2020-11-27,-8 days,833.0
774785,2020-11-28,New Haven,Connecticut,9009.0,27925,1238.0,2020-11-28,-7 days,0.0
778031,2020-11-29,New Haven,Connecticut,9009.0,27925,1238.0,2020-11-29,-6 days,0.0
781277,2020-11-30,New Haven,Connecticut,9009.0,29455,1256.0,2020-11-30,-5 days,1530.0


In [55]:
## select just last 14 days now that we have daily new cases with 15th day as baseline
covid_last14 = covid_last15[(covid_last15['dt'] > date_N_days_ago) & (covid_last15['dt'] <= data_date_dt)].copy()


In [56]:
## group by FIPS to get case load and follow up demand values for each county
covid_last14_stats = covid_last14.groupby(['fips'])['new_cases'].sum().reset_index(name ='total_cases')

covid_last14_stats[covid_last14_stats.fips == 9009].head(5)

Unnamed: 0,fips,total_cases
310,9009.0,7595.0


In [57]:
covid.tail(50)

Unnamed: 0,date,county,state,fips,cases,deaths,dt
228,10/14/2020,Richmond,New York,36085.0,33,0.0,2020-10-14
229,10/15/2020,Richmond,New York,36085.0,40,0.0,2020-10-15
230,10/16/2020,Richmond,New York,36085.0,36,0.0,2020-10-16
231,10/17/2020,Richmond,New York,36085.0,39,0.0,2020-10-17
232,10/18/2020,Richmond,New York,36085.0,33,1.0,2020-10-18
233,10/19/2020,Richmond,New York,36085.0,35,1.0,2020-10-19
234,10/20/2020,Richmond,New York,36085.0,38,0.0,2020-10-20
235,10/21/2020,Richmond,New York,36085.0,52,1.0,2020-10-21
236,10/22/2020,Richmond,New York,36085.0,76,0.0,2020-10-22
237,10/23/2020,Richmond,New York,36085.0,47,1.0,2020-10-23


In [58]:
## group by FIPS to get case load and follow up demand values for each county
#covid_death = covid.groupby(['fips'])['deaths'].sum().reset_index(name ='total_deaths')
#covid_death.head(250)

In [59]:
## group by FIPS to get case load each month and follow up demand values for each county
covid['month'] = pd.to_datetime(covid['date'])
covid['year'] = pd.to_datetime(covid['date'])


In [60]:
#covid_last14_stats_montly = covid.groupby(['fips', covid.month.dt.month, covid.year.dt.year])['deaths'].sum().reset_index() 

#covid_last14_stats_montly['cumulative_death'] = covid_last14_stats_montly.groupby(['fips'])['deaths'].cumsum(axis = 0) 
#covid_last14_stats_montly = covid_last14_stats_montly.groupby(['fips', covid_last14_stats_montly.month,  covid_last14_stats_montly.year])['deaths'].cumsum()

#covid_last14_stats_montly.head(10)

In [61]:
# adding population information from CDC svi dataset
covid_last14_stats = covid_last14_stats.reset_index()
covid_last14_stats['fips'] = covid_last14_stats['fips'].astype(int)
svi_county['FIPS'] = svi_county['FIPS'].astype(int)
covid_last14_stats = pd.merge(left = covid_last14_stats, right = svi_county[['E_TOTPOP','FIPS', 'STATE']], how = 'right', right_on = 'FIPS', left_on = 'fips' )
covid_last14_stats.fillna(0 , inplace=True)

In [62]:

covid_last14_stats.tail(5)

Unnamed: 0,index,fips,total_cases,E_TOTPOP,FIPS,STATE
3215,3217.0,72151.0,43.0,34149,72151,PUERTO RICO
3216,3218.0,72153.0,40.0,36439,72153,PUERTO RICO
3217,0.0,0.0,0.0,2132,2105,ALASKA
3218,0.0,0.0,0.0,689,2282,ALASKA
3219,0.0,0.0,0.0,75,15005,HAWAII


In [63]:
now = pd.to_datetime("now")

print (now)
m_now = now.month
print (m_now)

y_now = now.year

2020-12-06 03:47:21.254219
12


In [64]:
#one_month_lag_cumulative_death =  covid_last14_stats_montly[(covid_last14_stats_montly['month'] == m_now-1) & (covid_last14_stats_montly['year'] == y_now)]

# Create a dictionary for the last month COVID deaths in each county
#one_month_lag_death = dict(zip(one_month_lag_cumulative_death.fips, one_month_lag_cumulative_death.cumulative_death))

#one_month_lag_cumulative_death.head(5)


In [65]:
#two_month_lag_cumulative_death =  covid_last14_stats_montly[(covid_last14_stats_montly['month'] == m_now-2) & (covid_last14_stats_montly['year'] == y_now)]

# Create a dictionary for the last month COVID deaths in each county
#two_month_lag_death = dict(zip(two_month_lag_cumulative_death.fips, two_month_lag_cumulative_death.cumulative_death))


#two_month_lag_cumulative_death.head(5)

In [66]:
#three_month_lag_cumulative_death =  covid_last14_stats_montly[(covid_last14_stats_montly['month'] == m_now - 3) & (covid_last14_stats_montly['year'] == y_now)]


# Create a dictionary for the 3 last month COVID deaths in each county
#three_month_lag_death = dict(zip(three_month_lag_cumulative_death.fips, three_month_lag_cumulative_death.cumulative_death))

#three_month_lag_cumulative_death.head(5)

#print (three_month_lag_death)

In [67]:

# Create a dictionary for the states of the given the county FIPS
county_of_states = dict(zip(svi_county.FIPS, svi_county.STATE))

# Create a dictionary for the name of the given the county FIPS
county_name = dict(zip(svi_county.FIPS, svi_county.COUNTY))

# Create the list for county FIPS, we consider counties as analogy to the center for community health workers
location = svi_county.FIPS.tolist() #[k for k in SVI_county] #[9001, 9003, 9005, 9007, 9009, 9011, 9013, 9015]#[k for k in SVI_county]




In [68]:
for j in County_covid_death:
       
    print (j, County_covid_death[j])

1001.0 42.0
1003.0 138.0
1005.0 29.0
1007.0 38.0
1009.0 46.0
1011.0 20.0
1013.0 42.0
1015.0 126.0
1017.0 54.0
1019.0 24.0
1021.0 45.0
1023.0 20.0
1025.0 22.0
1027.0 27.0
1029.0 14.0
1031.0 23.0
1033.0 48.0
1035.0 15.0
1037.0 4.0
1039.0 34.0
1041.0 30.0
1043.0 50.0
1045.0 55.0
1047.0 32.0
1049.0 43.0
1051.0 70.0
1053.0 32.0
1055.0 70.0
1057.0 16.0
1059.0 34.0
1061.0 9.0
1063.0 18.0
1065.0 32.0
1067.0 7.0
1069.0 40.0
1071.0 23.0
1073.0 515.0
1075.0 8.0
1077.0 57.0
1079.0 36.0
1081.0 67.0
1083.0 46.0
1085.0 30.0
1087.0 22.0
1089.0 153.0
1091.0 24.0
1093.0 36.0
1095.0 58.0
1097.0 372.0
1099.0 11.0
1101.0 246.0
1103.0 55.0
1105.0 7.0
1107.0 20.0
1109.0 15.0
1111.0 22.0
1113.0 3.0
1117.0 78.0
1115.0 57.0
1119.0 22.0
1121.0 57.0
1123.0 92.0
1125.0 173.0
1127.0 112.0
1129.0 22.0
1131.0 18.0
1133.0 25.0
2013.0 0.0
2016.0 0.0
2020.0 77.0
2050.0 7.0
2060.0 0.0
2068.0 0.0
2070.0 1.0
2090.0 17.0
2100.0 0.0
2110.0 3.0
2122.0 9.0
2130.0 0.0
2150.0 1.0
2158.0 2.0
2164.0 0.0
2170.0 8.0
2180.0 0.0
2185.

22075.0 20.0
22077.0 49.0
22079.0 194.0
22081.0 26.0
22083.0 32.0
22085.0 21.0
22087.0 34.0
22089.0 70.0
22091.0 4.0
22093.0 40.0
22095.0 116.0
22097.0 165.0
22099.0 73.0
22101.0 96.0
22103.0 298.0
22105.0 145.0
22107.0 3.0
22109.0 139.0
22111.0 59.0
nan 0.0
22113.0 76.0
22115.0 66.0
22117.0 80.0
22119.0 59.0
22121.0 44.0
22123.0 21.0
22125.0 25.0
22127.0 26.0
23001.0 29.0
23003.0 1.0
23005.0 71.0
23007.0 4.0
23009.0 3.0
23011.0 18.0
23013.0 4.0
23015.0 2.0
23017.0 5.0
23019.0 14.0
23021.0 1.0
23023.0 0.0
23025.0 20.0
nan 0.0
23027.0 16.0
23029.0 2.0
23031.0 34.0
24001.0 87.0
24003.0 300.0
24005.0 763.0
24510.0 591.0
24009.0 39.0
24011.0 10.0
24013.0 142.0
24015.0 50.0
24017.0 105.0
24019.0 17.0
24021.0 152.0
24023.0 8.0
24025.0 108.0
24027.0 153.0
24029.0 25.0
24031.0 977.0
24033.0 948.0
24035.0 27.0
24039.0 9.0
24037.0 65.0
24041.0 7.0
nan 27.0
24043.0 81.0
24045.0 60.0
24047.0 39.0
25001.0 197.0
25003.0 66.0
25005.0 877.0
25007.0 0.0
25009.0 1482.0
25011.0 76.0
25013.0 922.0
25015.0

72143.0 nan
72145.0 nan
72147.0 nan
72149.0 nan
72151.0 nan
72153.0 nan
44001.0 44.0
44003.0 145.0
44005.0 8.0
44007.0 1059.0
nan 67.0
44009.0 90.0
45001.0 22.0
45003.0 101.0
45005.0 9.0
45007.0 235.0
45009.0 37.0
45011.0 26.0
45013.0 94.0
45015.0 105.0
45017.0 18.0
45019.0 299.0
45021.0 69.0
45023.0 37.0
45025.0 54.0
45027.0 70.0
45029.0 51.0
45031.0 76.0
45033.0 53.0
45035.0 113.0
45037.0 24.0
45039.0 45.0
45041.0 221.0
45043.0 68.0
45045.0 419.0
45047.0 86.0
45049.0 35.0
45051.0 273.0
45053.0 23.0
45055.0 65.0
45057.0 67.0
45059.0 68.0
45061.0 40.0
45063.0 232.0
45067.0 49.0
45069.0 19.0
45065.0 8.0
45071.0 67.0
45073.0 43.0
45075.0 136.0
45077.0 93.0
45079.0 304.0
45081.0 26.0
45083.0 293.0
45085.0 103.0
45087.0 28.0
45089.0 52.0
45091.0 140.0
46003.0 5.0
46005.0 32.0
46007.0 5.0
46009.0 20.0
46011.0 18.0
46013.0 29.0
46015.0 5.0
46017.0 9.0
46019.0 14.0
46021.0 1.0
46023.0 6.0
46025.0 1.0
46027.0 11.0
46029.0 55.0
46031.0 5.0
46033.0 7.0
46035.0 41.0
46037.0 11.0
46039.0 2.0
46041

In [69]:
K_c.head()

Unnamed: 0,fips,cases,deaths
0,36047.0,5888,39


In [70]:
K = dict(zip(K_c.fips, K_c.cases))
Q = dict(zip(Q_c.fips, Q_c.cases))
B = dict(zip(B_c.fips, B_c.cases))
M = dict(zip(M_c.fips, M_c.cases))
R  = dict(zip(R_c.fips, R_c.cases))

In [71]:
print (K)

{36047.0: 5888}


In [72]:
# Create a dictionary for the county and covid cases
covid_cases_county_ny_times = dict(zip(covid_last14_stats.fips, covid_last14_stats.total_cases))
COVID_14days = {}

for j in location:
    if j in covid_cases_county_ny_times:
        COVID_14days[j] = covid_cases_county_ny_times[j] 
        print(j, covid_cases_county_ny_times[j])
        

        
    else:
        COVID_14days[j] = 0
        
        
    if j not in County_covid_death:
        print ('j',j)
        County_covid_death[j] = 0

#print (three_month_lag_death)
#print (two_month_lag_covid_death)
#print (one_month_lag_covid_death)
#for j in location:
#    if j not in three_month_lag_death:        
#        three_month_lag_death[j] = 0
        
#    if j not in two_month_lag_death:        
#        two_month_lag_death[j] = 0
        
#    if j not in one_month_lag_death:        
#        one_month_lag_death[j] = 0
        
    #if j not in County_covid_death:
    #    County_covid_death[j] = 0

35039 387.0
1001 348.0
1009 496.0
1013 79.0
1015 1015.0
1017 222.0
1031 319.0
1033 556.0
1039 232.0
1043 841.0
1045 231.0
1051 515.0
1055 1248.0
1067 73.0
1069 596.0
1071 850.0
1077 810.0
1079 198.0
1083 654.0
1089 2429.0
1095 1243.0
1097 1437.0
1103 1374.0
1111 90.0
1113 185.0
1115 735.0
1117 1649.0
1121 543.0
2261 35.0
4021 3037.0
5009 385.0
5011 145.0
5033 407.0
5037 175.0
5045 821.0
5047 148.0
5051 622.0
5053 131.0
5063 254.0
5083 210.0
5085 448.0
5087 172.0
5115 482.0
5117 39.0
5121 153.0
5125 1074.0
5131 806.0
5145 447.0
6007 793.0
6017 872.0
6023 284.0
6027 63.0
6061 1583.0
6079 933.0
6089 1593.0
6093 291.0
8019 35.0
8039 173.0
8047 17.0
8051 55.0
8057 8.0
8065 84.0
8067 770.0
8071 133.0
8075 502.0
8077 1763.0
8083 256.0
8085 515.0
8093 67.0
8099 265.0
8101 3090.0
8103 72.0
8119 181.0
8121 103.0
9005 1329.0
9007 862.0
9011 1296.0
9015 676.0
10001 1276.0
12019 1035.0
12033 1462.0
12089 471.0
12101 2168.0
12109 1387.0
12113 923.0
13015 621.0
13039 179.0
13045 656.0
13047 331.0
130

12005 882.0
12009 1912.0
12015 730.0
12017 749.0
12031 5743.0
12035 307.0
12037 11.0
12041 84.0
12045 51.0
12053 689.0
12069 970.0
12073 1442.0
12075 131.0
12083 1303.0
12091 1330.0
12103 3541.0
12105 2387.0
12111 943.0
12117 1352.0
12119 365.0
12121 163.0
12127 2128.0
12131 383.0
13013 402.0
13035 117.0
13051 671.0
13067 3232.0
13083 91.0
13103 118.0
13105 145.0
13117 1008.0
13131 70.0
13133 67.0
13137 315.0
13149 74.0
13159 93.0
13171 100.0
13177 126.0
13181 30.0
13185 629.0
13195 180.0
13201 22.0
13219 153.0
13221 62.0
13223 798.0
13237 70.0
13241 107.0
13255 264.0
13257 197.0
13275 133.0
13281 61.0
13285 215.0
13291 95.0
13293 125.0
13305 154.0
13317 61.0
16005 1033.0
16007 40.0
16009 36.0
16011 521.0
16013 182.0
16015 33.0
16019 1248.0
16027 2737.0
16029 46.0
16041 147.0
16043 99.0
16051 258.0
16059 19.0
16061 64.0
16071 48.0
16079 94.0
16081 87.0
16083 746.0
16087 133.0
17005 268.0
17009 70.0
17017 225.0
17025 252.0
17033 198.0
17037 940.0
17041 239.0
17043 8997.0
17051 569.0
170

48069 58.0
48079 11.0
48095 5.0
48101 16.0
48103 98.0
48113 16321.0
48117 473.0
48153 16.0
48169 6.0
48177 107.0
48195 29.0
48201 14197.0
48207 9.0
48249 208.0
48255 35.0
48271 11.0
48279 175.0
48301 0.0
48341 109.0
48357 121.0
48369 81.0
48371 161.0
48375 1991.0
48389 258.0
48419 10.0
48435 33.0
48437 59.0
48463 145.0
48465 277.0
48501 46.0
51027 94.0
51051 81.0
51105 202.0
51183 13.0
51195 189.0
51640 55.0
51710 550.0
51720 22.0
54005 133.0
54015 45.0
54043 80.0
1047 180.0
1087 87.0
1119 49.0
2016 5.0
2070 11.0
2164 0.0
2180 63.0
4017 982.0
4023 723.0
4027 2455.0
5017 112.0
5123 182.0
6011 150.0
8023 22.0
12043 34.0
12067 54.0
13043 9.0
13061 3.0
13065 11.0
13161 48.0
13163 55.0
13205 70.0
13243 5.0
13271 21.0
13273 27.0
20055 516.0
20175 377.0
21063 101.0
21071 321.0
21095 173.0
21127 125.0
21133 102.0
21153 182.0
21175 86.0
22029 152.0
22041 127.0
22097 735.0
22107 18.0
22117 217.0
28069 95.0
28125 31.0
28151 387.0
29069 167.0
29133 228.0
30005 58.0
31173 57.0
32015 88.0
34013 4696

In [73]:
for j in location: 
    if j in K:
        COVID_14days[j] = K[j]
        print (j, COVID_14days[j], K[j])

    if j in Q:
        COVID_14days[j] = Q[j]
        print (j, COVID_14days[j])
        
    if j in B:
        COVID_14days[j] = B[j]
        print (j, COVID_14days[j])
        
    if j in M:
        COVID_14days[j] = M[j]
        print (j, COVID_14days[j])

    if j in R:
        COVID_14days[j] = R[j]
        print (j, COVID_14days[j])

36085 2656
36061 3329
36047 5888 5888
36081 5674
36005 3589


In [74]:
print (covid_cases_county_ny_times)

{1001.0: 348.0, 1003.0: 1370.0, 1005.0: 60.0, 1007.0: 234.0, 1009.0: 496.0, 1011.0: 24.0, 1013.0: 79.0, 1015.0: 1015.0, 1017.0: 222.0, 1019.0: 103.0, 1021.0: 210.0, 1023.0: 20.0, 1025.0: 108.0, 1027.0: 72.0, 1029.0: 69.0, 1031.0: 319.0, 1033.0: 556.0, 1035.0: 64.0, 1037.0: 81.0, 1039.0: 232.0, 1041.0: 38.0, 1043.0: 841.0, 1045.0: 231.0, 1047.0: 180.0, 1049.0: 641.0, 1051.0: 515.0, 1053.0: 148.0, 1055.0: 1248.0, 1057.0: 204.0, 1059.0: 235.0, 1061.0: 137.0, 1063.0: 62.0, 1065.0: 111.0, 1067.0: 73.0, 1069.0: 596.0, 1071.0: 850.0, 1073.0: 5283.0, 1075.0: 125.0, 1077.0: 810.0, 1079.0: 198.0, 1081.0: 593.0, 1083.0: 654.0, 1085.0: 47.0, 1087.0: 87.0, 1089.0: 2429.0, 1091.0: 117.0, 1093.0: 172.0, 1095.0: 1243.0, 1097.0: 1437.0, 1099.0: 88.0, 1101.0: 892.0, 1103.0: 1374.0, 1105.0: 37.0, 1107.0: 164.0, 1109.0: 121.0, 1111.0: 90.0, 1113.0: 185.0, 1115.0: 735.0, 1117.0: 1649.0, 1119.0: 49.0, 1121.0: 543.0, 1123.0: 245.0, 1125.0: 1541.0, 1127.0: 456.0, 1129.0: 47.0, 1131.0: 38.0, 1133.0: 253.0, 201

In [75]:
#Parameters
pro_c_s = [(i,county_of_states[i]) for i in location ]
cartesian_pro_county_state = gp.tuplelist(pro_c_s)


In [76]:
df = covid_last14_stats
df['fips'] = df['fips'].astype(int)

In [77]:
###############################################################################################
######################## END calculating different types of vulnerabilities ###################

Since we allocate CHW proportional to the county values of certain vulnaribilites within state, we need a few function to help us with the calculations. 



In [78]:
# This function return the value for the state for the given dictionary

# More specifically sum upt the values for the counties of each state

def total_state(dict_1):
    state_dict = {}
    for s in State:
        state_dict [s] = sum(float(dict_1[j]) for (j,s) in cartesian_pro_county_state.select('*', s) if j in dict_1)  
    return state_dict



In [79]:
# Calculte the population per state by summing up the population in each county in the state
State_pop = total_state(population_county)

In [80]:


# This function returns the ratio of the dict value for county and state of the county
def Proportional(county_level, state_level):
    
    prop = {}
       
    for (j,s) in cartesian_pro_county_state:
        if state_level[s] >= 1e-6 and j in county_level:
            prop[j] = (float(county_level[j])/float(state_level[s]))    
        else:
            prop[j] = 0
                
    return prop

In [81]:
print (ACI_total)
print (ACI_total[35039])

{1001: 9049, 1003: 30763, 1005: 7244, 1007: 4272, 1009: 9290, 1011: 2935, 1013: 4814, 1015: 25890, 1017: 7400, 1019: 5105, 1021: 9147, 1023: 3273, 1025: 6716, 1027: 3112, 1029: 3180, 1031: 9926, 1033: 9540, 1035: 2765, 1037: 2155, 1039: 6835, 1041: 3219, 1043: 15478, 1045: 10930, 1047: 13544, 1049: 17896, 1051: 12373, 1053: 6235, 1055: 21998, 1057: 4144, 1059: 6715, 1061: 6647, 1063: 3143, 1065: 4730, 1067: 3651, 1069: 22918, 1071: 11352, 1073: 126944, 1075: 3099, 1077: 15561, 1079: 6877, 1081: 23596, 1083: 14596, 1085: 3540, 1087: 5301, 1089: 48769, 1091: 5053, 1093: 6051, 1095: 20669, 1097: 90962, 1099: 5565, 1101: 52763, 1103: 22180, 1105: 3375, 1107: 4807, 1109: 6945, 1111: 4428, 1113: 14480, 1115: 12599, 1117: 20035, 1119: 4098, 1121: 19979, 1123: 9968, 1125: 34877, 1127: 13887, 1129: 4268, 1131: 4086, 1133: 5577, 2013: 444, 2016: 400, 2020: 49663, 2050: 8680, 2060: 141, 2068: 142, 2070: 1983, 2090: 12661, 2100: 606, 2105: 504, 2110: 4631, 2122: 11347, 2130: 2881, 2150: 2412, 2158

In [82]:
#ACI
ACI_State = total_state(ACI_total)

In [83]:
# Create dicts for the variables of SVI
E_POV = dict(zip(svi_county.FIPS, svi_county.EP_POV))
E_UNEMP = dict(zip(svi_county.FIPS, svi_county.EP_UNEMP))
E_PCI = dict(zip(svi_county.FIPS, svi_county.EP_PCI))
E_NOHSDP = dict(zip(svi_county.FIPS, svi_county.EP_NOHSDP))
E_AGE65 = dict(zip(svi_county.FIPS, svi_county.EP_AGE65))
E_AGE17 = dict(zip(svi_county.FIPS, svi_county.EP_AGE17))
E_DISABL = dict(zip(svi_county.FIPS, svi_county.EP_DISABL))
E_SNGPNT = dict(zip(svi_county.FIPS, svi_county.EP_SNGPNT))
E_MINRTY = dict(zip(svi_county.FIPS, svi_county.EP_MINRTY))
E_LIMENG = dict(zip(svi_county.FIPS, svi_county.EP_LIMENG))
E_MUNIT = dict(zip(svi_county.FIPS, svi_county.EP_MUNIT))
E_MOBILE = dict(zip(svi_county.FIPS, svi_county.EP_MOBILE))
E_CROWD = dict(zip(svi_county.FIPS, svi_county.EP_CROWD))
E_NOVEH = dict(zip(svi_county.FIPS, svi_county.EP_NOVEH))
E_GROUPQ = dict(zip(svi_county.FIPS, svi_county.EP_GROUPQ))


# Calculate the state value for the SVI variables
E_POV_State = total_state(E_POV)
E_UNEMP_State = total_state(E_UNEMP) 
E_PCI_State = total_state(E_PCI)
E_NOHSDP_State = total_state(E_NOHSDP) 
E_AGE65_State = total_state(E_AGE65)
E_AGE17_State = total_state(E_AGE17)
E_DISABL_State = total_state(E_DISABL)
E_SNGPNT_State = total_state(E_SNGPNT)
E_MINRTY_State = total_state(E_MINRTY)
E_LIMENG_State = total_state(E_LIMENG)
E_MUNIT_State = total_state(E_MUNIT)
E_MOBILE_State = total_state(E_MOBILE)
E_CROWD_State = total_state(E_CROWD)
E_NOVEH_State = total_state(E_NOVEH)
E_GROUPQ_State = total_state(E_GROUPQ)



# Calculate the proportinal values for the SVI variables
E_POV_Prop = Proportional(E_POV, E_POV_State )
E_UNEMP_Prop = Proportional(E_UNEMP, E_UNEMP_State ) 
E_PCI_Prop = Proportional(E_PCI, E_PCI_State )
E_NOHSDP_Prop = Proportional(E_NOHSDP, E_NOHSDP_State ) 
E_AGE65_Prop = Proportional(E_AGE65, E_AGE65_State )
E_AGE17_Prop = Proportional(E_AGE17, E_AGE17_State )
E_DISABL_Prop = Proportional(E_DISABL, E_DISABL_State )
E_SNGPNT_Prop = Proportional(E_SNGPNT, E_SNGPNT_State )
E_MINRTY_Prop = Proportional(E_MINRTY, E_MINRTY_State )
E_LIMENG_Prop = Proportional(E_LIMENG, E_LIMENG_State )
E_MUNIT_Prop = Proportional(E_MUNIT, E_MUNIT_State )
E_MOBILE_Prop = Proportional(E_MOBILE, E_MOBILE_State )
E_CROWD_Prop = Proportional(E_CROWD, E_CROWD_State )
E_NOVEH_Prop = Proportional(E_NOVEH, E_NOVEH_State )
E_GROUPQ_Prop = Proportional(E_GROUPQ, E_GROUPQ_State )

ACI_Prop = Proportional(ACI_total, ACI_State)


# SVI calculation 

We calculate the ratio of county value to state value by population for each SVI variables (we use EP-estimate percentage- values in the CDC data set), then we take the average of all 15 SVI variables. 

Let SVI variable set be K, where  

K = { Below Poverty, Unemployed, Income, No High School Diploma, Aged 65 or Older, Aged 17 or Younger, Civilian with a Disability, Single-Parent Households, Minority, Speaks English “Less than Well”, Multi-Unit Structures, Mobile Homes, Crowding, No Vehicle, Group Quarters }

We will use these variables in a county base and state base. While County base values are exactly same as the estimated values for these variables in the CDC website, to calculate the state base, we simply sum the county values for all of the counties in each state for each variable. Then we use the following formula to calculate the SVI value for each county.

Let $S$ is the set of states and $j$ is a county in the state $s$, where $s \in S$, $c^k_j$ SVI variable $k \in K$ value for county j, and $c_s$ SVI variable value for state s.

$SVI_j = \frac{1}{15}\sum_{k \in K} \frac{c^k_j}{c^k_s}$


In [84]:

from collections import Counter
# Sum all SVI variable values for each county
SVI_county_sum = dict(Counter(E_POV_Prop) + Counter(E_UNEMP_Prop) + Counter(E_PCI_Prop) + Counter(E_NOHSDP_Prop) + Counter(E_AGE65_Prop) + Counter(E_AGE17_Prop) + Counter(E_DISABL_Prop) + Counter(E_SNGPNT_Prop) + Counter(E_MINRTY_Prop) + Counter(E_LIMENG_Prop) + Counter(E_MUNIT_Prop) + Counter(E_MOBILE_Prop) + Counter(E_CROWD_Prop) + Counter(E_NOVEH_Prop) + Counter(E_GROUPQ_Prop))

# Divide the sum of all SVI variable values
SVI_county = {j: SVI_county_sum[j]/15 for j in SVI_county_sum }


# Proportional Allocation

We consider allocating 1 million CHW over the states proportional to Medicaid enrollment in each state. Further, we allocate CHW to counties in each state proportional to different county vulnerability criterias as follow.

- MEDICAID
- SVI
- YPLL
- UNEMPLOYMENT
- LAST 14 DAYS COVID CASES
- LAST 14 DAYS COVID CASES / POP
- COVID DEATHS / POP

To calculate the total number of allocated CHW to per county according to these vulnerability criterias, we define the following function called "Proportional_allocation", in which we multiply the CHW allocated to each state with the ratio of the chosen vulnerability criteria of the county to the chosen vulnerability criteria of the state, the function return a dictionary with the counties as keys and the number of CHW allocated to each county for the chosen vulnerability criteria as values. 

In [85]:
def Proportional_allocation(county_level, state_level, state_budget):
    prop_allocate = {}
       
    for (j,s) in cartesian_pro_county_state:
        if state_level[s] >= 1e-6 and j in county_level:
            #print (j,s, county_level[j],state_level[s], state_budget[s])
            prop_allocate[j,s] = (float(county_level[j])/float(state_level[s]))*float(state_budget[s])
        
        else:
            prop_allocate[j,s] = 0
            
    
    return prop_allocate

In [86]:
Medicaid_dem = Proportional_allocation(ACI_total, ACI_State, Medicaid_state)
Medicaid_demand = {m[0]: Medicaid_dem[m] for m in Medicaid_dem}
for s in State:
#    print (s)
#    print ( ACI_State[s])
    print (s,Medicaid_state[s])
#for m in Medicaid_demand:
#    print (m, Medicaid_demand[m])

NEW MEXICO 772102
ALABAMA 957116
ALASKA 231145
ARIZONA 1839932
ARKANSAS 830467
CALIFORNIA 11847711
COLORADO 1337805
CONNECTICUT 874974
DELAWARE 239009
FLORIDA 3892552
GEORGIA 1928703
IDAHO 340742
ILLINOIS 2987496
INDIANA 1602976
IOWA 699741
KANSAS 401103
KENTUCKY 1416013
LOUISIANA 1585024
MAINE 232455
MARYLAND 1372695
MASSACHUSETTS 1616404
MICHIGAN 2439425
MINNESOTA 1085778
MISSISSIPPI 632427
MISSOURI 923641
MONTANA 247333
NEBRASKA 254159
NEVADA 685073
NEW HAMPSHIRE 193436
NEW JERSEY 1759653
NEW YORK 6263164
NORTH CAROLINA 1851558
NORTH DAKOTA 96757
OHIO 2788134
OKLAHOMA 797220
OREGON 1053931
PENNSYLVANIA 3069309
RHODE ISLAND 305208
SOUTH CAROLINA 1048276
SOUTH DAKOTA 114059
TENNESSEE 1489536
TEXAS 4457644
UTAH 338812
VERMONT 161049
VIRGINIA 1497770
WASHINGTON 1780968
WEST VIRGINIA 521290
WISCONSIN 1112844
WYOMING 59302
HAWAII 351337
DISTRICT OF COLUMBIA 248591
PUERTO RICO 1622194


In [87]:
#print ('Med', Medicaid_demand[35039])
#print ('Med', Medicaid_demand[4017], 'Pop', population_county[4017], 'Med_capita', 100000*(Medicaid_demand[4017]/population_county[4017]))

In [88]:
#Further we create additional vulnerability values by considering SVI, YPLL, Unemployment, COVID, COVID_capita, COVID_death and COVID_death capita and the number of Medicaid enrolles in each county together

Covid_capita = {j: 100000*(COVID_14days[j]/population_county[j]) for j in location}

Covid_death_capita = {j: 100000*(County_covid_death[j]/population_county[j]) for j in location}

Medicaid_capita = {j: 100000*(Medicaid_demand[j]/population_county[j]) for j in location}

Unemployment_capita = dict(zip(df_unemp.FIPS, df_unemp.Unemployed_Rate))

#Three_month_lag_covid_death_capita = {j: 100000*(three_month_lag_death[j]/population_county[j]) for j in location}

#Two_month_lag_covid_death_capita = {j: 100000*(two_month_lag_death[j]/population_county[j]) for j in location}

#One_month_lag_covid_death_capita = {j: 100000*(one_month_lag_death[j]/population_county[j]) for j in location}

In [89]:
for j in location:
    print (j, County_covid_death[j], population_county[j], Covid_death_capita[j])

35039 25.0 38921 64.2326764471622
1001 42.0 55869 75.17585781023465
1009 46.0 57826 79.54899180299519
1013 42.0 19448 215.9605100781571
1015 126.0 113605 110.91061132872673
1017 54.0 33254 162.3864798219763
1031 23.0 52342 43.94176760536472
1033 48.0 55241 86.89198240437356
1039 34.0 37049 91.7703581743097
1043 50.0 83768 59.68866392894661
1045 55.0 49172 111.85227365167168
1051 70.0 81209 86.19734265906489
1055 70.0 102268 68.44760824500332
1067 7.0 17205 40.68584713746004
1069 40.0 105882 37.77790370412346
1071 23.0 51626 44.55119513423469
1077 57.0 92729 61.469443216253815
1079 36.0 32924 109.34272870854088
1083 46.0 98915 46.504574634787446
1089 153.0 372909 41.02877645752717
1095 58.0 96774 59.93345320023973
1097 372.0 413210 90.02686285423877
1103 55.0 119679 45.95626634580837
1111 22.0 22722 96.82246281137223
1113 3.0 57961 5.175894135711944
1115 57.0 89512 63.67861292340692
1117 78.0 217702 35.828793488346456
1121 57.0 79978 71.26959913976344
2261 3.0 9202 32.60160834601174
402

27005 21.0 34423 61.005722917816584
27015 15.0 25008 59.980806142034545
27017 18.0 35871 50.1798109893786
27023 9.0 11800 76.27118644067797
27025 17.0 56579 30.0464836776896
27029 10.0 8818 113.40440009072353
27033 4.0 11196 35.72704537334762
27035 40.0 65055 61.48643455537621
27041 41.0 38141 107.49587058545922
27043 3.0 13653 21.973192704900022
27045 0.0 21067 0.0
27047 7.0 30281 23.116805917902315
27049 33.0 46340 71.21277514026758
27051 6.0 5972 100.46885465505693
27055 4.0 18600 21.50537634408602
27059 19.0 40596 46.802640654251654
27061 24.0 45130 53.17970307999113
27063 1.0 9846 10.156408693885842
27065 14.0 16337 85.69504805043766
27067 30.0 43199 69.44605199194426
27069 9.0 4298 209.39972080037225
27071 6.0 12229 49.063701038515
27079 12.0 28887 41.54117769238758
27085 21.0 35893 58.50722982197085
27089 10.0 9336 107.11225364181662
27091 22.0 19683 111.77157953563989
27093 12.0 23222 51.67513564723107
27097 31.0 33386 92.85329179895764
27107 8.0 6375 125.49019607843138
27111 2

39045 67.0 157574 42.519705027479155
39051 31.0 42126 73.58875753691306
39053 15.0 29898 50.17057997190447
39055 51.0 93649 54.45867014063151
39057 79.0 168937 46.76299448906989
39059 15.0 38875 38.58520900321543
39063 51.0 75783 67.29741498753019
39065 26.0 31365 82.89494659652479
39067 3.0 15040 19.946808510638295
39069 29.0 27006 107.38354439754129
39071 18.0 43161 41.70431639674707
39073 18.0 28264 63.685253325785446
39077 22.0 58266 37.757869083170284
39081 16.0 65325 24.492920015308076
39083 20.0 62322 32.09139629665287
39085 81.0 230149 35.1945913299645
39089 79.0 176862 44.667593943300425
39091 21.0 45672 45.98003152916448
39093 114.0 309833 36.79401484025265
39099 305.0 228683 133.37239759842228
39103 61.0 179746 33.9367774526276
39107 60.0 41172 145.7301078402798
39109 69.0 106987 64.49381700580444
39111 22.0 13654 161.12494507104145
39113 275.0 531687 51.72215984216278
39117 2.0 35328 5.661231884057971
39119 15.0 86215 17.39836455373195
39123 32.0 40525 78.96360271437385
391

2275 0.0 2502 0.0
4003 88.0 125922 69.88453169422341
4019 722.0 1047279 68.94055929699726
4025 128.0 235099 54.445148639509306
5003 15.0 19657 76.30869410388158
5005 30.0 41932 71.5444052275112
5007 169.0 279141 60.54287976327376
5015 23.0 28380 81.0429880197322
5019 12.0 22320 53.763440860215056
5023 25.0 24919 100.32505317227817
5025 15.0 7956 188.5369532428356
5029 8.0 20846 38.37666698647222
5031 83.0 110332 75.22749519631657
5043 8.0 18219 43.910203633569346
5055 29.0 45325 63.98234969663541
5059 42.0 33771 124.36706049569156
5061 15.0 13202 113.61914861384638
5071 13.0 26578 48.9126345097449
5073 3.0 6624 45.289855072463766
5075 31.0 16406 188.9552602706327
5081 37.0 12259 301.8190717024227
5091 22.0 43257 50.85882053771644
5103 14.0 23382 59.875117611838164
5105 4.0 10455 38.25920612147298
5109 3.0 10718 27.99029669714499
5139 59.0 38682 152.52572255829585
5143 179.0 239187 74.83684313946829
6009 22.0 45905 47.92506262934321
6033 21.0 64386 32.615786040443574
6035 2.0 30573 6.54

51003 27.0 109330 24.69587487423397
51036 7.0 6963 100.53138015223323
51037 5.0 11880 42.08754208754208
51087 260.0 330818 78.59306325532468
51091 0.0 2190 0.0
51103 1.0 10603 9.431293030274452
51115 1.0 8834 11.319900384876613
51117 37.0 30587 120.96642364403176
51119 11.0 10582 103.95010395010395
51125 3.0 14930 20.093770931011385
51131 31.0 11710 264.73099914602903
51133 8.0 12095 66.14303431169905
51145 6.0 29652 20.234722784297855
51147 16.0 22802 70.16928339619331
51149 7.0 38353 18.25150574922431
51157 2.0 7370 27.137042062415198
51159 6.0 9023 66.49673057741329
51161 35.0 94186 37.1605121780307
51163 2.0 22573 8.860142648296637
51169 15.0 21566 69.55392747843828
51173 35.0 30104 116.26361945256444
51179 22.0 152882 14.39018327860703
51185 7.0 40595 17.24350289444513
51570 26.0 17370 149.68336211859528
51610 6.0 14617 41.04809468427174
51670 9.0 22529 39.94851080829153
51790 13.0 24932 52.14182576608375
53005 150.0 204390 73.38910905621606
53007 31.0 77200 40.15544041450777
5300

39163 6.0 13085 45.85403133358808
40005 1.0 13758 7.268498328245384
40023 4.0 14672 27.262813522355508
40059 2.0 3688 54.229934924078094
40067 2.0 6002 33.32222592469177
40089 46.0 32832 140.10721247563353
40095 4.0 16931 23.625302699190833
40141 7.0 7250 96.55172413793103
41011 3.0 64487 4.652100423341138
41021 0.0 1912 0.0
41023 1.0 7199 13.890818169190165
41033 5.0 87487 5.715134820030404
41047 158.0 347818 45.426056155805625
41055 0.0 1780 0.0
41059 50.0 77950 64.14368184733803
41069 0.0 1332 0.0
42003 570.0 1216045 46.87326538080416
42045 873.0 566747 154.03698652132258
45001 22.0 24527 89.69706853671464
45013 94.0 192122 48.9272441469483
45029 51.0 37677 135.36109562863285
45081 26.0 20473 126.99653201777951
45085 103.0 106721 96.5133385181923
45087 28.0 27316 102.50402694391566
46011 18.0 35077 51.31567693930496
46049 10.0 2299 434.97172683775557
46083 55.0 61128 89.97513414474545
46089 1.0 2379 42.034468263976464
46107 2.0 2153 92.89363678588016
46109 20.0 10394 192.41870309794

47029 30.0 36004 83.32407510276636
47129 7.0 21403 32.705695463252816
48055 39.0 43664 89.3184316599487
48105 7.0 3464 202.07852193995382
48115 37.0 12728 290.69767441860466
48137 2.0 1932 103.51966873706004
48165 18.0 21492 83.75209380234506
48273 55.0 30680 179.26988265971315
48291 69.0 88219 78.2144436005849
48313 15.0 14284 105.01260151218146
48329 150.0 176832 84.82627578718784
48345 2.0 1200 166.66666666666669
48409 86.0 66730 128.87756631200358
48413 4.0 2793 143.2151808091658
48439 1077.0 2102515 51.22436700808318
48443 1.0 776 128.8659793814433
48449 41.0 32750 125.19083969465649
48457 5.0 21672 23.071244001476558
48471 71.0 72971 97.29892697098848
48473 19.0 55246 34.39163016326974
48495 6.0 8010 74.90636704119851
49039 5.0 30939 16.160832606095866
51029 10.0 17148 58.315838581758804
51081 16.0 11336 141.14326040931547
51153 235.0 470335 49.96438708580054
51590 51.0 40044 127.35990410548396
51620 17.0 7967 213.38019329735155
51660 38.0 53016 71.67647502640712
51683 28.0 41085

12047 11.0 14428 76.24064319378985
13193 16.0 12947 123.5807522978296
13239 1.0 2299 43.49717268377555
13259 17.0 6621 256.7587977646881
21119 9.0 14806 60.786167769823045
21147 1.0 17231 5.803493703209332
21159 9.0 11195 80.39303260384101
21237 1.0 7157 13.97233477714126
22035 11.0 6861 160.32648301996792
28051 61.0 17010 358.61258083480305
28053 19.0 8064 235.61507936507934
28103 17.0 10417 163.19477776711145
28133 56.0 25110 223.01871764237356
28163 41.0 29690 138.0936342202762
30085 42.0 11004 381.6793893129771
35006 49.0 26675 183.6925960637301
35029 30.0 23709 126.53422750854106
46017 9.0 1962 458.7155963302753
46031 5.0 4086 122.36906510034262
47095 7.0 7016 99.77194982896238
48061 1132.0 423163 267.50921039882974
48215 2090.0 868707 240.58744778158803
48377 9.0 6704 134.24821002386633
48445 28.0 12337 226.95955256545352
48479 417.0 276652 150.7308821190521
48489 61.0 21358 285.60726659799604
53001 12.0 19983 60.05104338687885
54047 2.0 17624 11.348161597821154
1085 30.0 9726 30

In [90]:
County_covid_death[9009], population_county[9009], Covid_death_capita[9009]

(1291.0, 854757, 151.0370783743216)

In [91]:
# Dictionary for the total Covid per capita for each state
Total_covid_cap = total_state(Covid_capita) 


# Dictionary for the total Covid death per capita for each state
Total_covid_death_cap = total_state(Covid_death_capita) 


# Dictionary for the total Medicaid per capita for each state
Total_medicaid_cap = total_state(Medicaid_capita)


# Dictionary for the total Unemployment per capita for each state
Total_unemployment_cap = total_state(Unemployment_capita)



In [92]:
# Dictionaries for the different vulnerability criteria values for states

# Dictionary for total Medicaid patient numbers for each state
Medicaid_demand_state = total_state(Medicaid_demand) 

# Dictionary for total positive COVID cases for last 14 days in each state
Covid_state = total_state(COVID_14days) 

# Dictionary for total SVI values for each state
SVI_state = total_state(SVI_county) 

# Dictionary for total YPLL values for each state
YPLL_state = total_state(YPLL) 

# Dictionary for total Unemployment numbers for each state
Unemployment_state = total_state(Unemployment)  

In [93]:
print(Medicaid_demand_state)

{'NEW MEXICO': 772102.0, 'ALABAMA': 957115.9999999999, 'ALASKA': 231145.00000000006, 'ARIZONA': 1839932.0, 'ARKANSAS': 830466.9999999999, 'CALIFORNIA': 11847710.999999998, 'COLORADO': 1337804.9999999998, 'CONNECTICUT': 874974.0, 'DELAWARE': 239009.0, 'FLORIDA': 3892552.0, 'GEORGIA': 1928703.000000001, 'IDAHO': 340741.9999999999, 'ILLINOIS': 2987495.999999999, 'INDIANA': 1602975.9999999995, 'IOWA': 699741.0000000003, 'KANSAS': 401103.00000000006, 'KENTUCKY': 1416012.9999999995, 'LOUISIANA': 1585023.9999999998, 'MAINE': 232455.00000000003, 'MARYLAND': 1372695.0, 'MASSACHUSETTS': 1616404.0, 'MICHIGAN': 2439425.0000000005, 'MINNESOTA': 1085777.9999999998, 'MISSISSIPPI': 632427.0000000002, 'MISSOURI': 923641.0000000002, 'MONTANA': 247333.0, 'NEBRASKA': 254158.99999999997, 'NEVADA': 685073.0000000001, 'NEW HAMPSHIRE': 193436.0, 'NEW JERSEY': 1759653.0000000002, 'NEW YORK': 6263164.0, 'NORTH CAROLINA': 1851558.0000000005, 'NORTH DAKOTA': 96757.00000000001, 'OHIO': 2788134.0, 'OKLAHOMA': 79722

# 1 million CHW allocation to states

We allocate 1 million CHWs to states proportional to total Medicaid enrolles in each state.

Let's $FedCHW$ represents the number of CHW will be allocated within states by the federal government, which is 1 million in our project. $TotMed$ represents the total Medicaid enrollee numbers over the US, $Med_s$ is the total Medicaid enrollee numbers in state $s \in S$, and $CHW_s$ is the total number of CHW allocated to state $ s\in S$. 

$CHW_s = FedCHW*\frac{Med_s}{TotMed}$



In [94]:
# We consider allocation of 1 million CHW all over the US
Federal_budget_CHW = 1000000

# First, we calculate the Total Medicaid enrolles all over the US
Total_federal_need = sum(Medicaid_demand_state[s] for s in State)

# Allocate the 1 million CHWs proportional to Medicaid enrolles in each state
Medicaid_budget_state = {s: (Medicaid_demand_state[s]/Total_federal_need)*Federal_budget_CHW  for s in State}

In [95]:
for s in State:
    print (s, Medicaid_demand_state[s], Total_federal_need, Federal_budget_CHW, (Medicaid_demand_state[s]/Total_federal_need)*Federal_budget_CHW)

NEW MEXICO 772102.0 76256043.0 1000000 10125.12542776446
ALABAMA 957115.9999999999 76256043.0 1000000 12551.3462585516
ALASKA 231145.00000000006 76256043.0 1000000 3031.1696084204114
ARIZONA 1839932.0 76256043.0 1000000 24128.343507149984
ARKANSAS 830466.9999999999 76256043.0 1000000 10890.5073923125
CALIFORNIA 11847710.999999998 76256043.0 1000000 155367.50313676777
COLORADO 1337804.9999999998 76256043.0 1000000 17543.593233653624
CONNECTICUT 874974.0 76256043.0 1000000 11474.159497103725
DELAWARE 239009.0 76256043.0 1000000 3134.295861640762
FLORIDA 3892552.0 76256043.0 1000000 51045.81678857897
GEORGIA 1928703.000000001 76256043.0 1000000 25292.461084035018
IDAHO 340741.9999999999 76256043.0 1000000 4468.393409818024
ILLINOIS 2987495.999999999 76256043.0 1000000 39177.17052273483
INDIANA 1602975.9999999995 76256043.0 1000000 21020.97010200227
IOWA 699741.0000000003 76256043.0 1000000 9176.203910816621
KANSAS 401103.00000000006 76256043.0 1000000 5259.950349115283
KENTUCKY 1416012.99

In [96]:
print (Covid_state)

{'NEW MEXICO': 25497.0, 'ALABAMA': 33494.0, 'ALASKA': 8004.0, 'ARIZONA': 58058.0, 'ARKANSAS': 23345.0, 'CALIFORNIA': 208352.0, 'COLORADO': 57749.0, 'CONNECTICUT': 25902.0, 'DELAWARE': 7081.0, 'FLORIDA': 106710.0, 'GEORGIA': 45179.0, 'IDAHO': 17434.0, 'ILLINOIS': 124238.0, 'INDIANA': 78261.0, 'IOWA': 32172.0, 'KANSAS': 34234.0, 'KENTUCKY': 39491.0, 'LOUISIANA': 30521.0, 'MAINE': 2713.0, 'MARYLAND': 29396.0, 'MASSACHUSETTS': 39974.0, 'MICHIGAN': 94976.0, 'MINNESOTA': 75968.0, 'MISSISSIPPI': 19126.0, 'MISSOURI': 46715.0, 'MONTANA': 12035.0, 'NEBRASKA': 23672.0, 'NEVADA': 30484.0, 'NEW HAMPSHIRE': 6262.0, 'NEW JERSEY': 54491.0, 'NEW YORK': 83862.0, 'NORTH CAROLINA': 51099.0, 'NORTH DAKOTA': 10416.0, 'OHIO': 113677.0, 'OKLAHOMA': 37985.0, 'OREGON': 17773.0, 'PENNSYLVANIA': 97454.0, 'RHODE ISLAND': 12060.0, 'SOUTH CAROLINA': 20996.0, 'SOUTH DAKOTA': 12191.0, 'TENNESSEE': 54822.0, 'TEXAS': 158765.0, 'UTAH': 35558.0, 'VERMONT': 1220.0, 'VIRGINIA': 31720.0, 'WASHINGTON': 34124.0, 'WEST VIRGINIA

# Proportional allocation for different vulnerability values

Let V = {Medicaid, SVI, YPLL, Unemployment, COVID, COVID_capita, COVID_death and COVID_death capita, SVI and Medicaid, YPLL and Medicaid, Unemployment and Medicaid, COVID and Medicaid, COVID_capita and Medicaid, COVID_death and Medicaid and COVID_death capita and Medicaid}. We assume $v_j$ represent the vulnerability value for county $j \in J$, while $v_s$ represent the sum of the vulnerability values for each county in the state of county j.

$Prop_{v_j} = \frac{v_j}{v_s}*CHW_s$



In [97]:
# Calling proportional allocation function for different vulnerability criterias

# Proportional allocation according to cumulative Covid death in per capita in each county
Proportional_to_covid_death_cap = Proportional_allocation(Covid_death_capita, Total_covid_death_cap,Medicaid_budget_state)

# Propportional allocation according to Medicaid enrollee number in each county
Proportional_to_medicaid = Proportional_allocation(Medicaid_demand, Medicaid_demand_state,Medicaid_budget_state )

# Proportional allocation according to Medicaid enrolles per capita in each county
Proportional_to_medicaid_cap = Proportional_allocation(Medicaid_capita, Total_medicaid_cap, Medicaid_budget_state )

# Proportional allocation according to last 14 days positive COVID cases in each county
Proportional_to_covid = Proportional_allocation(COVID_14days, Covid_state, Medicaid_budget_state)

# Proportional allocation according to SVI score in each county
Proportional_to_SVI = Proportional_allocation(SVI_county, SVI_state, Medicaid_budget_state)

# Proportional allocation according to YPLL in each county
Proportional_to_YPLL = Proportional_allocation(YPLL, YPLL_state, Medicaid_budget_state)

# Proportional allocation according to Unemployment  in each county
Proportional_to_unemployment = Proportional_allocation(Unemployment, Unemployment_state, Medicaid_budget_state)

# Proportional allocation according to Medicaid enrolles per capita in each county
Proportional_to_unemployment_cap = Proportional_allocation(Unemployment_capita, Total_unemployment_cap, Medicaid_budget_state )

# Proportional allocation according to last 14 days positive COVID cases per capita in each county
Proportional_to_covid_capita = Proportional_allocation(Covid_capita, Total_covid_cap, Medicaid_budget_state)



# Normalize values for comparison
To be able compare the different vulnerability values for each county, we normalize all vulnerability values as follows. 

Let $m_{v_s} = \min \{v_j, \text{ for county j in state s }\}$  and 
$M_{v_s} = \max \{v_j, \text{ for county j in state s }\}$.

We calculate the normalize value for each vulnerability for each county by substracting the min vulnerability in the state of the county and dividing that by the differences between max and min value of the vulnerability values in the state. Mathematical formulation for the normalization is as follows.

$N_{v_j} = \frac{v_j - m_{v_s}}{M_{v_s} - m_{v_s}}$ 

for each $v \in V$, where V = {Medicaid, SVI, YPLL, Unemployment, COVID, COVID_capita, COVID_death and COVID_death capita, SVI and Medicaid, YPLL and Medicaid, Unemployment and Medicaid, COVID and Medicaid, COVID_capita and Medicaid, COVID_death and Medicaid and COVID_death capita and Medicaid}, j is a county in each state $s \in S$.


In [98]:
#Normalize function to normalize the vulnerability values to be able to compare them

def normalize(dict_1):
    
    result = {}
    min_data = {s: min(dict_1[j] for j in location if (j,s) in cartesian_pro_county_state) for s in State }
    max_data = {s: max(dict_1[j] for j in location if (j,s) in cartesian_pro_county_state) for s in State }
    
    for (j,s) in cartesian_pro_county_state:
        
        if (max_data[s] - min_data[s]) != 0 :
    
            result[j] = (dict_1[j] - min_data[s])/(max_data[s] - min_data[s])
        
        else:
            result[j] = 1
    
    return result 

# Percentile Rank

<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.percentileofscore.html"> The function scipy.stats.percentileofscore (a, score, kind='rank')   </a>
computes the percentile rank of a score relative to a list of scores. 
"rank": Average percentage ranking of score. In case of multiple matches, average the percentage rankings of all matching scores.

In [99]:
from scipy import stats

# Calculate percentile ranks

def percentile_ranks(data):
    x = {s: [] for s in State}

    for (j,s) in cartesian_pro_county_state:
         
        x[s].append(data[j])
    
    
    
    percentile_ranks = {i: stats.percentileofscore(x[s], data[i], 'rank') for (i,s) in cartesian_pro_county_state}

    return percentile_ranks

In [100]:
# Write timestamp 

time_stamp = time.strftime('%m-%d-%Y %H:%M:%S')
with open('Output/time_stamp.csv','w') as f:
    w = csv.writer(f)
    now = time.strftime('%m/%d/%Y %H:%M:%S')
    w.writerow(['time',now])
    

In [101]:
#print (Medicaid_demand)

In [102]:
#Write a function to order the dicts
def order_k(dict_1):
    dict_2 = {}
    for m in location:
        if m in dict_1.keys():
            dict_2[m] = dict_1[m]
        else:
            dict_2[m] = 0
    
    return dict_2
            

In [103]:


Medicaid_demand = order_k(Medicaid_demand)#{m: Medicaid_demand[m] for m in location}
COVID_14days = order_k(COVID_14days)#{m: COVID_14days[m] for m in location}
SVI_county = order_k(SVI_county)#{m: SVI_county[m] for m in location}
YPLL = order_k(YPLL)#{m: YPLL[m] for m in location}
Unemployment = order_k(Unemployment)#{m: Unemployment[m] for m in location}
Unemployment_capita = order_k(Unemployment_capita)#{m: Unemployment_capita[m] for m in location}

In [104]:
# Write file allocation with each strategies for each county 

Strategies = ["Medicaid_demand", "Medicaid_capita", "Covid", "SVI"
              , "YPLL","Unemployment", "Unemployment_capita", "Covid_capita",  "Covid_death_capita" ]

fieldnames = []  
fieldnames.append('County_FIPS')


SVI_values = {i:SVI_county[i] for i in location}
s_count = 1
for s in Strategies:   
    fieldnames.append('Proportional_allocation_to_' + s)
    fieldnames.append(s)
    fieldnames.append('Percentile_ranks_' + s)

    
        

writefile = 'Output/County_level_proportional_allocation_for_all_policies.csv'
with open( writefile, 'w' ) as f:
    writer = csv.writer(f)                
    writer.writerow(fieldnames)
    for row in zip(location
                   , Proportional_to_medicaid.values(),         Medicaid_demand.values(),      percentile_ranks(Medicaid_demand).values()
                   , Proportional_to_medicaid_cap.values(),     Medicaid_capita.values(),      percentile_ranks(Medicaid_capita).values()
                   , Proportional_to_covid.values(),            COVID_14days.values(),         percentile_ranks(COVID_14days).values()
                   , Proportional_to_SVI.values(),              SVI_county.values(),           percentile_ranks(SVI_county).values()
                   , Proportional_to_YPLL.values(),             YPLL.values(),                 percentile_ranks(YPLL).values()
                   , Proportional_to_unemployment.values(),     Unemployment.values(),         percentile_ranks(Unemployment).values()
                   , Proportional_to_unemployment_cap.values(), Unemployment_capita.values(),  percentile_ranks(Unemployment_capita).values()
                   , Proportional_to_covid_capita.values(),     Covid_capita.values(),         percentile_ranks(Covid_capita).values()
                   , Proportional_to_covid_death_cap.values(),  Covid_death_capita.values(),   percentile_ranks(Covid_death_capita).values() ):                    
       
        writer.writerow(row)

In [105]:
writefile = 'Output/State_level_allocation.csv'

cl = ['State', 'CHW_allocation']
with open( writefile, 'w' ) as f:
    writer = csv.writer(f)                
    writer.writerow(cl)
    for row in zip( State, Medicaid_demand_state.values()):
        writer.writerow(row)