In [149]:
# Suzan Iloglu, May 21,2020
# Import packages
import csv
import gurobipy as gp
from itertools import product
import geopandas as gpd
import pandas as pd
import numpy as np
import math
import time
import requests
import io
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
pd.options.display.max_columns =200
from IPython.display import Image


# MAPPING THE NEW POLITICS OF CARE: COMMUNITY HEALTH WORKERS
The project presents multiple options for how individual workers in such a Community Health Corps might be distributed within each state. It shows that what you choose to prioritize greatly impacts where care would be sent. We can define communities in greatest need in many ways: we can think about our current crisis and send people to where the COVID19 pandemic rages most fiercely; we can think of long term measures of social and economic inequality embedded in metrics like the Centers for Disease Control and Prevention’s Social Vulnerability Index; we can focus on the places with too many people dying too young and use the County Health Rankings Years-of-Potential-Life-Lost measure; we can think of joblessness and how the pandemic has thrown many into unemployment and target our resources in this way. 

The followings are our options to choose to define vulnerability:


- SOCIAL VULNERABILITY INDEX
- MEDICAID 
- UNEMPLOYMENT
- YEARS OF POTENTIAL LIFE LOST
- TOTAL COVID CASES
- COVID CASES BY POPULATION
- COVID DEATHS BY POPULATION

We will start with Social Vulnerability Index (SVI) from CDC website.

### I. Importing SVI data which includes the variables for calculating county SVI for each state
The CDC uses both a USA-wide and a state by state SVI scores. For our project given that funding is likely going to be managed at a state level, using a state by state SVI scores makes the most sense and will be most sensitive to regional socioeconomic differences. Even though the CDC SVI scores are calculated using percentile rankings, the data sets include raw data estimates for each variables. The following table shows the variablaes used in the method of calculating SVI scores. 




      American Community Survey (ACS), 2014-2018 (5-year) data for the following estimates:
<img src="Data/img/SVI_comp.png" width="500">


Note: Full documentation for 2018 data is available <a href="https://svi.cdc.gov/data-and-tools-download.html">here</a> 
This part of the code shows preliminary mapping of <a href = "https://svi.cdc.gov/">the CDC's Social Vulnerability Index</a>.

Later in the notebook, we will provide the formula to create the SVI value we use in our project. First, we import the data for the US mainland and Puerto Rico.

In [150]:
## import svi data downloaded from CDC website as cited above

## 48 state SVI scores by county
svi_counties_mainland = gpd.read_file("Data/SVI2018_US_COUNTY/SVI2018_US_county.shp")

## Puerto Rico SVI scores by county
svi_counties_puerto_rico = gpd.read_file("Data/PuertoRico_COUNTY/SVI2018_PuertoRico_county.shp")

## Merge 48 states and Puerto Rico SVI 
svi_counties = pd.concat([svi_counties_mainland,svi_counties_puerto_rico ], sort = False)


In [151]:
## Replacing -999 values with 0 for calculations
svi_county = svi_counties.fillna(0)
svi_county  = svi_county.replace(-999, 0)
svi_county['FIPS'] = svi_county['FIPS'].astype(int)

In [152]:
## Create the list for State
State = svi_county.STATE.unique().tolist()

In [153]:
# Create a seperate dictionary for the variables to calculate SVI

# Persons below poverty estimate, 2014-2018 ACS
E_POV = dict(zip(svi_county.FIPS, svi_county.E_POV))

# Civilian (age 16+) unemployed estimate, 2014-2018 ACS
E_UNEMP = dict(zip(svi_county.FIPS, svi_county.E_UNEMP))

# Per capita income estimate, 2014-2018 ACS
E_PCI = dict(zip(svi_county.FIPS, svi_county.E_PCI))

# Persons (age 25+) with no high school diploma estimate, 2014-2018 ACS
E_NOHSDP = dict(zip(svi_county.FIPS, svi_county.E_NOHSDP))

# Persons aged 65 and older estimate
E_AGE65 = dict(zip(svi_county.FIPS, svi_county.E_AGE65))

# Persons aged 17 and younger estimate
E_AGE17 = dict(zip(svi_county.FIPS, svi_county.E_AGE17))

# Population with a disability estimate
E_DISABL = dict(zip(svi_county.FIPS, svi_county.E_DISABL))

# Single parent households with children under 18 estimate
E_SNGPNT = dict(zip(svi_county.FIPS, svi_county.E_SNGPNT))

# Minority (all persons except white, nonHispanic) estimate, 2014-2018 ACS
E_MINRTY = dict(zip(svi_county.FIPS, svi_county.E_MINRTY))

# Persons (age 5+) who speak English "less than well" estimate, 2014-2018 ACS
E_LIMENG = dict(zip(svi_county.FIPS, svi_county.E_LIMENG))

# Housing in structures with 10 or more units estimate, 2014-2018 ACS
E_MUNIT = dict(zip(svi_county.FIPS, svi_county.E_MUNIT))

# Mobile homes estimate MOE, 2014-2018 ACS
E_MOBILE = dict(zip(svi_county.FIPS, svi_county.E_MOBILE))

# At household level (occupied housing units), more people than rooms estimate, 2014-2018 ACS
E_CROWD = dict(zip(svi_county.FIPS, svi_county.E_CROWD))

# Households with no vehicle available estimate, 2014-2018 ACS
E_NOVEH = dict(zip(svi_county.FIPS, svi_county.E_NOVEH))

# Persons in institutionalized group quarters estimate, 2014-2018 ACS
E_GROUPQ = dict(zip(svi_county.FIPS, svi_county.E_GROUPQ))

# Percentage of persons below poverty estimate
E_POV = dict(zip(svi_county.FIPS, svi_county.E_POV))

# Medicaid 
Medicaid is a means-tested health insurance program for low-income children, pregnant women, adults, seniors, and people with disabilities. Medicaid is jointly funded by federal and state governments and managed by states within federal standards and a wide range of state options. <a href="https://data.medicaid.gov/Enrollment/State-Medicaid-and-CHIP-Applications-Eligibility-D/n5ce-jxme"> Data Source for Medicaid Enrollment </a> 

In [154]:

import sodapy
from sodapy import Socrata

# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
client = Socrata("data.medicaid.gov", None)


# Returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("83yt-67it", limit=4000)


# Read the medicaid demand data
#df_mm = pd.read_csv("Data/2020_06_Preliminary_applications__eligibility_determinations__and_enrollment_data.csv")

# Convert to pandas DataFrame
df_mm = pd.DataFrame.from_records(results)
df_mm.head(5)
df_mm.columns




Index(['applications_for_financial_assistance_submitted_to_the_state_based_marketplace',
       'applications_for_financial_assistance_submitted_to_the_state_based_marketplace_footnotes',
       'final_report', 'geocoded_column',
       'individuals_determined_eligible_for_chip_at_application',
       'individuals_determined_eligible_for_chip_at_application_footnotes',
       'individuals_determined_eligible_for_medicaid_at_application',
       'individuals_determined_eligible_for_medicaid_at_application_footnotes',
       'latitude', 'longitude', 'medicaid_and_chip_child_enrollment',
       'medicaid_and_chip_child_enrollment_footnotes',
       'new_applications_submitted_to_medicaid_and_chip_agencies',
       'new_applications_submitted_to_medicaid_and_chip_agencies_footnotes',
       'preliminary_updated', 'report_date', 'state_abbreviation',
       'state_expanded_medicaid', 'state_name',
       'total_applications_for_financial_assistance_submitted_at_state_level',
       'total_a

In [155]:
df_mm['State Name'] = df_mm['state_name'].str.upper() 

In [156]:
Medicaid_state = dict(zip(df_mm['State Name'], df_mm['total_medicaid_and_chip_enrollment']))
Medicaid_state['PUERTO RICO'] = 1622194
print (Medicaid_state)

{'ALABAMA': '957116', 'ALASKA': '231145', 'ARIZONA': '1839932', 'ARKANSAS': '830467', 'CALIFORNIA': '11847711', 'COLORADO': '1337805', 'CONNECTICUT': '874974', 'DELAWARE': '239009', 'DISTRICT OF COLUMBIA': '248591', 'FLORIDA': '3892552', 'GEORGIA': '1928703', 'HAWAII': '351337', 'IDAHO': '340742', 'ILLINOIS': '2987496', 'INDIANA': '1602976', 'IOWA': '699741', 'KANSAS': '401103', 'KENTUCKY': '1416013', 'LOUISIANA': '1585024', 'MAINE': '232455', 'MARYLAND': '1372695', 'MASSACHUSETTS': '1616404', 'MICHIGAN': '2439425', 'MINNESOTA': '1085778', 'MISSISSIPPI': '632427', 'MISSOURI': '923641', 'MONTANA': '247333', 'NEBRASKA': '254159', 'NEVADA': '685073', 'NEW HAMPSHIRE': '193436', 'NEW JERSEY': '1759653', 'NEW MEXICO': '772102', 'NEW YORK': '6263164', 'NORTH CAROLINA': '1851558', 'NORTH DAKOTA': '96757', 'OHIO': '2788134', 'OKLAHOMA': '797220', 'OREGON': '1053931', 'PENNSYLVANIA': '3069309', 'RHODE ISLAND': '305208', 'SOUTH CAROLINA': '1048276', 'SOUTH DAKOTA': '114059', 'TENNESSEE': '1489536

In [157]:
df_mmm = pd.read_csv("Data/ACSST5Y2018.S2704_data_with_overlays_2020-08-01T140649.csv", header=[1])
df_mmm.head(1)
#df_mmm.dtypes

Unnamed: 0,id,Geographic Area Name,Estimate!!Total!!Civilian noninstitutionalized population,Margin of Error!!Total MOE!!Civilian noninstitutionalized population,Estimate!!Public Coverage!!Civilian noninstitutionalized population,Margin of Error!!Public Coverage MOE!!Civilian noninstitutionalized population,Estimate!!Percent Public Coverage!!Civilian noninstitutionalized population,Margin of Error!!Percent Public Coverage MOE!!Civilian noninstitutionalized population,Estimate!!Total!!Medicare coverage alone or in combination,Margin of Error!!Total MOE!!Medicare coverage alone or in combination,Estimate!!Public Coverage!!Medicare coverage alone or in combination,Margin of Error!!Public Coverage MOE!!Medicare coverage alone or in combination,Estimate!!Percent Public Coverage!!Medicare coverage alone or in combination,Margin of Error!!Percent Public Coverage MOE!!Medicare coverage alone or in combination,Estimate!!Total!!Medicare coverage alone or in combination!!Under 19,Margin of Error!!Total MOE!!Medicare coverage alone or in combination!!Under 19,Estimate!!Public Coverage!!Medicare coverage alone or in combination!!Under 19,Margin of Error!!Public Coverage MOE!!Medicare coverage alone or in combination!!Under 19,Estimate!!Percent Public Coverage!!Medicare coverage alone or in combination!!Under 19,Margin of Error!!Percent Public Coverage MOE!!Medicare coverage alone or in combination!!Under 19,Estimate!!Total!!Medicare coverage alone or in combination!!19 to 64 years,Margin of Error!!Total MOE!!Medicare coverage alone or in combination!!19 to 64 years,Estimate!!Public Coverage!!Medicare coverage alone or in combination!!19 to 64 years,Margin of Error!!Public Coverage MOE!!Medicare coverage alone or in combination!!19 to 64 years,Estimate!!Percent Public Coverage!!Medicare coverage alone or in combination!!19 to 64 years,Margin of Error!!Percent Public Coverage MOE!!Medicare coverage alone or in combination!!19 to 64 years,Estimate!!Total!!Medicare coverage alone or in combination!!65 years and over,Margin of Error!!Total MOE!!Medicare coverage alone or in combination!!65 years and over,Estimate!!Public Coverage!!Medicare coverage alone or in combination!!65 years and over,Margin of Error!!Public Coverage MOE!!Medicare coverage alone or in combination!!65 years and over,Estimate!!Percent Public Coverage!!Medicare coverage alone or in combination!!65 years and over,Margin of Error!!Percent Public Coverage MOE!!Medicare coverage alone or in combination!!65 years and over,Estimate!!Total!!Medicaid/means-tested public coverage alone or in combination,Margin of Error!!Total MOE!!Medicaid/means-tested public coverage alone or in combination,Estimate!!Public Coverage!!Medicaid/means-tested public coverage alone or in combination,Margin of Error!!Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination,Estimate!!Percent Public Coverage!!Medicaid/means-tested public coverage alone or in combination,Margin of Error!!Percent Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination,Estimate!!Total!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Margin of Error!!Total MOE!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Estimate!!Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Margin of Error!!Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Estimate!!Percent Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Margin of Error!!Percent Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!Under 19,Estimate!!Total!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Margin of Error!!Total MOE!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Estimate!!Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Margin of Error!!Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Estimate!!Percent Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Margin of Error!!Percent Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!19 to 64 years,Estimate!!Total!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Margin of Error!!Total MOE!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Estimate!!Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Margin of Error!!Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Estimate!!Percent Public Coverage!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Margin of Error!!Percent Public Coverage MOE!!Medicaid/means-tested public coverage alone or in combination!!65 years and over,Estimate!!Total!!VA health care coverage alone or in combination,Margin of Error!!Total MOE!!VA health care coverage alone or in combination,Estimate!!Public Coverage!!VA health care coverage alone or in combination,Margin of Error!!Public Coverage MOE!!VA health care coverage alone or in combination,Estimate!!Percent Public Coverage!!VA health care coverage alone or in combination,Margin of Error!!Percent Public Coverage MOE!!VA health care coverage alone or in combination,Estimate!!Total!!VA health care coverage alone or in combination!!Under 19,Margin of Error!!Total MOE!!VA health care coverage alone or in combination!!Under 19,Estimate!!Public Coverage!!VA health care coverage alone or in combination!!Under 19,Margin of Error!!Public Coverage MOE!!VA health care coverage alone or in combination!!Under 19,Estimate!!Percent Public Coverage!!VA health care coverage alone or in combination!!Under 19,Margin of Error!!Percent Public Coverage MOE!!VA health care coverage alone or in combination!!Under 19,Estimate!!Total!!VA health care coverage alone or in combination!!19 to 64 years,Margin of Error!!Total MOE!!VA health care coverage alone or in combination!!19 to 64 years,Estimate!!Public Coverage!!VA health care coverage alone or in combination!!19 to 64 years,Margin of Error!!Public Coverage MOE!!VA health care coverage alone or in combination!!19 to 64 years,Estimate!!Percent Public Coverage!!VA health care coverage alone or in combination!!19 to 64 years,Margin of Error!!Percent Public Coverage MOE!!VA health care coverage alone or in combination!!19 to 64 years,Estimate!!Total!!VA health care coverage alone or in combination!!65 years and over,Margin of Error!!Total MOE!!VA health care coverage alone or in combination!!65 years and over,Estimate!!Public Coverage!!VA health care coverage alone or in combination!!65 years and over,Margin of Error!!Public Coverage MOE!!VA health care coverage alone or in combination!!65 years and over,Estimate!!Percent Public Coverage!!VA health care coverage alone or in combination!!65 years and over,Margin of Error!!Percent Public Coverage MOE!!VA health care coverage alone or in combination!!65 years and over,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Below 138 percent of the poverty threshold,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!At or above 138 percent of the poverty threshold,"Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)","Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)","Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)","Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)","Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)","Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Worked full-time, year-round (19-64 years)",Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!Under 6,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!6 to 18 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!19 to 25 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!26 to 34 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!35 to 44 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!45 to 54 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!55 to 64 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!65 to 74 years,Estimate!!Total!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Margin of Error!!Total MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Estimate!!Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Margin of Error!!Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Estimate!!Percent Public Coverage!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Margin of Error!!Percent Public Coverage MOE!!PUBLIC HEALTH INSURANCE ALONE OR IN COMBINATION!!75 years and over,Estimate!!Total!!COVERAGE ALONE!!Public health insurance alone,Margin of Error!!Total MOE!!COVERAGE ALONE!!Public health insurance alone,Estimate!!Public Coverage!!COVERAGE ALONE!!Public health insurance alone,Margin of Error!!Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone,Estimate!!Percent Public Coverage!!COVERAGE ALONE!!Public health insurance alone,Margin of Error!!Percent Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone,Estimate!!Total!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Margin of Error!!Total MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Estimate!!Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Margin of Error!!Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Estimate!!Percent Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Margin of Error!!Percent Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicare coverage alone,Estimate!!Total!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Margin of Error!!Total MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Estimate!!Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Margin of Error!!Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Estimate!!Percent Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Margin of Error!!Percent Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!Medicaid/means tested coverage alone,Estimate!!Total!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone,Margin of Error!!Total MOE!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone,Estimate!!Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone,Margin of Error!!Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone,Estimate!!Percent Public Coverage!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone,Margin of Error!!Percent Public Coverage MOE!!COVERAGE ALONE!!Public health insurance alone!!VA health care coverage alone
0,0500000US01001,"Autauga County, Alabama",54277,219,18191,965,33.5,1.8,(X),(X),10026,410,18.5,0.8,14134,125,36,59,0.3,0.4,32229,261,2155,369,6.7,1.1,7914,152,7835,155,99.0,0.7,(X),(X),9049,859,16.7,1.6,14134,125,5352,682,37.9,4.9,32229,261,2788,452,8.7,1.4,7914,152,909,183,11.5,2.4,(X),(X),1701,290,3.1,0.5,14134,125,8,13,0.1,0.1,32229,261,989,252,3.1,0.8,7914,152,704,156,8.9,2.0,12303,1299,7466,880,60.7,4.9,41915,1301,10666,650,25.4,1.5,17605,763,1001,233,5.7,1.4,3974,267,1595,316,40.1,7.6,10160,310,3801,489,37.4,5.1,4406,284,560,235,12.7,5.3,6224,268,550,209,8.8,3.3,7042,206,1095,224,15.5,3.2,7771,134,1361,253,17.5,3.3,6786,87,1363,239,20.1,3.5,4697,68,4649,73,99.0,0.7,3217,138,3217,138,100.0,1.0,(X),(X),9513,834,17.5,1.5,(X),(X),2884,414,5.3,0.8,(X),(X),6503,810,12.0,1.5,(X),(X),126,101,0.2,0.2


In [158]:
df_mmm['FIPS'] = df_mmm.id.astype(str).str[9:]
df_mmm.head(5)
df_mmm.FIPS.astype(int)
df_mmm['FIPS'] = pd.to_numeric(df_mmm['FIPS'])

In [159]:
ACI_total  = dict(zip(df_mmm['FIPS'], df_mmm["Estimate!!Public Coverage!!Medicaid/means-tested public coverage alone or in combination"]))

In [160]:

#df_m = pd.read_csv("Data/Medicaid_Demand.csv")

# Unemployment 
The unemployment rate is calculated by the U.S. Bureau of Labor Statistics as the percentage of the civilian labor force who are without jobs and have actively sought work within the past four weeks. <a href="https://www.bls.gov/lau/laufaq.htm#Q01"> Data Source for Unemployment  </a> 

In [161]:
from io import StringIO
import datetime 
from datetime import date
from dateutil.relativedelta import relativedelta

back = date.today() + relativedelta(months= -3)
three_months_ago = back.strftime('%b-%y') 


url = 'https://www.bls.gov/web/metro/laucntycur14.txt'
s = requests.get(url).text



df_unemp = pd.read_csv(StringIO(s), sep='|',  skiprows=7, skipfooter=6, engine='python', names = ['LAUS Area Code', 'FIPS State', 'FIPS County', 'Area Title', 'Period', 'Civilian Labor Force','Employed','Unemployed_Level','Unemployed_Rate'])

df_unemp['Period'] = df_unemp['Period'].astype(str)

df_unemp['FIPS'] = df_unemp['LAUS Area Code'].str[3:8]

df_unemp = df_unemp[df_unemp['Period'].str.contains(str(three_months_ago))]

df_unemp.head(5)
#df_unemp.dtypes

Unnamed: 0,LAUS Area Code,FIPS State,FIPS County,Area Title,Period,Civilian Labor Force,Employed,Unemployed_Level,Unemployed_Rate,FIPS
38627,CN0100100000000,1,1,"Autauga County, AL",Aug-20,25446,24272,1174,4.6,1001
38628,CN0100300000000,1,3,"Baldwin County, AL",Aug-20,98267,93297,4970,5.1,1003
38629,CN0100500000000,1,5,"Barbour County, AL",Aug-20,9476,8806,670,7.1,1005
38630,CN0100700000000,1,7,"Bibb County, AL",Aug-20,8676,8141,535,6.2,1007
38631,CN0100900000000,1,9,"Blount County, AL",Aug-20,24792,23889,903,3.6,1009


In [162]:
#df_unemp[[ 'FIPS', 'Area Title', 'Period','Unemployed_Level','Unemployed_Rate']].to_csv('Data/County_employment.csv', index=False)


In [163]:


df_unemp.replace({'-', 0})

df_unemp['Unemployed_Level'] = df_unemp['Unemployed_Level'].str.replace(',', '')

df_unemp['Unemployed_Rate'] = df_unemp['Unemployed_Rate'].astype(str)

df_unemp['Unemployed_Level'] = df_unemp['Unemployed_Level'].str.strip()

df_unemp['Unemployed_Rate'] = df_unemp['Unemployed_Rate'].str.strip()

df_unemp['FIPS'] = pd.to_numeric(df_unemp['FIPS'])

df_unemp['Unemployed_Level'] = pd.to_numeric(df_unemp['Unemployed_Level'])

df_unemp['Unemployed_Rate'] = pd.to_numeric(df_unemp['Unemployed_Rate'])




# Fill NA with 0
#df_unemp = df_unemp.fillna(0)
df_unemp.tail(5)
#df_unemp.dtypes


Unnamed: 0,LAUS Area Code,FIPS State,FIPS County,Area Title,Period,Civilian Labor Force,Employed,Unemployed_Level,Unemployed_Rate,FIPS
41841,CN7214500000000,72,145,"Vega Baja Municipio, PR",Aug-20,12806,11317,1489,11.6,72145
41842,CN7214700000000,72,147,"Vieques Municipio, PR",Aug-20,2487,2230,257,10.3,72147
41843,CN7214900000000,72,149,"Villalba Municipio, PR",Aug-20,6722,6096,626,9.3,72149
41844,CN7215100000000,72,151,"Yabucoa Municipio, PR",Aug-20,8064,7286,778,9.6,72151
41845,CN7215300000000,72,153,"Yauco Municipio, PR",Aug-20,9159,8203,956,10.4,72153


In [164]:
 
## Read the Unemployment data using cvs data 
#df_unemp = pd.read_csv("Data/Unemployment.csv")

# Fill NA with 0
#df_unemp = df_unemp.fillna(0)

#df_unemp.head(5)

# Years of Potential Life Lost (YPLL)

Years of Potential Life Lost (YPLL) measures the rate of premature deaths by region. YPLL is calculated as the sum of the estimated number of years that individuals would have lived if they had not died before the age of 75 per 100,000 people. <a href="https://www.countyhealthrankings.org/sites/default/files/media/document/2020%20County%20Health%20Rankings%20Data%20-%20v2.xlsx"> Data Source for YPLL.  </a> More information about YPLL can be dounf in this <a href="https://www.countyhealthrankings.org/explore-health-rankings/measures-data-sources/county-health-rankings-model/health-outcomes/length-of-life/premature-death-ypll"> link. </a> 


In [165]:
# Read the YPLL data
df_y = pd.read_csv("Data/YPLL.csv")

# Fill NA with the mean of the data
df_y = df_y.fillna(df_y.mean())


# Population

In [166]:
# Read the Population data
df_pop = pd.read_csv("Data/County_pop_2019.csv")

# Fill NA with 0
df_pop = df_pop.fillna(0)


In [167]:
# Create a dictionary for the county and population
population_county = df_pop.set_index('FIPS')['pop'].to_dict()

# Create a dictionary for the county and YPLL
YPLL = dict(zip(df_y.FIPS, df_y.YPLL))

# Create a dictionary for the county and Unemployment
Unemployment = dict(zip(df_unemp.FIPS, df_unemp.Unemployed_Level))


# Create a dictionary for the county and Community Health Workers (CHW) demand
# Note that we assume a CHW can serve 55 Medicaid patient so the demand for CHW will be

#Medicaid_demand = dict(zip(df_m.FIPS, df_m.Med_Demand))


In [168]:
#for m in Medicaid_demand:
#    print (m, Medicaid_demand[m])

# COVID-19 Cases & COVID-19 Cases per Capita

What are COVID-19 Cases and COVID-19 Cases per Capita?

COVID-19 cases is an absolute metric of the total number of COVID-19 cases in a county over the last fourteen days.  COVID-19 cases per 100,000 is a relative metric calculated by dividing the number of COVID-19 cases by the estimated county population and multiplying by 100,000.  Cases include both confirmed cases, based on viral testing, and probable cases, based on specific criteria for symptoms and epidemiological exposure. We use NY Times Covid data. 


In [169]:
#### Data with the most recent date in NY Times dataset:

today = time.strftime('%Y-%m-%d')
covid_data_update_date = today#'2020-07-21'#today #or enter a specific date such as '2020-07-06'


In [170]:
## 14 day period defined
data_date_dt = pd.to_datetime(covid_data_update_date,infer_datetime_format = True)

N = 14

date_N_days_ago = data_date_dt - timedelta(days = N)

date_N1_days_ago = data_date_dt - timedelta(days = N+1)

In [171]:

# URL for mainland US data
url = "http://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"
s = requests.get(url).content
covid = pd.read_csv(io.StringIO(s.decode('utf-8')))

In [172]:
covid.tail(50)

Unnamed: 0,date,county,state,fips,cases,deaths
748452,2020-11-19,Ozaukee,Wisconsin,55089.0,4604,36
748453,2020-11-19,Pepin,Wisconsin,55091.0,409,2
748454,2020-11-19,Pierce,Wisconsin,55093.0,2019,15
748455,2020-11-19,Polk,Wisconsin,55095.0,1738,9
748456,2020-11-19,Portage,Wisconsin,55097.0,4387,35
748457,2020-11-19,Price,Wisconsin,55099.0,651,3
748458,2020-11-19,Racine,Wisconsin,55101.0,13501,149
748459,2020-11-19,Richland,Wisconsin,55103.0,774,11
748460,2020-11-19,Rock,Wisconsin,55105.0,8468,64
748461,2020-11-19,Rusk,Wisconsin,55107.0,726,5


Note: Since NY data is seperately available, we first read the NY data for all 5 different borough then combine with the rest of US data.

In [173]:
# URL for NY
url = "https://raw.githubusercontent.com/nychealth/coronavirus-data/master/trends/data-by-day.csv"
#"https://raw.githubusercontent.com/nychealth/coronavirus-data/master/data-by-day.csv"

ny = requests.get(url).content
covid_ny = pd.read_csv(io.StringIO(ny.decode('utf-8')))


covid_ny.tail(5)

Unnamed: 0,date_of_interest,CASE_COUNT,HOSPITALIZED_COUNT,DEATH_COUNT,DEATH_COUNT_PROBABLE,CASE_COUNT_7DAY_AVG,HOSP_COUNT_7DAY_AVG,DEATH_COUNT_7DAY_AVG,BX_CASE_COUNT,BX_HOSPITALIZED_COUNT,BX_DEATH_COUNT,BX_CASE_COUNT_7DAY_AVG,BX_HOSPITALIZED_COUNT_7DAY_AVG,BX_DEATH_COUNT_7DAY_AVG,BK_CASE_COUNT,BK_HOSPITALIZED_COUNT,BK_DEATH_COUNT,BK_CASE_COUNT_7DAY_AVG,BK_HOSPITALIZED_COUNT_7DAY_AVG,BK_DEATH_COUNT_7DAY_AVG,MN_CASE_COUNT,MN_HOSPITALIZED_COUNT,MN_DEATH_COUNT,MN_CASE_COUNT_7DAY_AVG,MN_HOSPITALIZED_COUNT_7DAY_AVG,MN_DEATH_COUNT_7DAY_AVG,QN_CASE_COUNT,QN_HOSPITALIZED_COUNT,QN_DEATH_COUNT,QN_CASE_COUNT_7DAY_AVG,QN_HOSPITALIZED_COUNT_7DAY_AVG,QN_DEATH_COUNT_7DAY_AVG,SI_CASE_COUNT,SI_HOSPITALIZED_COUNT,SI_DEATH_COUNT,SI_CASE_COUNT_7DAY_AVG,SI_HOSPITALIZED_COUNT_7DAY_AVG,SI_DEATH_COUNT_7DAY_AVG,INCOMPLETE
258,11/13/2020,1368,70,6,0,1242,64,9,227,16,2,197,15,2,334,22,1,332,15,2,240,7,1,228,8,1,411,18,1,347,18,2,156,7,1,139,8,1,6000
259,11/14/2020,897,79,10,3,1257,68,8,160,17,1,203,16,2,206,22,5,331,17,2,140,14,1,228,9,1,298,18,2,355,17,2,93,8,1,140,8,1,6000
260,11/15/2020,760,77,10,1,1258,71,8,111,12,2,205,16,2,191,15,3,327,18,2,112,17,1,226,10,1,214,22,1,355,18,2,132,11,3,146,8,1,6000
261,11/16/2020,1178,93,8,0,1212,75,9,203,23,1,200,17,2,320,22,1,315,18,2,207,13,3,217,11,2,346,30,1,348,20,1,102,5,2,132,8,1,6000
262,11/17/2020,720,49,4,1,1103,73,8,129,13,1,182,17,2,222,15,0,285,17,2,110,4,0,191,11,2,214,12,1,325,20,1,45,5,2,119,8,2,6000


In [174]:
Kings = covid_ny[['date_of_interest', 'BK_CASE_COUNT', 'BK_DEATH_COUNT']]
Kings.rename(columns = {'BK_CASE_COUNT': 'cases', 'date_of_interest': 'date', 'BK_DEATH_COUNT': 'deaths'} , inplace=True)
Kings['county'] = 'Kings'
Kings['state'] = 'New York'
Kings['fips'] = 36047.0
#Kings.head(5)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the d

In [175]:
Bronx = covid_ny[['date_of_interest', 'BX_CASE_COUNT', 'BX_DEATH_COUNT']]
Bronx.rename(columns = {'BX_CASE_COUNT': 'cases', 'date_of_interest': 'date', 'BX_DEATH_COUNT': 'deaths'} , inplace=True)
Bronx['state'] = 'New York'
Bronx['county'] = 'Bronx'
Bronx['fips'] = 36005.0
#Bronx.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [176]:
Manhattan = covid_ny[['date_of_interest', 'MN_CASE_COUNT', 'MN_DEATH_COUNT']]
Manhattan.rename(columns = {'MN_CASE_COUNT': 'cases', 'date_of_interest': 'date', 'MN_DEATH_COUNT': 'deaths'} , inplace=True)
Manhattan['state'] = 'New York'
Manhattan['county'] = 'Manhattan'
Manhattan['fips'] = 36061.0
#Manhattan.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [177]:
Queens = covid_ny[['date_of_interest', 'QN_CASE_COUNT', 'QN_DEATH_COUNT']]
Queens.rename(columns = {'QN_CASE_COUNT': 'cases', 'date_of_interest': 'date', 'QN_DEATH_COUNT': 'deaths'} , inplace=True)
Queens['state'] = 'New York'
Queens['county'] = 'Queens'
Queens['fips'] = 36081.0
#Queens.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [178]:
Richmond = covid_ny[['date_of_interest', 'SI_CASE_COUNT', 'SI_DEATH_COUNT']]
Richmond.rename(columns = {'SI_CASE_COUNT': 'cases', 'date_of_interest': 'date', 'SI_DEATH_COUNT': 'deaths'} , inplace=True)
Richmond['state'] = 'New York'
Richmond['county'] = 'Richmond'
Richmond['fips'] = 36085.0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [179]:
covid.head(5)


Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0
1,2020-01-22,Snohomish,Washington,53061.0,1,0
2,2020-01-23,Snohomish,Washington,53061.0,1,0
3,2020-01-24,Cook,Illinois,17031.0,1,0
4,2020-01-24,Snohomish,Washington,53061.0,1,0


In [180]:
Bronx.tail(5)

Unnamed: 0,date,cases,deaths,state,county,fips
258,11/13/2020,227,2,New York,Bronx,36005.0
259,11/14/2020,160,1,New York,Bronx,36005.0
260,11/15/2020,111,2,New York,Bronx,36005.0
261,11/16/2020,203,1,New York,Bronx,36005.0
262,11/17/2020,129,1,New York,Bronx,36005.0


In [181]:
#Kings['deaths'] = Kings.groupby(by=['fips'])['deaths_d'].sum()
#Bronx['deaths'] = Bronx.groupby(by=['fips'])['deaths_d'].sum()
#Manhattan['deaths'] = Manhattan.groupby(by=['fips'])['deaths_d'].sum()
#Queens['deaths'] = Queens.groupby(by=['fips'])['deaths_d'].sum()
#Richmond['deaths'] = Richmond.groupby(by=['fips'])['deaths_d'].sum()

In [182]:
Kings.head(5)

Unnamed: 0,date,cases,deaths,county,state,fips
0,02/29/2020,0,0,Kings,New York,36047.0
1,03/01/2020,0,0,Kings,New York,36047.0
2,03/02/2020,0,0,Kings,New York,36047.0
3,03/03/2020,0,0,Kings,New York,36047.0
4,03/04/2020,1,0,Kings,New York,36047.0


In [183]:
#Kings = Kings.drop(['deaths_d'], axis=1)
#Bronx = Bronx.drop(['deaths_d'], axis=1)
#Manhattan = Manhattan.drop(['deaths_d'], axis=1)
#Queens = Queens.drop(['deaths_d'], axis=1)
#Richmond = Richmond.drop(['deaths_d'], axis=1)

In [184]:
Kings.tail(5)

Unnamed: 0,date,cases,deaths,county,state,fips
258,11/13/2020,334,1,Kings,New York,36047.0
259,11/14/2020,206,5,Kings,New York,36047.0
260,11/15/2020,191,3,Kings,New York,36047.0
261,11/16/2020,320,1,Kings,New York,36047.0
262,11/17/2020,222,0,Kings,New York,36047.0


In [185]:
covid['dt'] = pd.to_datetime(covid['date'], infer_datetime_format=True)
Kings['dt'] = pd.to_datetime(Kings['date'], infer_datetime_format=True)
Bronx['dt'] = pd.to_datetime(Bronx['date'], infer_datetime_format=True)
Manhattan['dt'] = pd.to_datetime(Manhattan['date'], infer_datetime_format=True)
Queens['dt'] = pd.to_datetime(Queens['date'], infer_datetime_format=True)
Richmond['dt'] = pd.to_datetime(Richmond['date'], infer_datetime_format=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the cavea

In [186]:

Kings_c = Kings[(Kings['dt']>date_N1_days_ago) & (Kings['dt']<= data_date_dt)].copy()
Queens_c = Queens[(Queens['dt']>date_N1_days_ago) & (Queens['dt']<= data_date_dt)].copy()
Bronx_c = Bronx[(Bronx['dt']>date_N1_days_ago) & (Bronx['dt']<= data_date_dt)].copy()
Manhattan_c = Manhattan[(Manhattan['dt']>date_N1_days_ago) & (Manhattan['dt']<= data_date_dt)].copy()
Richmond_c = Richmond[(Richmond['dt']>date_N1_days_ago) & (Richmond['dt']<= data_date_dt)].copy()

In [187]:
#Kings['cases'] = Kings.loc[(Kings['dt']<= data_date_dt)].groupby(['dt'])['d_cases'].sum()
#Kings['cases'] = Kings.apply(lambda x: x[(Kings['dt']<= data_date_dt)]['d_cases'].sum())

In [188]:
Kings.head(50)

Unnamed: 0,date,cases,deaths,county,state,fips,dt
0,02/29/2020,0,0,Kings,New York,36047.0,2020-02-29
1,03/01/2020,0,0,Kings,New York,36047.0,2020-03-01
2,03/02/2020,0,0,Kings,New York,36047.0,2020-03-02
3,03/03/2020,0,0,Kings,New York,36047.0,2020-03-03
4,03/04/2020,1,0,Kings,New York,36047.0,2020-03-04
5,03/05/2020,3,0,Kings,New York,36047.0,2020-03-05
6,03/06/2020,1,0,Kings,New York,36047.0,2020-03-06
7,03/07/2020,2,0,Kings,New York,36047.0,2020-03-07
8,03/08/2020,5,0,Kings,New York,36047.0,2020-03-08
9,03/09/2020,16,0,Kings,New York,36047.0,2020-03-09


In [189]:
K_c = Kings_c.groupby(['fips']).sum().reset_index()
Q_c = Queens_c.groupby(['fips']).sum().reset_index()
B_c = Bronx_c.groupby(['fips']).sum().reset_index()
M_c = Manhattan_c.groupby(['fips']).sum().reset_index()
R_c = Richmond_c.groupby(['fips']).sum().reset_index()
K_c.head(19)

Unnamed: 0,fips,cases,deaths
0,36047.0,3489,27


In [190]:
covid_death = covid[(covid['dt'] >= data_date_dt- timedelta(days = 1))]
Kings_death = Kings.groupby(by=['fips'])['deaths'].sum().reset_index()
Bronx_death = Bronx.groupby(by=['fips'])['deaths'].sum().reset_index()
Manhattan_death = Manhattan.groupby(by=['fips'])['deaths'].sum().reset_index()
Queens_death = Queens.groupby(by=['fips'])['deaths'].sum().reset_index()
Richmond_death = Richmond.groupby(by=['fips'])['deaths'].sum().reset_index()

In [191]:
# We merge the NY data with the rest of the US data
covid = pd.concat([covid, Kings, Bronx, Manhattan, Queens, Richmond], sort = False)


In [192]:
Queens.tail(14)

Unnamed: 0,date,cases,deaths,state,county,fips,dt
249,11/04/2020,266,1,New York,Queens,36081.0,2020-11-04
250,11/05/2020,300,2,New York,Queens,36081.0,2020-11-05
251,11/06/2020,293,3,New York,Queens,36081.0,2020-11-06
252,11/07/2020,237,4,New York,Queens,36081.0,2020-11-07
253,11/08/2020,216,2,New York,Queens,36081.0,2020-11-08
254,11/09/2020,394,3,New York,Queens,36081.0,2020-11-09
255,11/10/2020,376,1,New York,Queens,36081.0,2020-11-10
256,11/11/2020,409,2,New York,Queens,36081.0,2020-11-11
257,11/12/2020,384,1,New York,Queens,36081.0,2020-11-12
258,11/13/2020,411,1,New York,Queens,36081.0,2020-11-13


In [193]:

covid.tail(50)

Unnamed: 0,date,county,state,fips,cases,deaths,dt
213,09/29/2020,Richmond,New York,36085.0,34,0,2020-09-29
214,09/30/2020,Richmond,New York,36085.0,40,0,2020-09-30
215,10/01/2020,Richmond,New York,36085.0,25,0,2020-10-01
216,10/02/2020,Richmond,New York,36085.0,35,0,2020-10-02
217,10/03/2020,Richmond,New York,36085.0,17,1,2020-10-03
218,10/04/2020,Richmond,New York,36085.0,26,1,2020-10-04
219,10/05/2020,Richmond,New York,36085.0,38,0,2020-10-05
220,10/06/2020,Richmond,New York,36085.0,29,0,2020-10-06
221,10/07/2020,Richmond,New York,36085.0,37,0,2020-10-07
222,10/08/2020,Richmond,New York,36085.0,33,0,2020-10-08


In [194]:
print (data_date_dt)

2020-11-20 00:00:00


In [195]:

covid_death = covid_death[['fips', 'deaths']]
covid_death.head(2)

Unnamed: 0,fips,deaths
745255,1001.0,39
745256,1003.0,84


In [196]:
Kings_death.head(2)

Unnamed: 0,fips,deaths
0,36047.0,5810


In [197]:
# We merge the NY data with the rest of the US data
covid_death = pd.concat([covid_death, Kings_death, Bronx_death, Manhattan_death, Queens_death, Richmond_death], sort = False)

covid_death = covid_death.rename(columns={"deaths": "total_deaths"})


# Create a dictionary for the cumulative COVID deaths in each county
County_covid_death = dict(zip(covid_death.fips, covid_death.total_deaths))

covid_death.tail(250)

Unnamed: 0,fips,total_deaths
748257,51115.0,1
748258,51117.0,35
748259,51119.0,11
748260,51121.0,12
748261,51125.0,2
748262,51127.0,4
748263,51700.0,53
748264,51710.0,88
748265,51131.0,31
748266,51133.0,8


In [198]:
covid_death.shape

(3252, 2)

In [199]:
print (County_covid_death)

{1001.0: 39, 1003.0: 84, 1005.0: 10, 1007.0: 18, 1009.0: 35, 1011.0: 19, 1013.0: 41, 1015.0: 111, 1017.0: 48, 1019.0: 24, 1021.0: 36, 1023.0: 12, 1025.0: 19, 1027.0: 23, 1029.0: 14, 1031.0: 15, 1033.0: 40, 1035.0: 14, 1037.0: 4, 1039.0: 32, 1041.0: 30, 1043.0: 32, 1045.0: 55, 1047.0: 31, 1049.0: 34, 1051.0: 61, 1053.0: 31, 1055.0: 63, 1057.0: 16, 1059.0: 33, 1061.0: 8, 1063.0: 17, 1065.0: 31, 1067.0: 6, 1069.0: 38, 1071.0: 24, 1073.0: 490, 1075.0: 7, 1077.0: 53, 1079.0: 35, 1081.0: 64, 1083.0: 44, 1085.0: 29, 1087.0: 20, 1089.0: 129, 1091.0: 24, 1093.0: 33, 1095.0: 54, 1097.0: 356, 1099.0: 11, 1101.0: 231, 1103.0: 44, 1105.0: 6, 1107.0: 18, 1109.0: 14, 1111.0: 21, 1113.0: 3, 1117.0: 75, 1115.0: 55, 1119.0: 22, 1121.0: 41, 1123.0: 89, 1125.0: 149, 1127.0: 96, 1129.0: 17, 1131.0: 18, 1133.0: 23, 2013.0: 0, 2016.0: 0, 2020.0: 61, 2050.0: 1, 2060.0: 0, 2068.0: 0, 2070.0: 0, 2090.0: 17, 2100.0: 0, 2110.0: 2, 2122.0: 4, 2130.0: 0, 2150.0: 0, 2158.0: 0, 2164.0: 0, 2170.0: 5, 2180.0: 0, 2185.0

In [200]:
## subset last last 15 days
covid_last15 = covid[(covid['dt']>date_N1_days_ago) & (covid['dt']<= data_date_dt)].copy()
covid_last15['dt_time_delta'] = covid_last15['dt']-data_date_dt

In [201]:
## calculate new daily cases

## sort values by county and date
covid_last15.sort_values(by=['fips','dt'],inplace=True)
## remove data with 'unknown' counties
covid_last15 = covid_last15[covid_last15['fips'].notnull()].copy()

## calculate daily difference in number of cases
covid_last15['new_cases'] = covid_last15.groupby('fips')['cases'].transform(lambda x: x.diff())
## set negative new cases to zero, this can occuer due to the disperacy in the data

covid_last15.loc[covid_last15.new_cases < 1e-6, 'new_cases'] = 0
covid_last15.sort_index(inplace = True)

In [202]:
covid_last15[covid_last15.county == 'New Haven'].head(50)

Unnamed: 0,date,county,state,fips,cases,deaths,dt,dt_time_delta,new_cases
703386,2020-11-06,New Haven,Connecticut,9009.0,19154,1137,2020-11-06,-14 days,
706629,2020-11-07,New Haven,Connecticut,9009.0,19154,1137,2020-11-07,-13 days,0.0
709873,2020-11-08,New Haven,Connecticut,9009.0,19154,1137,2020-11-08,-12 days,0.0
713117,2020-11-09,New Haven,Connecticut,9009.0,20053,1144,2020-11-09,-11 days,899.0
716361,2020-11-10,New Haven,Connecticut,9009.0,20434,1148,2020-11-10,-10 days,381.0
719605,2020-11-11,New Haven,Connecticut,9009.0,20892,1151,2020-11-11,-9 days,458.0
722849,2020-11-12,New Haven,Connecticut,9009.0,21237,1155,2020-11-12,-8 days,345.0
726093,2020-11-13,New Haven,Connecticut,9009.0,22015,1157,2020-11-13,-7 days,778.0
729337,2020-11-14,New Haven,Connecticut,9009.0,22015,1157,2020-11-14,-6 days,0.0
732582,2020-11-15,New Haven,Connecticut,9009.0,22015,1157,2020-11-15,-5 days,0.0


In [203]:
## select just last 14 days now that we have daily new cases with 15th day as baseline
covid_last14 = covid_last15[(covid_last15['dt'] > date_N_days_ago) & (covid_last15['dt'] <= data_date_dt)].copy()


In [204]:
## group by FIPS to get case load and follow up demand values for each county
covid_last14_stats = covid_last14.groupby(['fips'])['new_cases'].sum().reset_index(name ='total_cases')

covid_last14_stats[covid_last14_stats.fips == 9009].head(5)

Unnamed: 0,fips,total_cases
310,9009.0,5579.0


In [205]:
covid.tail(50)

Unnamed: 0,date,county,state,fips,cases,deaths,dt
213,09/29/2020,Richmond,New York,36085.0,34,0,2020-09-29
214,09/30/2020,Richmond,New York,36085.0,40,0,2020-09-30
215,10/01/2020,Richmond,New York,36085.0,25,0,2020-10-01
216,10/02/2020,Richmond,New York,36085.0,35,0,2020-10-02
217,10/03/2020,Richmond,New York,36085.0,17,1,2020-10-03
218,10/04/2020,Richmond,New York,36085.0,26,1,2020-10-04
219,10/05/2020,Richmond,New York,36085.0,38,0,2020-10-05
220,10/06/2020,Richmond,New York,36085.0,29,0,2020-10-06
221,10/07/2020,Richmond,New York,36085.0,37,0,2020-10-07
222,10/08/2020,Richmond,New York,36085.0,33,0,2020-10-08


In [206]:
## group by FIPS to get case load and follow up demand values for each county
#covid_death = covid.groupby(['fips'])['deaths'].sum().reset_index(name ='total_deaths')
#covid_death.head(250)

In [207]:
## group by FIPS to get case load each month and follow up demand values for each county
covid['month'] = pd.to_datetime(covid['date'])
covid['year'] = pd.to_datetime(covid['date'])


In [208]:
#covid_last14_stats_montly = covid.groupby(['fips', covid.month.dt.month, covid.year.dt.year])['deaths'].sum().reset_index() 

#covid_last14_stats_montly['cumulative_death'] = covid_last14_stats_montly.groupby(['fips'])['deaths'].cumsum(axis = 0) 
#covid_last14_stats_montly = covid_last14_stats_montly.groupby(['fips', covid_last14_stats_montly.month,  covid_last14_stats_montly.year])['deaths'].cumsum()

#covid_last14_stats_montly.head(10)

In [209]:
# adding population information from CDC svi dataset
covid_last14_stats = covid_last14_stats.reset_index()
covid_last14_stats['fips'] = covid_last14_stats['fips'].astype(int)
svi_county['FIPS'] = svi_county['FIPS'].astype(int)
covid_last14_stats = pd.merge(left = covid_last14_stats, right = svi_county[['E_TOTPOP','FIPS', 'STATE']], how = 'right', right_on = 'FIPS', left_on = 'fips' )
covid_last14_stats.fillna(0 , inplace=True)

In [210]:

covid_last14_stats.tail(5)

Unnamed: 0,index,fips,total_cases,E_TOTPOP,FIPS,STATE
3215,3217.0,72151.0,39.0,34149,72151,PUERTO RICO
3216,3218.0,72153.0,64.0,36439,72153,PUERTO RICO
3217,0.0,0.0,0.0,2132,2105,ALASKA
3218,0.0,0.0,0.0,689,2282,ALASKA
3219,0.0,0.0,0.0,75,15005,HAWAII


In [211]:
now = pd.to_datetime("now")

print (now)
m_now = now.month
print (m_now)

y_now = now.year

2020-11-21 02:16:54.155191
11


In [212]:
#one_month_lag_cumulative_death =  covid_last14_stats_montly[(covid_last14_stats_montly['month'] == m_now-1) & (covid_last14_stats_montly['year'] == y_now)]

# Create a dictionary for the last month COVID deaths in each county
#one_month_lag_death = dict(zip(one_month_lag_cumulative_death.fips, one_month_lag_cumulative_death.cumulative_death))

#one_month_lag_cumulative_death.head(5)


In [213]:
#two_month_lag_cumulative_death =  covid_last14_stats_montly[(covid_last14_stats_montly['month'] == m_now-2) & (covid_last14_stats_montly['year'] == y_now)]

# Create a dictionary for the last month COVID deaths in each county
#two_month_lag_death = dict(zip(two_month_lag_cumulative_death.fips, two_month_lag_cumulative_death.cumulative_death))


#two_month_lag_cumulative_death.head(5)

In [214]:
#three_month_lag_cumulative_death =  covid_last14_stats_montly[(covid_last14_stats_montly['month'] == m_now - 3) & (covid_last14_stats_montly['year'] == y_now)]


# Create a dictionary for the 3 last month COVID deaths in each county
#three_month_lag_death = dict(zip(three_month_lag_cumulative_death.fips, three_month_lag_cumulative_death.cumulative_death))

#three_month_lag_cumulative_death.head(5)

#print (three_month_lag_death)

In [215]:

# Create a dictionary for the states of the given the county FIPS
county_of_states = dict(zip(svi_county.FIPS, svi_county.STATE))

# Create a dictionary for the name of the given the county FIPS
county_name = dict(zip(svi_county.FIPS, svi_county.COUNTY))

# Create the list for county FIPS, we consider counties as analogy to the center for community health workers
location = svi_county.FIPS.tolist() #[k for k in SVI_county] #[9001, 9003, 9005, 9007, 9009, 9011, 9013, 9015]#[k for k in SVI_county]




In [216]:
for j in County_covid_death:
       
    print (j, County_covid_death[j])

1001.0 39
1003.0 84
1005.0 10
1007.0 18
1009.0 35
1011.0 19
1013.0 41
1015.0 111
1017.0 48
1019.0 24
1021.0 36
1023.0 12
1025.0 19
1027.0 23
1029.0 14
1031.0 15
1033.0 40
1035.0 14
1037.0 4
1039.0 32
1041.0 30
1043.0 32
1045.0 55
1047.0 31
1049.0 34
1051.0 61
1053.0 31
1055.0 63
1057.0 16
1059.0 33
1061.0 8
1063.0 17
1065.0 31
1067.0 6
1069.0 38
1071.0 24
1073.0 490
1075.0 7
1077.0 53
1079.0 35
1081.0 64
1083.0 44
1085.0 29
1087.0 20
1089.0 129
1091.0 24
1093.0 33
1095.0 54
1097.0 356
1099.0 11
1101.0 231
1103.0 44
1105.0 6
1107.0 18
1109.0 14
1111.0 21
1113.0 3
1117.0 75
1115.0 55
1119.0 22
1121.0 41
1123.0 89
1125.0 149
1127.0 96
1129.0 17
1131.0 18
1133.0 23
2013.0 0
2016.0 0
2020.0 61
2050.0 1
2060.0 0
2068.0 0
2070.0 0
2090.0 17
2100.0 0
2110.0 2
2122.0 4
2130.0 0
2150.0 0
2158.0 0
2164.0 0
2170.0 5
2180.0 0
2185.0 0
2188.0 0
2195.0 0
2198.0 1
2220.0 0
2230.0 0
2240.0 1
nan 0
2261.0 1
2275.0 0
2290.0 2
4001.0 189
4003.0 77
4005.0 161
4007.0 83
4009.0 33
4011.0 2
4012.0 18
4013.0 3

26105.0 8
26107.0 7
26109.0 12
26111.0 22
26113.0 2
26115.0 56
26117.0 21
26119.0 1
26121.0 142
26123.0 13
26125.0 1299
26127.0 11
26129.0 10
26131.0 9
26133.0 2
26135.0 2
26137.0 15
26139.0 120
26141.0 2
26143.0 7
26145.0 195
26151.0 12
26153.0 2
26155.0 36
26147.0 76
26149.0 30
26157.0 44
nan 5
26159.0 30
26161.0 137
26163.0 3147
26165.0 12
27001.0 18
27003.0 203
27005.0 6
27007.0 13
27009.0 36
27011.0 1
27013.0 12
27015.0 9
27017.0 9
27019.0 10
27021.0 8
27023.0 7
27025.0 7
27027.0 51
27029.0 4
27031.0 0
27033.0 0
27035.0 27
27037.0 170
27039.0 0
27041.0 22
27043.0 0
27045.0 0
27047.0 5
27049.0 25
27051.0 6
27053.0 1066
27055.0 2
27057.0 17
27059.0 11
27061.0 21
27063.0 1
27065.0 11
27067.0 12
27069.0 3
27071.0 5
27073.0 3
27075.0 1
27077.0 1
27079.0 8
27081.0 1
27083.0 10
27087.0 4
27089.0 7
27091.0 20
27085.0 8
27093.0 7
27095.0 30
27097.0 19
27099.0 22
27101.0 3
27103.0 22
27105.0 27
27107.0 6
27109.0 30
27111.0 14
27113.0 4
27115.0 5
27117.0 17
27119.0 20
27121.0 0
27123.0 450
2

48325.0 49
48327.0 3
48329.0 119
48331.0 9
48333.0 4
48335.0 5
48337.0 14
48339.0 201
48341.0 27
48343.0 8
48345.0 0
48347.0 77
48349.0 42
48351.0 16
48353.0 8
48355.0 467
48357.0 6
48359.0 2
48361.0 51
48363.0 27
48365.0 34
48367.0 61
48369.0 22
48371.0 15
48373.0 42
48375.0 148
48377.0 7
48379.0 7
48381.0 93
48383.0 6
48385.0 8
48387.0 18
48389.0 11
48391.0 16
48393.0 0
48395.0 6
48397.0 35
48399.0 11
48401.0 29
48403.0 10
48405.0 16
48407.0 14
48409.0 84
48411.0 5
48413.0 2
48415.0 19
48417.0 0
48419.0 35
48421.0 1
48423.0 189
48425.0 5
48427.0 187
48429.0 7
48431.0 1
48433.0 1
48435.0 3
48437.0 7
48439.0 968
48441.0 98
48443.0 0
48445.0 18
48447.0 2
48449.0 40
48451.0 97
48453.0 473
48455.0 8
48457.0 4
48459.0 17
48461.0 5
48463.0 38
48465.0 137
48467.0 42
48469.0 99
48471.0 68
48473.0 18
48475.0 4
48477.0 53
48479.0 385
48481.0 64
48483.0 4
48485.0 97
48487.0 10
48489.0 59
48491.0 161
48493.0 28
48495.0 5
48497.0 28
48499.0 47
48501.0 10
48503.0 18
48505.0 9
48507.0 21
49001.0 2
4

In [217]:
K_c.head()

Unnamed: 0,fips,cases,deaths
0,36047.0,3489,27


In [218]:
K = dict(zip(K_c.fips, K_c.cases))
Q = dict(zip(Q_c.fips, Q_c.cases))
B = dict(zip(B_c.fips, B_c.cases))
M = dict(zip(M_c.fips, M_c.cases))
R  = dict(zip(R_c.fips, R_c.cases))

In [219]:
print (K)

{36047.0: 3489}


In [220]:
# Create a dictionary for the county and covid cases
covid_cases_county_ny_times = dict(zip(covid_last14_stats.fips, covid_last14_stats.total_cases))
COVID_14days = {}

for j in location:
    if j in covid_cases_county_ny_times:
        COVID_14days[j] = covid_cases_county_ny_times[j] 
        print(j, covid_cases_county_ny_times[j])
        

        
    else:
        COVID_14days[j] = 0
        
        
    if j not in County_covid_death:
        print ('j',j)
        County_covid_death[j] = 0

#print (three_month_lag_death)
#print (two_month_lag_covid_death)
#print (one_month_lag_covid_death)
#for j in location:
#    if j not in three_month_lag_death:        
#        three_month_lag_death[j] = 0
        
#    if j not in two_month_lag_death:        
#        two_month_lag_death[j] = 0
        
#    if j not in one_month_lag_death:        
#        one_month_lag_death[j] = 0
        
    #if j not in County_covid_death:
    #    County_covid_death[j] = 0

35039 351.0
1001 312.0
1009 430.0
1013 69.0
1015 737.0
1017 172.0
1031 299.0
1033 422.0
1039 147.0
1043 851.0
1045 227.0
1051 394.0
1055 858.0
1067 53.0
1069 471.0
1071 361.0
1077 704.0
1079 169.0
1083 563.0
1089 1684.0
1095 806.0
1097 1299.0
1103 985.0
1111 85.0
1113 86.0
1115 439.0
1117 1354.0
1121 353.0
2261 42.0
4021 2047.0
5009 324.0
5011 34.0
5033 451.0
5037 89.0
5045 609.0
5047 88.0
5051 561.0
5053 98.0
5063 323.0
5083 179.0
5085 384.0
5087 83.0
5115 430.0
5117 63.0
5121 162.0
5125 807.0
5131 997.0
5145 718.0
6007 341.0
6017 333.0
6023 106.0
6027 17.0
6061 950.0
6079 918.0
6089 1118.0
6093 179.0
8019 59.0
8039 165.0
8047 32.0
8051 96.0
8057 5.0
8065 61.0
8067 279.0
8071 47.0
8075 437.0
8077 2239.0
8083 216.0
8085 190.0
8093 68.0
8099 170.0
8101 2772.0
8103 47.0
8119 181.0
8121 48.0
9005 941.0
9007 794.0
9011 1036.0
9015 508.0
10001 508.0
12019 748.0
12033 1445.0
12089 261.0
12101 1598.0
12109 821.0
12113 664.0
13015 527.0
13039 165.0
13045 603.0
13047 263.0
13057 1236.0
13073 76

38073 115.0
38075 40.0
38081 80.0
38091 34.0
38095 39.0
38097 158.0
38099 435.0
39003 1245.0
39005 305.0
39007 797.0
39011 587.0
39013 431.0
39015 265.0
39017 2933.0
39019 98.0
39021 276.0
39023 1240.0
39025 1665.0
39027 270.0
39029 636.0
39033 348.0
39037 526.0
39039 407.0
39043 441.0
39045 1277.0
39051 345.0
39053 199.0
39055 606.0
39057 1363.0
39059 217.0
39063 587.0
39065 266.0
39067 51.0
39069 204.0
39071 213.0
39073 210.0
39077 404.0
39081 309.0
39083 417.0
39085 2689.0
39089 1285.0
39091 394.0
39093 1885.0
39099 1669.0
39103 1241.0
39107 685.0
39109 1068.0
39111 58.0
39113 4533.0
39117 211.0
39119 837.0
39123 286.0
39125 174.0
39127 177.0
39133 828.0
39135 434.0
39137 470.0
39139 785.0
39141 422.0
39143 386.0
39147 608.0
39149 443.0
39151 2403.0
39153 3330.0
39155 1601.0
39157 959.0
39159 557.0
39161 342.0
39165 1794.0
39167 350.0
39169 735.0
39171 369.0
39175 222.0
40013 631.0
40015 300.0
40017 1356.0
40019 307.0
40021 395.0
40027 1934.0
40031 1036.0
40033 44.0
40035 135.0
4003

48295 45.0
48303 5476.0
48331 36.0
48349 311.0
48351 3.0
48353 132.0
48365 24.0
48379 106.0
48393 10.0
48397 378.0
48401 87.0
48417 4.0
48423 1637.0
48429 51.0
48433 3.0
48447 2.0
48451 2193.0
48485 1379.0
48493 71.0
48497 325.0
48499 139.0
49003 543.0
49005 1684.0
49007 236.0
49011 3567.0
49013 166.0
49015 71.0
49019 89.0
49021 374.0
49027 75.0
49029 149.0
49033 13.0
49041 359.0
49043 343.0
49045 658.0
49047 219.0
49051 443.0
49057 2751.0
50001 26.0
50007 204.0
50009 11.0
50021 59.0
51001 42.0
51003 111.0
51036 18.0
51037 34.0
51087 747.0
51091 2.0
51103 14.0
51115 10.0
51117 34.0
51119 5.0
51125 14.0
51131 9.0
51133 17.0
51145 61.0
51147 46.0
51149 117.0
51157 8.0
51159 24.0
51161 496.0
51163 49.0
51169 147.0
51173 179.0
51179 339.0
51185 262.0
51570 40.0
51610 11.0
51670 29.0
51790 85.0
53005 1227.0
53007 532.0
53009 78.0
53031 37.0
53039 18.0
53043 42.0
53053 2761.0
53055 19.0
53061 2299.0
53063 3147.0
53071 484.0
53073 303.0
54001 96.0
54007 15.0
54011 639.0
54017 18.0
54019 164.0

4012 86.0
5035 353.0
5095 31.0
5099 54.0
6039 375.0
6071 12541.0
6075 1304.0
6077 1815.0
6099 1751.0
12013 66.0
12077 14.0
12079 101.0
12086 17328.0
12093 138.0
12123 68.0
12125 27.0
13019 42.0
13063 730.0
13095 102.0
13099 34.0
13109 17.0
13215 306.0
13261 26.0
13267 26.0
13301 16.0
13303 107.0
16065 682.0
20057 555.0
21013 239.0
21045 111.0
21065 71.0
21075 26.0
21109 65.0
21121 121.0
21129 476.0
21203 48.0
24510 2960.0
26085 63.0
28013 47.0
29223 77.0
30035 65.0
31043 265.0
31047 317.0
34011 424.0
34017 3596.0
34031 3909.0
34039 3634.0
35005 1298.0
35019 84.0
35023 24.0
35033 12.0
35045 682.0
35051 69.0
35053 162.0
36047 347.0
36081 370.0
36087 1507.0
37083 224.0
38005 95.0
40001 159.0
40139 232.0
41049 29.0
45009 18.0
45033 84.0
45053 45.0
45075 173.0
45089 84.0
47007 65.0
47061 81.0
48003 199.0
48013 118.0
48017 101.0
48025 58.0
48069 75.0
48079 18.0
48095 34.0
48101 5.0
48103 74.0
48113 15536.0
48117 163.0
48153 45.0
48169 20.0
48177 57.0
48195 18.0
48201 12844.0
48207 3.0
48249 

In [221]:
for j in location: 
    if j in K:
        COVID_14days[j] = K[j]
        print (j, COVID_14days[j], K[j])

    if j in Q:
        COVID_14days[j] = Q[j]
        print (j, COVID_14days[j])
        
    if j in B:
        COVID_14days[j] = B[j]
        print (j, COVID_14days[j])
        
    if j in M:
        COVID_14days[j] = M[j]
        print (j, COVID_14days[j])

    if j in R:
        COVID_14days[j] = R[j]
        print (j, COVID_14days[j])

36085 1465
36061 2341
36047 3489 3489
36081 3792
36005 2161


In [222]:
print (covid_cases_county_ny_times)

{1001.0: 312.0, 1003.0: 799.0, 1005.0: 55.0, 1007.0: 94.0, 1009.0: 430.0, 1011.0: 18.0, 1013.0: 69.0, 1015.0: 737.0, 1017.0: 172.0, 1019.0: 124.0, 1021.0: 130.0, 1023.0: 8.0, 1025.0: 95.0, 1027.0: 63.0, 1029.0: 80.0, 1031.0: 299.0, 1033.0: 422.0, 1035.0: 70.0, 1037.0: 60.0, 1039.0: 147.0, 1041.0: 51.0, 1043.0: 851.0, 1045.0: 227.0, 1047.0: 164.0, 1049.0: 615.0, 1051.0: 394.0, 1053.0: 100.0, 1055.0: 858.0, 1057.0: 138.0, 1059.0: 148.0, 1061.0: 123.0, 1063.0: 34.0, 1065.0: 77.0, 1067.0: 53.0, 1069.0: 471.0, 1071.0: 361.0, 1073.0: 3816.0, 1075.0: 56.0, 1077.0: 704.0, 1079.0: 169.0, 1081.0: 512.0, 1083.0: 563.0, 1085.0: 27.0, 1087.0: 84.0, 1089.0: 1684.0, 1091.0: 77.0, 1093.0: 106.0, 1095.0: 806.0, 1097.0: 1299.0, 1099.0: 64.0, 1101.0: 835.0, 1103.0: 985.0, 1105.0: 36.0, 1107.0: 129.0, 1109.0: 101.0, 1111.0: 85.0, 1113.0: 86.0, 1115.0: 439.0, 1117.0: 1354.0, 1119.0: 45.0, 1121.0: 353.0, 1123.0: 211.0, 1125.0: 1154.0, 1127.0: 290.0, 1129.0: 85.0, 1131.0: 33.0, 1133.0: 164.0, 2013.0: 3.0, 20

In [223]:
#Parameters
pro_c_s = [(i,county_of_states[i]) for i in location ]
cartesian_pro_county_state = gp.tuplelist(pro_c_s)


In [224]:
df = covid_last14_stats
df['fips'] = df['fips'].astype(int)

In [225]:
###############################################################################################
######################## END calculating different types of vulnerabilities ###################

Since we allocate CHW proportional to the county values of certain vulnaribilites within state, we need a few function to help us with the calculations. 



In [226]:
# This function return the value for the state for the given dictionary

# More specifically sum upt the values for the counties of each state

def total_state(dict_1):
    state_dict = {}
    for s in State:
        state_dict [s] = sum(float(dict_1[j]) for (j,s) in cartesian_pro_county_state.select('*', s) if j in dict_1)  
    return state_dict



In [227]:
# Calculte the population per state by summing up the population in each county in the state
State_pop = total_state(population_county)

In [228]:


# This function returns the ratio of the dict value for county and state of the county
def Proportional(county_level, state_level):
    
    prop = {}
       
    for (j,s) in cartesian_pro_county_state:
        if state_level[s] >= 1e-6 and j in county_level:
            prop[j] = (float(county_level[j])/float(state_level[s]))    
        else:
            prop[j] = 0
                
    return prop

In [229]:
print (ACI_total)
print (ACI_total[35039])

{1001: 9049, 1003: 30763, 1005: 7244, 1007: 4272, 1009: 9290, 1011: 2935, 1013: 4814, 1015: 25890, 1017: 7400, 1019: 5105, 1021: 9147, 1023: 3273, 1025: 6716, 1027: 3112, 1029: 3180, 1031: 9926, 1033: 9540, 1035: 2765, 1037: 2155, 1039: 6835, 1041: 3219, 1043: 15478, 1045: 10930, 1047: 13544, 1049: 17896, 1051: 12373, 1053: 6235, 1055: 21998, 1057: 4144, 1059: 6715, 1061: 6647, 1063: 3143, 1065: 4730, 1067: 3651, 1069: 22918, 1071: 11352, 1073: 126944, 1075: 3099, 1077: 15561, 1079: 6877, 1081: 23596, 1083: 14596, 1085: 3540, 1087: 5301, 1089: 48769, 1091: 5053, 1093: 6051, 1095: 20669, 1097: 90962, 1099: 5565, 1101: 52763, 1103: 22180, 1105: 3375, 1107: 4807, 1109: 6945, 1111: 4428, 1113: 14480, 1115: 12599, 1117: 20035, 1119: 4098, 1121: 19979, 1123: 9968, 1125: 34877, 1127: 13887, 1129: 4268, 1131: 4086, 1133: 5577, 2013: 444, 2016: 400, 2020: 49663, 2050: 8680, 2060: 141, 2068: 142, 2070: 1983, 2090: 12661, 2100: 606, 2105: 504, 2110: 4631, 2122: 11347, 2130: 2881, 2150: 2412, 2158

In [230]:
#ACI
ACI_State = total_state(ACI_total)

In [231]:
# Create dicts for the variables of SVI
E_POV = dict(zip(svi_county.FIPS, svi_county.EP_POV))
E_UNEMP = dict(zip(svi_county.FIPS, svi_county.EP_UNEMP))
E_PCI = dict(zip(svi_county.FIPS, svi_county.EP_PCI))
E_NOHSDP = dict(zip(svi_county.FIPS, svi_county.EP_NOHSDP))
E_AGE65 = dict(zip(svi_county.FIPS, svi_county.EP_AGE65))
E_AGE17 = dict(zip(svi_county.FIPS, svi_county.EP_AGE17))
E_DISABL = dict(zip(svi_county.FIPS, svi_county.EP_DISABL))
E_SNGPNT = dict(zip(svi_county.FIPS, svi_county.EP_SNGPNT))
E_MINRTY = dict(zip(svi_county.FIPS, svi_county.EP_MINRTY))
E_LIMENG = dict(zip(svi_county.FIPS, svi_county.EP_LIMENG))
E_MUNIT = dict(zip(svi_county.FIPS, svi_county.EP_MUNIT))
E_MOBILE = dict(zip(svi_county.FIPS, svi_county.EP_MOBILE))
E_CROWD = dict(zip(svi_county.FIPS, svi_county.EP_CROWD))
E_NOVEH = dict(zip(svi_county.FIPS, svi_county.EP_NOVEH))
E_GROUPQ = dict(zip(svi_county.FIPS, svi_county.EP_GROUPQ))


# Calculate the state value for the SVI variables
E_POV_State = total_state(E_POV)
E_UNEMP_State = total_state(E_UNEMP) 
E_PCI_State = total_state(E_PCI)
E_NOHSDP_State = total_state(E_NOHSDP) 
E_AGE65_State = total_state(E_AGE65)
E_AGE17_State = total_state(E_AGE17)
E_DISABL_State = total_state(E_DISABL)
E_SNGPNT_State = total_state(E_SNGPNT)
E_MINRTY_State = total_state(E_MINRTY)
E_LIMENG_State = total_state(E_LIMENG)
E_MUNIT_State = total_state(E_MUNIT)
E_MOBILE_State = total_state(E_MOBILE)
E_CROWD_State = total_state(E_CROWD)
E_NOVEH_State = total_state(E_NOVEH)
E_GROUPQ_State = total_state(E_GROUPQ)



# Calculate the proportinal values for the SVI variables
E_POV_Prop = Proportional(E_POV, E_POV_State )
E_UNEMP_Prop = Proportional(E_UNEMP, E_UNEMP_State ) 
E_PCI_Prop = Proportional(E_PCI, E_PCI_State )
E_NOHSDP_Prop = Proportional(E_NOHSDP, E_NOHSDP_State ) 
E_AGE65_Prop = Proportional(E_AGE65, E_AGE65_State )
E_AGE17_Prop = Proportional(E_AGE17, E_AGE17_State )
E_DISABL_Prop = Proportional(E_DISABL, E_DISABL_State )
E_SNGPNT_Prop = Proportional(E_SNGPNT, E_SNGPNT_State )
E_MINRTY_Prop = Proportional(E_MINRTY, E_MINRTY_State )
E_LIMENG_Prop = Proportional(E_LIMENG, E_LIMENG_State )
E_MUNIT_Prop = Proportional(E_MUNIT, E_MUNIT_State )
E_MOBILE_Prop = Proportional(E_MOBILE, E_MOBILE_State )
E_CROWD_Prop = Proportional(E_CROWD, E_CROWD_State )
E_NOVEH_Prop = Proportional(E_NOVEH, E_NOVEH_State )
E_GROUPQ_Prop = Proportional(E_GROUPQ, E_GROUPQ_State )

ACI_Prop = Proportional(ACI_total, ACI_State)


# SVI calculation 

We calculate the ratio of county value to state value by population for each SVI variables (we use EP-estimate percentage- values in the CDC data set), then we take the average of all 15 SVI variables. 

Let SVI variable set be K, where  

K = { Below Poverty, Unemployed, Income, No High School Diploma, Aged 65 or Older, Aged 17 or Younger, Civilian with a Disability, Single-Parent Households, Minority, Speaks English “Less than Well”, Multi-Unit Structures, Mobile Homes, Crowding, No Vehicle, Group Quarters }

We will use these variables in a county base and state base. While County base values are exactly same as the estimated values for these variables in the CDC website, to calculate the state base, we simply sum the county values for all of the counties in each state for each variable. Then we use the following formula to calculate the SVI value for each county.

Let $S$ is the set of states and $j$ is a county in the state $s$, where $s \in S$, $c^k_j$ SVI variable $k \in K$ value for county j, and $c_s$ SVI variable value for state s.

$SVI_j = \frac{1}{15}\sum_{k \in K} \frac{c^k_j}{c^k_s}$


In [232]:

from collections import Counter
# Sum all SVI variable values for each county
SVI_county_sum = dict(Counter(E_POV_Prop) + Counter(E_UNEMP_Prop) + Counter(E_PCI_Prop) + Counter(E_NOHSDP_Prop) + Counter(E_AGE65_Prop) + Counter(E_AGE17_Prop) + Counter(E_DISABL_Prop) + Counter(E_SNGPNT_Prop) + Counter(E_MINRTY_Prop) + Counter(E_LIMENG_Prop) + Counter(E_MUNIT_Prop) + Counter(E_MOBILE_Prop) + Counter(E_CROWD_Prop) + Counter(E_NOVEH_Prop) + Counter(E_GROUPQ_Prop))

# Divide the sum of all SVI variable values
SVI_county = {j: SVI_county_sum[j]/15 for j in SVI_county_sum }


# Proportional Allocation

We consider allocating 1 million CHW over the states proportional to Medicaid enrollment in each state. Further, we allocate CHW to counties in each state proportional to different county vulnerability criterias as follow.

- MEDICAID
- SVI
- YPLL
- UNEMPLOYMENT
- LAST 14 DAYS COVID CASES
- LAST 14 DAYS COVID CASES / POP
- COVID DEATHS / POP

To calculate the total number of allocated CHW to per county according to these vulnerability criterias, we define the following function called "Proportional_allocation", in which we multiply the CHW allocated to each state with the ratio of the chosen vulnerability criteria of the county to the chosen vulnerability criteria of the state, the function return a dictionary with the counties as keys and the number of CHW allocated to each county for the chosen vulnerability criteria as values. 

In [233]:
def Proportional_allocation(county_level, state_level, state_budget):
    prop_allocate = {}
       
    for (j,s) in cartesian_pro_county_state:
        if state_level[s] >= 1e-6 and j in county_level:
            #print (j,s, county_level[j],state_level[s], state_budget[s])
            prop_allocate[j,s] = (float(county_level[j])/float(state_level[s]))*float(state_budget[s])
        
        else:
            prop_allocate[j,s] = 0
            
    
    return prop_allocate

In [234]:
Medicaid_dem = Proportional_allocation(ACI_total, ACI_State, Medicaid_state)
Medicaid_demand = {m[0]: Medicaid_dem[m] for m in Medicaid_dem}
for s in State:
#    print (s)
#    print ( ACI_State[s])
    print (s,Medicaid_state[s])
#for m in Medicaid_demand:
#    print (m, Medicaid_demand[m])

NEW MEXICO 772102
ALABAMA 957116
ALASKA 231145
ARIZONA 1839932
ARKANSAS 830467
CALIFORNIA 11847711
COLORADO 1337805
CONNECTICUT 874974
DELAWARE 239009
FLORIDA 3892552
GEORGIA 1928703
IDAHO 340742
ILLINOIS 2987496
INDIANA 1602976
IOWA 699741
KANSAS 401103
KENTUCKY 1416013
LOUISIANA 1585024
MAINE 232455
MARYLAND 1372695
MASSACHUSETTS 1616404
MICHIGAN 2439425
MINNESOTA 1085778
MISSISSIPPI 632427
MISSOURI 923641
MONTANA 247333
NEBRASKA 254159
NEVADA 685073
NEW HAMPSHIRE 193436
NEW JERSEY 1759653
NEW YORK 6263164
NORTH CAROLINA 1851558
NORTH DAKOTA 96757
OHIO 2788134
OKLAHOMA 797220
OREGON 1053931
PENNSYLVANIA 3069309
RHODE ISLAND 305208
SOUTH CAROLINA 1048276
SOUTH DAKOTA 114059
TENNESSEE 1489536
TEXAS 4457644
UTAH 338812
VERMONT 161049
VIRGINIA 1497770
WASHINGTON 1780968
WEST VIRGINIA 521290
WISCONSIN 1112844
WYOMING 59302
HAWAII 351337
DISTRICT OF COLUMBIA 248591
PUERTO RICO 1622194


In [235]:
#print ('Med', Medicaid_demand[35039])
#print ('Med', Medicaid_demand[4017], 'Pop', population_county[4017], 'Med_capita', 100000*(Medicaid_demand[4017]/population_county[4017]))

In [236]:
#Further we create additional vulnerability values by considering SVI, YPLL, Unemployment, COVID, COVID_capita, COVID_death and COVID_death capita and the number of Medicaid enrolles in each county together

Covid_capita = {j: 100000*(COVID_14days[j]/population_county[j]) for j in location}

Covid_death_capita = {j: 100000*(County_covid_death[j]/population_county[j]) for j in location}

Medicaid_capita = {j: 100000*(Medicaid_demand[j]/population_county[j]) for j in location}

Unemployment_capita = dict(zip(df_unemp.FIPS, df_unemp.Unemployed_Rate))

#Three_month_lag_covid_death_capita = {j: 100000*(three_month_lag_death[j]/population_county[j]) for j in location}

#Two_month_lag_covid_death_capita = {j: 100000*(two_month_lag_death[j]/population_county[j]) for j in location}

#One_month_lag_covid_death_capita = {j: 100000*(one_month_lag_death[j]/population_county[j]) for j in location}

In [237]:
for j in location:
    print (j, County_covid_death[j], population_county[j], Covid_death_capita[j])

35039 19 38921 48.81683409984327
1001 39 55869 69.80615368093218
1009 35 57826 60.52640680662678
1013 41 19448 210.81859317153436
1015 111 113605 97.70696712292593
1017 48 33254 144.3435376195345
1031 15 52342 28.657674525237862
1033 40 55241 72.40998533697797
1039 32 37049 86.372101811115
1043 32 83768 38.200744914525835
1045 55 49172 111.85227365167168
1051 61 81209 75.11482717432797
1055 63 102268 61.602847420503
1067 6 17205 34.873583260680036
1069 38 105882 35.88900851891728
1071 24 51626 46.48820361833185
1077 53 92729 57.15579807827109
1079 35 32924 106.30543068885919
1083 44 98915 44.48263660718799
1089 129 372909 34.592889954385655
1095 54 96774 55.800111600223204
1097 356 413210 86.15473972072313
1103 44 119679 36.765013076646696
1111 21 22722 92.42144177449168
1113 3 57961 5.175894135711944
1115 55 89512 61.44427562784878
1117 75 217702 34.4507629695639
1121 41 79978 51.26409762684738
2261 1 9202 10.867202782003913
4021 247 462789 53.37205508341815
5009 41 37432 109.53195127

27173 8 9709 82.39777526006797
28003 23 36953 62.241225340297134
28033 93 184945 50.28521993024953
28045 37 47632 77.67887134699362
28057 33 23390 141.0859341598974
28059 112 143617 77.98519673854767
28081 91 85436 106.51247717589774
28089 99 106272 93.15718157181571
28093 41 35294 116.16705389017963
28095 77 35252 218.42732327243846
28109 66 55535 118.84397226973981
28121 94 155271 60.53931513289668
28129 16 15916 100.52777079668257
28159 24 17955 133.66750208855473
28161 26 12108 214.73406012553681
29003 8 17712 45.16711833785004
29007 11 25388 43.327556325823224
29011 4 11754 34.03096818104475
29013 8 16172 49.46821667078902
29021 65 87364 74.40135524930177
29025 1 9020 11.086474501108647
29031 63 78871 79.87726794385769
29033 4 8679 46.08825901601567
29037 36 105780 34.03289846851957
29039 4 14349 27.87650707366367
29041 0 7426 0.0
29043 20 88595 22.574637394886846
29045 2 6797 29.424746211563924
29047 60 249948 24.00499303855202
29049 41 20387 201.10854956589984
29051 38 76745 49.

39017 142 383134 37.06275089133306
39019 10 26914 37.15538381511481
39021 4 38885 10.286742960010287
39023 64 134083 47.73162891641744
39025 45 206428 21.799368302749627
39027 14 41968 33.358749523446434
39029 95 101883 93.24421149750204
39033 15 41494 36.14980479105413
39037 58 51113 113.4740672627316
39039 21 38087 55.13692335967653
39043 62 74266 83.48369374949506
39045 63 157574 39.98121517509234
39051 24 42126 56.97194131890044
39053 13 29898 43.48116930898388
39055 51 93649 54.45867014063151
39057 61 168937 36.10813498523118
39059 8 38875 20.578778135048232
39063 33 75783 43.54538616840188
39065 18 31365 57.38880918220947
39067 3 15040 19.946808510638295
39069 22 27006 81.4633785084796
39071 17 43161 39.387409930261114
39073 11 28264 38.91876592131334
39077 18 58266 30.892801977139328
39081 4 65325 6.123230003827019
39083 18 62322 28.88225666698758
39085 66 230149 28.677074417008114
39089 74 176862 41.8405310354966
39091 11 45672 24.084778420038536
39093 97 309833 31.307188065829

55119 12 20343 58.98834980091432
55121 11 29649 37.10074538770279
55123 7 30822 22.71105054830965
55127 47 103868 45.24974005468479
55131 66 136034 48.51728244409486
55135 107 50990 209.84506766032555
55137 7 24443 28.638055885120483
55141 22 72999 30.137399142454008
56003 10 11790 84.81764206955046
56007 6 14800 40.54054054054054
56009 6 13822 43.40905802344089
56011 4 7584 52.74261603375527
56013 25 39261 63.6764218945009
56015 6 13211 45.41669820604042
56019 4 8445 47.365304914150386
56021 18 99500 18.090452261306535
56025 36 79858 45.08001703022865
56029 4 29194 13.701445502500514
56031 5 8393 59.573454068866916
56033 10 30485 32.80301787764474
56035 1 9831 10.171905197843556
56043 7 7805 89.68609865470852
56045 0 6927 0.0
1003 84 223234 37.62867663527957
1019 24 26196 91.61704076958314
1021 36 44428 81.02998109300441
1027 23 13235 173.78163959199094
1041 30 13772 217.83328492593665
1049 34 71513 47.54380322458854
1057 16 16302 98.1474665685192
1061 8 26271 30.45182901298009
1073 4

21077 9 8869 101.47705491036193
21081 6 25069 23.933942319199012
21085 19 26427 71.89616679910698
21105 1 4380 22.831050228310502
21111 460 766757 59.99293126766368
21113 27 54115 49.893744802734915
21137 20 24549 81.46971363395657
21139 2 9194 21.753317380900587
21155 12 19273 62.263269859388785
21167 6 21933 27.356038845575164
21173 8 28157 28.41211776822815
21177 20 30622 65.31252041016263
21183 14 23994 58.347920313411684
21187 3 10901 27.520410971470508
21197 0 12359 0.0
21221 1 14651 6.825472663981981
21225 6 14381 41.72171615325777
21233 6 12942 46.36068613815484
22005 105 126604 82.93576822217308
22011 48 37497 128.01024081926553
22023 3 6973 43.02308905779435
22053 47 31368 149.8342259627646
22057 134 97614 137.27539082508656
22073 200 153279 130.4810182738666
22079 180 129648 138.8374676045909
22081 26 8442 307.9838900734423
22089 70 53100 131.82674199623352
22099 70 53431 131.0100877767588
22111 56 22108 253.30197213678306
22121 43 26465 162.47874551294163
22123 19 10830 175

53071 21 60760 34.56221198156682
53073 62 229247 27.045064929966365
54001 4 16441 24.329420351560124
54007 0 13957 0.0
54011 47 91945 51.1175159062483
54017 2 8448 23.674242424242426
54019 31 42406 73.10286280243362
54027 1 23175 4.314994606256742
54039 132 178124 74.1056791897779
54061 7 105612 6.628034693027308
54063 8 13275 60.263653483992464
54069 14 41411 33.80744246697738
54071 1 6969 14.349261013057827
54077 6 33432 17.946877243359655
54081 15 73361 20.446831422690533
54093 0 6839 0.0
55001 7 20220 34.619188921859546
55007 6 15036 39.904229848363926
55013 9 15414 58.38847800700662
55019 34 34774 97.77419911428079
55025 71 546695 12.987131764512206
55029 10 27668 36.14283648980772
55033 5 45368 11.020983953447363
55037 11 4295 256.1117578579744
55051 9 5687 158.25567082820467
55059 122 169561 71.95050748698108
55063 32 118016 27.114967462039047
55085 26 35595 73.04396684927659
55099 3 13351 22.470226949292186
55113 6 16558 36.23626041792487
55125 11 22195 49.56071187204325
55129 

47173 5 19972 25.035049068696175
47181 9 16673 53.97948779463803
48001 46 57735 79.67437429635403
48015 11 30032 36.62759722962174
48019 11 23112 47.59432329525788
48021 34 88723 38.32151753209427
48027 112 362924 30.86045563258423
48033 0 654 0.0
48039 193 374264 51.56787722035782
48051 7 18443 37.9547795911728
48063 15 13094 114.5562853215213
48073 47 52646 89.27553850245032
48075 0 7306 0.0
48081 9 3387 265.72187776793623
48085 229 1034730 22.131377267499733
48089 4 21493 18.61071046387196
48099 16 75951 21.06621374307119
48123 44 20160 218.25396825396825
48139 103 184826 55.728090203759216
48171 10 26988 37.05350526159774
48193 7 8461 82.73253752511523
48197 4 3933 101.70353419781337
48205 2 5576 35.86800573888092
48227 32 36664 87.27907484180668
48243 0 2274 0.0
48267 2 4337 46.11482591653217
48275 9 3664 245.63318777292577
48297 10 12207 81.92020971573687
48299 4 21795 18.352833218628124
48305 9 5951 151.2350865400773
48309 174 256623 67.80374323423855
48319 2 4274 46.79457182966

45033 51 30479 167.3283244200925
45053 23 30073 76.48056396102817
45075 133 86175 154.33710472874964
45089 50 30368 164.64699683877765
47007 5 15064 33.191715347849176
47061 14 13427 104.26752066731214
48003 13 18705 69.50013365410318
48013 38 51153 74.28694309229175
48017 6 7000 85.71428571428571
48025 40 32565 122.83126055581145
48069 7 7530 92.96148738379814
48079 4 2853 140.2032947774273
48095 2 2726 73.36757153338225
48101 4 1398 286.1230329041488
48103 7 4797 145.9245361684386
48113 1429 2635516 54.22088122401837
48117 36 18546 194.11193788417987
48153 11 5712 192.57703081232495
48169 13 6229 208.70123615347566
48177 22 20837 105.58141767049
48195 9 5399 166.69753658084832
48201 2943 4713325 62.439997241862166
48207 3 5658 53.022269353128316
48249 54 40482 133.3926189417519
48255 18 15601 115.37721940901224
48271 2 3667 54.540496318516496
48279 35 12893 271.4651361203754
48301 0 169 0.0
48341 27 20940 128.93982808022923
48357 6 9836 61.000406669377796
48369 22 9605 229.0473711608

72087 0 24553 0.0
72095 0 10321 0.0
72097 0 71530 0.0
72107 0 20220 0.0
72113 0 131881 0.0
72123 0 27128 0.0
72127 0 318441 0.0
72083 0 7927 0.0
72001 0 17363 0.0
72055 0 15383 0.0
72073 0 13891 0.0
72079 0 22010 0.0
72093 0 5430 0.0
72019 0 27725 0.0


In [238]:
County_covid_death[9009], population_county[9009], Covid_death_capita[9009]

(1180, 854757, 138.05093143431407)

In [239]:
# Dictionary for the total Covid per capita for each state
Total_covid_cap = total_state(Covid_capita) 


# Dictionary for the total Covid death per capita for each state
Total_covid_death_cap = total_state(Covid_death_capita) 


# Dictionary for the total Medicaid per capita for each state
Total_medicaid_cap = total_state(Medicaid_capita)


# Dictionary for the total Unemployment per capita for each state
Total_unemployment_cap = total_state(Unemployment_capita)



In [240]:
# Dictionaries for the different vulnerability criteria values for states

# Dictionary for total Medicaid patient numbers for each state
Medicaid_demand_state = total_state(Medicaid_demand) 

# Dictionary for total positive COVID cases for last 14 days in each state
Covid_state = total_state(COVID_14days) 

# Dictionary for total SVI values for each state
SVI_state = total_state(SVI_county) 

# Dictionary for total YPLL values for each state
YPLL_state = total_state(YPLL) 

# Dictionary for total Unemployment numbers for each state
Unemployment_state = total_state(Unemployment)  

In [241]:
print(Medicaid_demand_state)

{'NEW MEXICO': 772102.0, 'ALABAMA': 957115.9999999999, 'ALASKA': 231145.00000000006, 'ARIZONA': 1839932.0, 'ARKANSAS': 830466.9999999999, 'CALIFORNIA': 11847710.999999998, 'COLORADO': 1337804.9999999998, 'CONNECTICUT': 874974.0, 'DELAWARE': 239009.0, 'FLORIDA': 3892552.0, 'GEORGIA': 1928703.000000001, 'IDAHO': 340741.9999999999, 'ILLINOIS': 2987495.999999999, 'INDIANA': 1602975.9999999995, 'IOWA': 699741.0000000003, 'KANSAS': 401103.00000000006, 'KENTUCKY': 1416012.9999999995, 'LOUISIANA': 1585023.9999999998, 'MAINE': 232455.00000000003, 'MARYLAND': 1372695.0, 'MASSACHUSETTS': 1616404.0, 'MICHIGAN': 2439425.0000000005, 'MINNESOTA': 1085777.9999999998, 'MISSISSIPPI': 632427.0000000002, 'MISSOURI': 923641.0000000002, 'MONTANA': 247333.0, 'NEBRASKA': 254158.99999999997, 'NEVADA': 685073.0000000001, 'NEW HAMPSHIRE': 193436.0, 'NEW JERSEY': 1759653.0000000002, 'NEW YORK': 6263164.0, 'NORTH CAROLINA': 1851558.0000000005, 'NORTH DAKOTA': 96757.00000000001, 'OHIO': 2788134.0, 'OKLAHOMA': 79722

# 1 million CHW allocation to states

We allocate 1 million CHWs to states proportional to total Medicaid enrolles in each state.

Let's $FedCHW$ represents the number of CHW will be allocated within states by the federal government, which is 1 million in our project. $TotMed$ represents the total Medicaid enrollee numbers over the US, $Med_s$ is the total Medicaid enrollee numbers in state $s \in S$, and $CHW_s$ is the total number of CHW allocated to state $ s\in S$. 

$CHW_s = FedCHW*\frac{Med_s}{TotMed}$



In [242]:
# We consider allocation of 1 million CHW all over the US
Federal_budget_CHW = 1000000

# First, we calculate the Total Medicaid enrolles all over the US
Total_federal_need = sum(Medicaid_demand_state[s] for s in State)

# Allocate the 1 million CHWs proportional to Medicaid enrolles in each state
Medicaid_budget_state = {s: (Medicaid_demand_state[s]/Total_federal_need)*Federal_budget_CHW  for s in State}

In [243]:
for s in State:
    print (s, Medicaid_demand_state[s], Total_federal_need, Federal_budget_CHW, (Medicaid_demand_state[s]/Total_federal_need)*Federal_budget_CHW)

NEW MEXICO 772102.0 76256043.0 1000000 10125.12542776446
ALABAMA 957115.9999999999 76256043.0 1000000 12551.3462585516
ALASKA 231145.00000000006 76256043.0 1000000 3031.1696084204114
ARIZONA 1839932.0 76256043.0 1000000 24128.343507149984
ARKANSAS 830466.9999999999 76256043.0 1000000 10890.5073923125
CALIFORNIA 11847710.999999998 76256043.0 1000000 155367.50313676777
COLORADO 1337804.9999999998 76256043.0 1000000 17543.593233653624
CONNECTICUT 874974.0 76256043.0 1000000 11474.159497103725
DELAWARE 239009.0 76256043.0 1000000 3134.295861640762
FLORIDA 3892552.0 76256043.0 1000000 51045.81678857897
GEORGIA 1928703.000000001 76256043.0 1000000 25292.461084035018
IDAHO 340741.9999999999 76256043.0 1000000 4468.393409818024
ILLINOIS 2987495.999999999 76256043.0 1000000 39177.17052273483
INDIANA 1602975.9999999995 76256043.0 1000000 21020.97010200227
IOWA 699741.0000000003 76256043.0 1000000 9176.203910816621
KANSAS 401103.00000000006 76256043.0 1000000 5259.950349115283
KENTUCKY 1416012.99

In [244]:
print (Covid_state)

{'NEW MEXICO': 21803.0, 'ALABAMA': 25201.0, 'ALASKA': 7293.0, 'ARIZONA': 33846.0, 'ARKANSAS': 19959.0, 'CALIFORNIA': 115081.0, 'COLORADO': 58871.0, 'CONNECTICUT': 21060.0, 'DELAWARE': 4113.0, 'FLORIDA': 81317.0, 'GEORGIA': 34358.0, 'IDAHO': 17152.0, 'ILLINOIS': 156138.0, 'INDIANA': 74818.0, 'IOWA': 54921.0, 'KANSAS': 31804.0, 'KENTUCKY': 31906.0, 'LOUISIANA': 25311.0, 'MAINE': 2297.0, 'MARYLAND': 23129.0, 'MASSACHUSETTS': 29145.0, 'MICHIGAN': 85561.0, 'MINNESOTA': 79579.0, 'MISSISSIPPI': 13941.0, 'MISSOURI': 55881.0, 'MONTANA': 14094.0, 'NEBRASKA': 28237.0, 'NEVADA': 20987.0, 'NEW HAMPSHIRE': 4084.0, 'NEW JERSEY': 44243.0, 'NEW YORK': 50403.0, 'NORTH CAROLINA': 36548.0, 'NORTH DAKOTA': 17020.0, 'OHIO': 86437.0, 'OKLAHOMA': 32512.0, 'OREGON': 12266.0, 'PENNSYLVANIA': 65002.0, 'RHODE ISLAND': 8532.0, 'SOUTH CAROLINA': 18312.0, 'SOUTH DAKOTA': 17112.0, 'TENNESSEE': 52412.0, 'TEXAS': 136077.0, 'UTAH': 38682.0, 'VERMONT': 986.0, 'VIRGINIA': 22070.0, 'WASHINGTON': 24849.0, 'WEST VIRGINIA': 1

# Proportional allocation for different vulnerability values

Let V = {Medicaid, SVI, YPLL, Unemployment, COVID, COVID_capita, COVID_death and COVID_death capita, SVI and Medicaid, YPLL and Medicaid, Unemployment and Medicaid, COVID and Medicaid, COVID_capita and Medicaid, COVID_death and Medicaid and COVID_death capita and Medicaid}. We assume $v_j$ represent the vulnerability value for county $j \in J$, while $v_s$ represent the sum of the vulnerability values for each county in the state of county j.

$Prop_{v_j} = \frac{v_j}{v_s}*CHW_s$



In [245]:
# Calling proportional allocation function for different vulnerability criterias

# Proportional allocation according to cumulative Covid death in per capita in each county
Proportional_to_covid_death_cap = Proportional_allocation(Covid_death_capita, Total_covid_death_cap,Medicaid_budget_state)

# Propportional allocation according to Medicaid enrollee number in each county
Proportional_to_medicaid = Proportional_allocation(Medicaid_demand, Medicaid_demand_state,Medicaid_budget_state )

# Proportional allocation according to Medicaid enrolles per capita in each county
Proportional_to_medicaid_cap = Proportional_allocation(Medicaid_capita, Total_medicaid_cap, Medicaid_budget_state )

# Proportional allocation according to last 14 days positive COVID cases in each county
Proportional_to_covid = Proportional_allocation(COVID_14days, Covid_state, Medicaid_budget_state)

# Proportional allocation according to SVI score in each county
Proportional_to_SVI = Proportional_allocation(SVI_county, SVI_state, Medicaid_budget_state)

# Proportional allocation according to YPLL in each county
Proportional_to_YPLL = Proportional_allocation(YPLL, YPLL_state, Medicaid_budget_state)

# Proportional allocation according to Unemployment  in each county
Proportional_to_unemployment = Proportional_allocation(Unemployment, Unemployment_state, Medicaid_budget_state)

# Proportional allocation according to Medicaid enrolles per capita in each county
Proportional_to_unemployment_cap = Proportional_allocation(Unemployment_capita, Total_unemployment_cap, Medicaid_budget_state )

# Proportional allocation according to last 14 days positive COVID cases per capita in each county
Proportional_to_covid_capita = Proportional_allocation(Covid_capita, Total_covid_cap, Medicaid_budget_state)



# Normalize values for comparison
To be able compare the different vulnerability values for each county, we normalize all vulnerability values as follows. 

Let $m_{v_s} = \min \{v_j, \text{ for county j in state s }\}$  and 
$M_{v_s} = \max \{v_j, \text{ for county j in state s }\}$.

We calculate the normalize value for each vulnerability for each county by substracting the min vulnerability in the state of the county and dividing that by the differences between max and min value of the vulnerability values in the state. Mathematical formulation for the normalization is as follows.

$N_{v_j} = \frac{v_j - m_{v_s}}{M_{v_s} - m_{v_s}}$ 

for each $v \in V$, where V = {Medicaid, SVI, YPLL, Unemployment, COVID, COVID_capita, COVID_death and COVID_death capita, SVI and Medicaid, YPLL and Medicaid, Unemployment and Medicaid, COVID and Medicaid, COVID_capita and Medicaid, COVID_death and Medicaid and COVID_death capita and Medicaid}, j is a county in each state $s \in S$.


In [246]:
#Normalize function to normalize the vulnerability values to be able to compare them

def normalize(dict_1):
    
    result = {}
    min_data = {s: min(dict_1[j] for j in location if (j,s) in cartesian_pro_county_state) for s in State }
    max_data = {s: max(dict_1[j] for j in location if (j,s) in cartesian_pro_county_state) for s in State }
    
    for (j,s) in cartesian_pro_county_state:
        
        if (max_data[s] - min_data[s]) != 0 :
    
            result[j] = (dict_1[j] - min_data[s])/(max_data[s] - min_data[s])
        
        else:
            result[j] = 1
    
    return result 

# Percentile Rank

<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.percentileofscore.html"> The function scipy.stats.percentileofscore (a, score, kind='rank')   </a>
computes the percentile rank of a score relative to a list of scores. 
"rank": Average percentage ranking of score. In case of multiple matches, average the percentage rankings of all matching scores.

In [247]:
from scipy import stats

# Calculate percentile ranks

def percentile_ranks(data):
    x = {s: [] for s in State}

    for (j,s) in cartesian_pro_county_state:
         
        x[s].append(data[j])
    
    
    
    percentile_ranks = {i: stats.percentileofscore(x[s], data[i], 'rank') for (i,s) in cartesian_pro_county_state}

    return percentile_ranks

In [248]:
# Write timestamp 

time_stamp = time.strftime('%m-%d-%Y %H:%M:%S')
with open('Output/time_stamp.csv','w') as f:
    w = csv.writer(f)
    now = time.strftime('%m/%d/%Y %H:%M:%S')
    w.writerow(['time',now])
    

In [249]:
#print (Medicaid_demand)

In [250]:
#Write a function to order the dicts
def order_k(dict_1):
    dict_2 = {}
    for m in location:
        if m in dict_1.keys():
            dict_2[m] = dict_1[m]
        else:
            dict_2[m] = 0
    
    return dict_2
            

In [251]:


Medicaid_demand = order_k(Medicaid_demand)#{m: Medicaid_demand[m] for m in location}
COVID_14days = order_k(COVID_14days)#{m: COVID_14days[m] for m in location}
SVI_county = order_k(SVI_county)#{m: SVI_county[m] for m in location}
YPLL = order_k(YPLL)#{m: YPLL[m] for m in location}
Unemployment = order_k(Unemployment)#{m: Unemployment[m] for m in location}
Unemployment_capita = order_k(Unemployment_capita)#{m: Unemployment_capita[m] for m in location}

In [252]:
# Write file allocation with each strategies for each county 

Strategies = ["Medicaid_demand", "Medicaid_capita", "Covid", "SVI"
              , "YPLL","Unemployment", "Unemployment_capita", "Covid_capita",  "Covid_death_capita" ]

fieldnames = []  
fieldnames.append('County_FIPS')


SVI_values = {i:SVI_county[i] for i in location}
s_count = 1
for s in Strategies:   
    fieldnames.append('Proportional_allocation_to_' + s)
    fieldnames.append(s)
    fieldnames.append('Percentile_ranks_' + s)

    
        

writefile = 'Output/County_level_proportional_allocation_for_all_policies.csv'
with open( writefile, 'w' ) as f:
    writer = csv.writer(f)                
    writer.writerow(fieldnames)
    for row in zip(location
                   , Proportional_to_medicaid.values(),         Medicaid_demand.values(),      percentile_ranks(Medicaid_demand).values()
                   , Proportional_to_medicaid_cap.values(),     Medicaid_capita.values(),      percentile_ranks(Medicaid_capita).values()
                   , Proportional_to_covid.values(),            COVID_14days.values(),         percentile_ranks(COVID_14days).values()
                   , Proportional_to_SVI.values(),              SVI_county.values(),           percentile_ranks(SVI_county).values()
                   , Proportional_to_YPLL.values(),             YPLL.values(),                 percentile_ranks(YPLL).values()
                   , Proportional_to_unemployment.values(),     Unemployment.values(),         percentile_ranks(Unemployment).values()
                   , Proportional_to_unemployment_cap.values(), Unemployment_capita.values(),  percentile_ranks(Unemployment_capita).values()
                   , Proportional_to_covid_capita.values(),     Covid_capita.values(),         percentile_ranks(Covid_capita).values()
                   , Proportional_to_covid_death_cap.values(),  Covid_death_capita.values(),   percentile_ranks(Covid_death_capita).values() ):                    
       
        writer.writerow(row)

In [253]:
writefile = 'Output/State_level_allocation.csv'

cl = ['State', 'CHW_allocation']
with open( writefile, 'w' ) as f:
    writer = csv.writer(f)                
    writer.writerow(cl)
    for row in zip( State, Medicaid_demand_state.values()):
        writer.writerow(row)