In [1]:
import pandas as pd
pd.options.display.max_columns =200
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

## I. Calculating demand for contact tracers
This section outlines the method used to calculate demand for contact tracers for COVID-19 at the county level. This is an implementation of the [Contact Tracing Workforce Estimator](https://www.gwhwi.org/estimator-613404.html) developed by George Washington University's Fitzhugh Mullan Institute for Health Workforce Equity. The following uses their methodology exactly. For further details on their methodology see this [brief](https://www.gwhwi.org/uploads/4/3/3/5/43358451/contact_tracing_brief_05.05.20.pdf). 

Their model sets a baseline need of 15 contact tracers per 100,000 people and raises the estimate above that baseline when the need implied by the total number of new cases in the last 14 days, together with an estimated number of contacts per case, is larger. 

This notebook relies on data collected by the [New York Times](https://github.com/nytimes/covid-19-data) on cumulative cases by county in the U.S., which are updated daily. 


In [2]:
#### replace with desired (or most recent) date in NY Times dataset:
covid_data_update_date = '2020-06-08'

#### Parameters for GW model

# base_need = baseline contact tracers per 100,000 population
base_need = 15

# contacts = estimated contacts per case (depends on social distancing measures)
contacts = 10

# case_interviews = number of confirmed case interviews per day
case_interviews = 6

# cont_notifications = number of contact notifications per day
cont_notifications = 12

# contact_followup = number of contact follow ups per day
contact_followup = 32

# followup_freq = follow up frequency (per contact per week)
followup_freq = 7

In [4]:
## 14 day period defined
# `infer_datetime_format` is deprecated since pandas 2.0 (format inference is
# the default there), so the argument is dropped; the parsed value is unchanged.
data_date_dt = pd.to_datetime(covid_data_update_date)
N = 14  # length of the look-back window, in days
# exclusive lower bound of the N-day window (rows with dt > this date are kept)
date_N_days_ago = data_date_dt - timedelta(days=N)

# exclusive lower bound one day earlier, so the subset also holds the extra
# day that serves as the baseline when differencing cumulative case counts
date_N1_days_ago = data_date_dt - timedelta(days=N+1)

In [5]:
## data imports
# svi: county-level table read from the local SVI csv
svi = pd.read_csv(
    filepath_or_buffer="data/svi/svi_2018_counties_state_ranked.csv",
)
# covid: NY Times county-level case file, fetched from GitHub on every run
covid = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv",
)

In [6]:
# Parse the `date` column into a datetime column used by the window filters.
# `infer_datetime_format` is deprecated since pandas 2.0 (inference is now the
# default behaviour), so it is no longer passed.
covid['dt'] = pd.to_datetime(covid['date'])

In [7]:
## subset the last 15 days: date_N1_days_ago < dt <= data_date_dt
# `assign` returns a new frame, so the result is independent of `covid`
covid_last15 = (
    covid
    .loc[lambda frame: frame['dt'].gt(date_N1_days_ago) & frame['dt'].le(data_date_dt)]
    .assign(dt_time_delta=lambda frame: frame['dt'] - data_date_dt)
)
 

In [8]:
## calculate new daily cases

## order rows by county then date, and drop rows whose county ('fips') is unknown
covid_last15 = (
    covid_last15
    .sort_values(by=['fips', 'dt'])
    .loc[lambda frame: frame['fips'].notnull()]
    .copy()
)

## day-over-day difference of the cumulative case count within each county
# The first row of every county has no predecessor and therefore gets NaN.
# NOTE(review): if a county has no row on the baseline (15th) day, that NaN
# falls inside the 14-day window and its count is skipped by the later sum —
# confirm whether this is intended.
covid_last15['new_cases'] = covid_last15.groupby('fips')['cases'].diff()

## restore the original row order
covid_last15 = covid_last15.sort_index()


In [9]:
## keep only the last 14 days; the 15th day was needed solely as the baseline for new_cases
covid_last14 = covid_last15.loc[
    lambda frame: frame['dt'].gt(date_N_days_ago) & frame['dt'].le(data_date_dt)
].copy()


In [15]:
## calculate follow up encounter demand
# Each day's new cases are weighted by 14 plus the whole-day offset from
# data_date_dt (0 on the data date, negative for earlier days), divided by 7,
# then multiplied by the weekly follow-up frequency and the contacts per case.
# NOTE(review): the literal 14 equals N today; it is unclear from this notebook
# whether it is meant to track N or is a separate follow-up period — confirm.
covid_last14 = covid_last14.assign(
    fe_demand=lambda frame: (
        frame['new_cases']
        * (14 + (frame['dt'] - data_date_dt).dt.days)
        / 7
        * followup_freq
        * contacts
    )
)


In [16]:
## group by to get case load and follow up demand values for each county

# Columns are selected with a list (double brackets). Selecting several
# groupby columns without a list, as in ['new_cases','fe_demand'], was
# deprecated in pandas 1.0 and raises an error from pandas 2.0 onward.
# The result is indexed by 'fips'; `sum` skips NaN values.
covid_last14_stats = covid_last14.groupby(['fips'])[['new_cases','fe_demand']].sum()

  covid_last14_stats = covid_last14.groupby(['fips'])['new_cases','fe_demand'].sum()


In [17]:
# attach population (E_TOTPOP) from the CDC svi dataset

# 'fips' moves from the index to a column and is cast to int so it can be
# matched against the svi 'FIPS' column. The merge uses the default join, so
# only counties present in both tables are kept.
covid_last14_stats = (
    covid_last14_stats
    .reset_index()
    .astype({'fips': int})
    .merge(svi[['FIPS', 'E_TOTPOP']], left_on='fips', right_on='FIPS')
)

In [18]:
## calculations for each type of demand

# Columns are added in the order listed; later entries read earlier ones.
# NOTE(review): the literals 5 and 2 are presumably 5 working days per week
# over 2 weeks — confirm against the GW brief.
covid_last14_stats = covid_last14_stats.assign(
    # baseline: base_need tracers per 100,000 residents
    base_ct=lambda stats: (stats['E_TOTPOP']/100000)*base_need,
    # case interviews
    case_int_need=lambda stats: stats['new_cases']/case_interviews/5,
    # initial contact notifications
    contact_notify_need=lambda stats: (stats['new_cases']*contacts/cont_notifications)/5,
    # contact follow-ups
    contact_follow_need=lambda stats: stats['fe_demand']/(contact_followup*5*2),
    # case-driven need = sum of the three components above
    covid_need=lambda stats: (
        stats['case_int_need']+stats['contact_notify_need']+stats['contact_follow_need']
    ),
    # final estimate = the larger of the case-driven need and the baseline
    total_need=lambda stats: stats[['covid_need','base_ct']].max(axis=1),
)

In [22]:
# show the per-county results ordered from fewest to most new cases
covid_last14_stats.sort_values(by="new_cases", ascending=True)

Unnamed: 0,fips,new_cases,fe_demand,FIPS,E_TOTPOP,base_ct,case_int_need,contact_notify_need,contact_follow_need,covid_need,total_need
1092,22039,-82.0,-1950.0,22039,33636,5.04540,-2.733333,-13.666667,-6.09375,-22.49375,5.04540
1927,38101,-10.0,610.0,38101,69034,10.35510,-0.333333,-1.666667,1.90625,-0.09375,10.35510
1925,38097,-7.0,100.0,38097,8019,1.20285,-0.233333,-1.166667,0.31250,-1.08750,1.20285
1907,38059,-4.0,20.0,38059,30544,4.58160,-0.133333,-0.666667,0.06250,-0.73750,4.58160
2107,41033,-3.0,-110.0,41033,85481,12.82215,-0.100000,-0.500000,-0.34375,-0.94375,12.82215
...,...,...,...,...,...,...,...,...,...,...,...
2450,48113,3349.0,268270.0,48113,2586552,387.98280,111.633333,558.166667,838.34375,1508.14375,1508.14375
2492,48201,3997.0,313220.0,48201,4602523,690.37845,133.233333,666.166667,978.81250,1778.21250,1778.21250
96,4013,5624.0,521230.0,4013,4253913,638.08695,187.466667,937.333333,1628.84375,2753.64375,2753.64375
588,17031,9722.0,666330.0,17031,5223719,783.55785,324.066667,1620.333333,2082.28125,4026.68125,4026.68125


In [19]:
## saving simplified form
# keep only the county FIPS code and the total_need column
covid_demand = covid_last14_stats[['fips','total_need']]

In [20]:
# display the simplified per-county demand table
covid_demand

Unnamed: 0,fips,total_need
0,1001,42.50000
1,1003,31.21605
2,1005,33.78125
3,1007,7.97500
4,1009,10.95000
...,...,...
2981,56037,6.61755
2982,56039,3.45885
2983,56041,7.41250
2984,56043,2.81875
