# COVID-19 TEST RESULTS DASHBOARD UPDATES
### <a href="https://hdma-sdsu.github.io/index.html">COVID-19 Research Hub</a>
Center for Human Dynamics in the Mobile Age (HDMA) at SDSU <br>
Jessica Embury

#### IMPORT STATEMENTS

In [14]:
from arcgis.gis import GIS
from arcgis.features import FeatureLayerCollection
import json
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import sys
import tabula
import webbrowser

#### VARIABLES: DATES, PATHS, AGOL IDS

In [15]:
######################
# USER ENTERED DATES #
######################
# Typed in by hand for every weekly run.
most_recent_date = '12/16/20'  # date in the name of the county data folder
data_date = '12/12/20'  # last day covered by this week's report
prior_date = '12/05/20'  # last day covered by the previous week's report
day_after_prior = '12/6/20'  # first day of this week's date range label

# Slash-free / dashed forms of the dates, computed once for the paths below.
_folder_tag = most_recent_date.replace('/', '-')
_data_tag = data_date.replace('/', '')
_prior_tag = prior_date.replace('/', '')

#########
# PATHS #
#########
# County PDF; the literal "20" after the placeholder turns 12-16-20 into 12-16-2020.
file = 'C:/Users/jesse/Dropbox/Mapping-Vulearable-Pop-Tasks/SD-County-Data/{}20/Summary_of_All_Tests_Reported_by_Zip_Code_of_Residence.pdf'.format(_folder_tag)

# Archived CSVs, one cumulative file and one weekly-change file per report date.
_total_template = '../covid_data/tests/covid_tests_total_{}.csv'
_change_template = '../covid_data/tests/covid_tests_change_{}.csv'
new_csv = _total_template.format(_data_tag)  # cumulative results through data_date
change_csv = _change_template.format(_data_tag)  # results new in this week
prior_csv = _total_template.format(_prior_tag)  # cumulative results through prior_date

# Fixed-name files.
zips_csv = './data/zip_coords_pop.csv'  # zip code spatial data
total_upload = './data/sd_zips_covid_test_results.csv'  # cumulative CSV uploaded to AGOL
change_upload = './data/sd_zips_covid_tests_new.csv'  # weekly CSV uploaded to AGOL

############
# AGOL IDs #
############
# Hosted feature layers that get overwritten.
total_layer_id = '8ae9d6bc96ff4ac7b6240696c77fc4b6'
weekly_layer_id = '812242e64749440baf7e1281533f464a'

# Web maps whose symbology gets rescaled.
total_map_id = 'f218f594126140ed939d2b5e7baa8baa'
weekly_map_id = 'ad5c0361faa44573adcb9c5b3a8dedb0'

# Dashboard short links opened at the end for a visual check.
total_dash = 'https://arcg.is/11nm9S'
weekly_dash = 'https://arcg.is/1qmieu'

# CUMULATIVE TEST RESULTS

#### USE TABULA TO CONVERT PDF TO TABLE

In [16]:
# Have tabula extract the tables from all pages of the county PDF.
# Only the first extracted table (tables[0]) is used by the cells below.
tables = tabula.read_pdf(file, pages='all', multiple_tables=True)
tables[0]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,,,,,Percent,Tests per,,,,,Percent,Tests per
1,Zip Code,Positive,Negative Indeterminate,Total,Postive,"100,000*",Zip Code,Positive Negative Indeterminate,Total,,Positive,"100,000*"
2,91901,591,"8,780 41",9412,6.3%,52330,92071,"1,520 35,881","181 37,582",,4.0%,64559
3,91902,693,"10,450 70",11213,6.2%,63598,92075,"177 7,964","30 8,171",,2.2%,62965
4,91905,20,646 1,667,3.0%,*,92078,"1,264 24,468","143 25,875",,4.9%,50763
...,...,...,...,...,...,...,...,...,...,...,...,...
56,92065,1114,"14,136 77",15327,7.3%,42274,Subtotal,"117,654 1,914,135","10,317 2,042,106",,5.8%,60926
57,92066,12,93 0,105,11.4%,*,,,,,,
58,92067,169,"6,624 8",6801,2.5%,*,**Other,"6,022 97,411","248 103,681",,5.8%,*
59,92069,1806,"20,643 189",22638,8.0%,44240,***Unknown,"5,349 118,958","341 124,648",,4.3%,*


#### CREATE TABLE WITH LEFT COLUMN ZIPS, CLEAN DATA

In [17]:
# Left half of the extracted table: columns 0-5 carry one ZIP code per row.
t1 = tables[0][[0, 1, 2, 3, 4, 5]]
t1 = t1.rename(columns={0: 'Zip', 1: 'Positive', 2: 'Neg Indet', 3: 'Total',
                        4: 'Percent Positive', 5: 'Tests Per 100K'})

# The first two rows are the two header lines of the PDF table, not data.
t1 = t1.drop(t1.index[0:2])

# The extraction fuses the Negative and Indeterminate counts (and in some rows
# the Total too) into one whitespace-separated cell; split each cell once.
neg_indet_tokens = [cell.split() for cell in t1['Neg Indet']]

# Assign whole columns rather than writing cell by cell through chained
# indexing (t1[col][i] = ...): pandas does not guarantee that a chained write
# reaches the original frame.
t1['Negative'] = [tokens[0] for tokens in neg_indet_tokens]
t1['Indeterminate'] = [tokens[1] for tokens in neg_indet_tokens]
# Exactly three tokens means the Total landed in the fused cell as well;
# otherwise the value already in the Total column is kept.
t1['Total'] = [tokens[2] if len(tokens) == 3 else total
               for tokens, total in zip(neg_indet_tokens, t1['Total'])]

# The fused source column has been fully unpacked.
del t1['Neg Indet']

print(len(t1))
t1.head()

59


Unnamed: 0,Zip,Positive,Total,Percent Positive,Tests Per 100K,Negative,Indeterminate
2,91901,591,9412,6.3%,52330,8780,41
3,91902,693,11213,6.2%,63598,10450,70
4,91905,20,667,3.0%,*,646,1
5,91906,121,2219,5.5%,*,2076,22
6,91910,4749,59574,8.0%,71062,54566,259


#### CREATE TABLE WITH RIGHT COLUMN ZIPS, CLEAN DATA

In [18]:
# Right half of the extracted table: columns 6-11 carry a second ZIP per row.
t2 = tables[0][[6, 7, 8, 9, 10, 11]]
t2 = t2.rename(columns={6: 'Zip', 7: 'Pos Neg', 8: 'Indet Total', 9: 'Total',
                        10: 'Percent Positive', 11: 'Tests Per 100K'})

# The first two rows are the two header lines of the PDF table, not data.
t2 = t2.drop(t2.index[0:2])

# Drop spacer rows (no Zip label) by content instead of by a fixed position,
# so a change in the number of PDF rows cannot delete a real ZIP code.
# NOTE(review): assumes a blank cell arrives as NaN/None or an empty string.
zip_is_blank = t2['Zip'].isna() | (t2['Zip'].astype(str).str.strip() == '')
t2 = t2[~zip_is_blank].copy()

# On this side the extraction fuses Positive+Negative into one cell and
# Indeterminate+Total into another; split each cell once.
# str() is kept from the original so a non-string cell still has .split().
indet_total_tokens = [str(cell).split() for cell in t2['Indet Total']]
pos_neg_tokens = [cell.split() for cell in t2['Pos Neg']]

# Assign whole columns rather than writing cell by cell through chained
# indexing (t2[col][i] = ...): pandas does not guarantee that a chained write
# reaches the original frame.
t2['Negative'] = [tokens[1] for tokens in pos_neg_tokens]
t2['Indeterminate'] = [tokens[0] for tokens in indet_total_tokens]
t2['Total'] = [tokens[1] for tokens in indet_total_tokens]
t2['Pos Neg'] = [tokens[0] for tokens in pos_neg_tokens]

# 'Pos Neg' now holds only the positive count.
t2 = t2.rename(columns={'Pos Neg': 'Positive'})

# The fused source column has been fully unpacked.
del t2['Indet Total']

print(len(t2))
t2.head()

58


Unnamed: 0,Zip,Positive,Total,Percent Positive,Tests Per 100K,Negative,Indeterminate
2,92071,1520,37582,4.0%,64559,35881,181
3,92075,177,8171,2.2%,62965,7964,30
4,92078,1264,25875,4.9%,50763,24468,143
5,92081,716,14480,4.9%,44024,13643,121
6,92082,479,8675,5.5%,49070,8155,41


#### MERGE INTO ONE DATAFRAME

In [19]:
# Stack the left-hand and right-hand ZIP rows into one frame with a fresh
# 0..n-1 index. pd.concat replaces DataFrame.append, which newer pandas
# releases no longer provide.
t1 = pd.concat([t1, t2], ignore_index=True)

# Work on an independent copy with the columns in reporting order.
df = t1[['Zip', 'Positive', 'Negative', 'Indeterminate', 'Total',
         'Percent Positive', 'Tests Per 100K']].copy()

# The counts arrive as text with thousands separators ("8,780"); turn them
# into integers. Whole columns are assigned at once because chained writes
# (df[col][i] = ...) are not guaranteed to reach the frame.
for count_column in ['Positive', 'Negative', 'Indeterminate', 'Total']:
    df[count_column] = [int(text.replace(',', '')) for text in df[count_column]]

# Recompute the positivity rate from the counts (one decimal place); this
# replaces the "6.3%"-style text taken from the PDF.
df['Percent Positive'] = [round(positive / total * 100, 1)
                          for positive, total in zip(df['Positive'], df['Total'])]

# Stamp every row with the report's "through" date.
df['Date'] = data_date

print(len(df))
df.head()

117


Unnamed: 0,Zip,Positive,Negative,Indeterminate,Total,Percent Positive,Tests Per 100K,Date
0,91901,591,8780,41,9412,6.3,52330,12/12/20
1,91902,693,10450,70,11213,6.2,63598,12/12/20
2,91905,20,646,1,667,3.0,*,12/12/20
3,91906,121,2076,22,2219,5.5,*,12/12/20
4,91910,4749,54566,259,59574,8.0,71062,12/12/20


#### ADD COMMUNITY NAMES AND COORDINATES

In [20]:
# Read the per-ZIP reference table, cast its Zip column to text so it matches
# the text ZIP codes in df, and join the test results onto it. The default
# inner join keeps only ZIP codes present in both tables.
zips = (
    pd.read_csv(zips_csv)
    .astype({'Zip': str})
    .merge(df, on='Zip')
)
zips.head()

Unnamed: 0,Zip,Community,2018_population,Latitude,Longitude,Positive,Negative,Indeterminate,Total,Percent Positive,Tests Per 100K,Date
0,91901,Alpine,17885,32.80571,-116.695537,591,8780,41,9412,6.3,52330,12/12/20
1,91902,Bonita,17375,32.671583,-117.015068,693,10450,70,11213,6.2,63598,12/12/20
2,91905,Boulevard,2014,32.718365,-116.305469,20,646,1,667,3.0,*,12/12/20
3,91906,Campo,3686,32.660427,-116.469681,121,2076,22,2219,5.5,*,12/12/20
4,91910,Chula Vista,82682,32.636413,-117.065653,4749,54566,259,59574,8.0,71062,12/12/20


#### CALCULATE POSITIVE/100K

In [21]:
# Give the population column its display name.
zips = zips.rename(columns={'2018_population': 'Population 2018'})

# Cumulative positives per 100,000 residents, rounded to a whole number.
# ZIP codes with fewer than 1,000 residents get '*' instead of a rate.
# The column is built in one pass and assigned whole: the original wrote each
# cell through chained indexing (zips[col][i] = ...), which pandas does not
# guarantee will reach the frame.
total_rates = [
    round(positive / population * 100000, 0) if population >= 1000 else '*'
    for positive, population in zip(zips['Positive'], zips['Population 2018'])
]
# dtype=object because the column mixes numbers with the '*' marker.
zips['Positive Cases Per 100K'] = pd.Series(total_rates, index=zips.index, dtype=object)

# Archive this week's cumulative table and refresh the AGOL upload copy.
zips.to_csv(new_csv, index=False)
zips.to_csv(total_upload, index=False)  # for AGOL upload
zips.head()

Unnamed: 0,Zip,Community,Population 2018,Latitude,Longitude,Positive,Negative,Indeterminate,Total,Percent Positive,Tests Per 100K,Date,Positive Cases Per 100K
0,91901,Alpine,17885,32.80571,-116.695537,591,8780,41,9412,6.3,52330,12/12/20,3304
1,91902,Bonita,17375,32.671583,-117.015068,693,10450,70,11213,6.2,63598,12/12/20,3988
2,91905,Boulevard,2014,32.718365,-116.305469,20,646,1,667,3.0,*,12/12/20,993
3,91906,Campo,3686,32.660427,-116.469681,121,2076,22,2219,5.5,*,12/12/20,3283
4,91910,Chula Vista,82682,32.636413,-117.065653,4749,54566,259,59574,8.0,71062,12/12/20,5744


# WEEKLY TEST RESULTS

#### COMBINE CUMULATIVE TEST DATA (MOST RECENT WEEK AND PRIOR WEEK)

In [25]:
# Last week's cumulative results; count/date columns are renamed "old ..."
# so they can sit next to this week's values after the join.
prior = pd.read_csv(prior_csv).rename(columns={
    'Positive': 'old pos',
    'Negative': 'old neg',
    'Indeterminate': 'old indet',
    'Total': 'old total',
    'Date': 'old date',
})

# This week's cumulative results, renamed "new ..." the same way.
new = pd.read_csv(new_csv).rename(columns={
    'Positive': 'new pos',
    'Negative': 'new neg',
    'Indeterminate': 'new indet',
    'Total': 'new total',
    'Date': 'new date',
})

# Join on Zip: descriptive columns come from the prior file, and only the
# "new ..." value columns are taken from this week's file.
prior_columns = ['Zip', 'Community', 'Population 2018', 'Latitude', 'Longitude',
                 'old pos', 'old neg', 'old indet', 'old total', 'old date']
new_columns = ['Zip', 'new pos', 'new neg', 'new indet', 'new total', 'new date']
change = pd.merge(prior[prior_columns], new[new_columns], on='Zip')

change.head()

Unnamed: 0,Zip,Community,Population 2018,Latitude,Longitude,old pos,old neg,old indet,old total,old date,new pos,new neg,new indet,new total,new date
0,91901,Alpine,17885,32.80571,-116.695537,480,8198,33,8711,12/05/20,591,8780,41,9412,12/12/20
1,91902,Bonita,17375,32.671583,-117.015068,601,9722,64,10387,12/05/20,693,10450,70,11213,12/12/20
2,91905,Boulevard,2014,32.718365,-116.305469,14,613,1,628,12/05/20,20,646,1,667,12/12/20
3,91906,Campo,3686,32.660427,-116.469681,103,1981,21,2105,12/05/20,121,2076,22,2219,12/12/20
4,91910,Chula Vista,82682,32.636413,-117.065653,4129,50816,250,55195,12/05/20,4749,54566,259,59574,12/12/20


#### CALCULATE NEW WEEKLY VALUES

In [26]:
# Weekly counts are this week's cumulative value minus last week's.
# Computed column-wise: the original looped over rows with chained writes
# (change[col][i] = ...) and recomputed the whole 'Percent Positive' column
# on every iteration.
change['Positive'] = change['new pos'] - change['old pos']
change['Negative'] = change['new neg'] - change['old neg']
change['Indeterminate'] = change['new indet'] - change['old indet']
change['Total'] = change['new total'] - change['old total']

# Weekly positivity rate, one decimal place.
change['Percent Positive'] = (change['Positive'] / change['Total'] * 100).round(1)

# Label every row with the span of days the weekly counts cover.
change['Date Range'] = '{} - {}'.format(day_after_prior, data_date)

# The cumulative inputs are no longer needed once the deltas exist.
change = change.drop(columns=[
    'old pos', 'old neg', 'old indet', 'old total', 'old date',
    'new pos', 'new neg', 'new indet', 'new total', 'new date',
])

change.head()

Unnamed: 0,Zip,Community,Population 2018,Latitude,Longitude,Positive,Negative,Indeterminate,Total,Percent Positive,Date Range
0,91901,Alpine,17885,32.80571,-116.695537,111,582,8,701,15.8,12/6/20 - 12/12/20
1,91902,Bonita,17375,32.671583,-117.015068,92,728,6,826,11.1,12/6/20 - 12/12/20
2,91905,Boulevard,2014,32.718365,-116.305469,6,33,0,39,15.4,12/6/20 - 12/12/20
3,91906,Campo,3686,32.660427,-116.469681,18,95,1,114,15.8,12/6/20 - 12/12/20
4,91910,Chula Vista,82682,32.636413,-117.065653,620,3750,9,4379,14.2,12/6/20 - 12/12/20


#### CALCULATE POSITIVE/100K

In [27]:
# Weekly positives per 100,000 residents, rounded to a whole number.
# ZIP codes with fewer than 1,000 residents get '*' instead of a rate.
# The column is built in one pass and assigned whole: the original wrote each
# cell through chained indexing (change[col][i] = ...), which pandas does not
# guarantee will reach the frame.
weekly_rates = [
    round(positive / population * 100000, 0) if population >= 1000 else '*'
    for positive, population in zip(change['Positive'], change['Population 2018'])
]
# dtype=object because the column mixes numbers with the '*' marker.
change['Positive Cases Per 100K'] = pd.Series(weekly_rates, index=change.index, dtype=object)

# Archive this week's change table and refresh the AGOL upload copy.
change.to_csv(change_csv, index=False)
change.to_csv(change_upload, index=False)  # for AGOL upload
change.head()

Unnamed: 0,Zip,Community,Population 2018,Latitude,Longitude,Positive,Negative,Indeterminate,Total,Percent Positive,Date Range,Positive Cases Per 100K
0,91901,Alpine,17885,32.80571,-116.695537,111,582,8,701,15.8,12/6/20 - 12/12/20,621
1,91902,Bonita,17375,32.671583,-117.015068,92,728,6,826,11.1,12/6/20 - 12/12/20,529
2,91905,Boulevard,2014,32.718365,-116.305469,6,33,0,39,15.4,12/6/20 - 12/12/20,298
3,91906,Campo,3686,32.660427,-116.469681,18,95,1,114,15.8,12/6/20 - 12/12/20,488
4,91910,Chula Vista,82682,32.636413,-117.065653,620,3750,9,4379,14.2,12/6/20 - 12/12/20,750


# UPDATE AGOL DASHBOARDS

#### CONNECT TO AGOL ACCOUNT

In [28]:
# Connection object used by get_map/update_map and the layer lookups below.
# NOTE(review): "pro" presumably reuses the portal sign-in of a local ArcGIS Pro
# installation — confirm against the arcgis GIS documentation.
gis = GIS("pro")

#### FUNCTIONS FOR MAP MODIFICATIONS

In [29]:
def get_map(map_id):
    '''
    FETCH A MAP ITEM'S DATA FOR SYMBOLOGY CHANGES

    Looks up the item with id `map_id` through the module-level `gis`
    connection, prints the item, and returns the result of its get_data() call.
    '''
    item = gis.content.get(map_id)
    item_data = item.get_data()
    print(item)
    return item_data

def update_map(map_id, data):
    '''
    WRITE EDITED MAP DATA BACK TO THE ITEM

    Serialises `data` with json.dumps, stores it as the "text" property of the
    item with id `map_id`, and returns the result of the item's update() call.
    '''
    item = gis.content.get(map_id)
    # Serialise before calling update() so the item receives a JSON string.
    payload = json.dumps(data)
    return item.update(item_properties={"text": payload})

#### OVERWRITE FEATURE LAYERS

In [30]:
#########
# TOTAL #
#########

# Look up the cumulative-results item and wrap it as a feature layer collection.
total_layer = gis.content.get(total_layer_id)
total_layer_collection = FeatureLayerCollection.fromitem(total_layer)

# Overwrite the hosted layer with the freshly written cumulative CSV;
# the call's result is echoed as the cell output.
total_layer_collection.manager.overwrite(total_upload)

{'success': True}

In [31]:
##########
# WEEKLY #
##########

# Look up the weekly-results item and wrap it as a feature layer collection.
weekly_layer = gis.content.get(weekly_layer_id)
weekly_layer_collection = FeatureLayerCollection.fromitem(weekly_layer)

# Overwrite the hosted layer with the freshly written weekly CSV;
# the call's result is echoed as the cell output.
weekly_layer_collection.manager.overwrite(change_upload)

{'success': True}

#### UPDATE MAP SYMBOLOGY

In [32]:
#########
# TOTAL #
#########

# Largest cumulative test count; it becomes the top of the graduated-symbol scale.
# int() yields a plain Python int whether .max() returns a Python int or a
# numpy scalar, so json.dumps in update_map can serialise it. The original
# went through numpy.int64(...).item(), but numpy is never imported here.
total_max = int(zips['Total'].max())
print(total_max)

# Fetch the web map definition.
total_map = get_map(total_map_id)

# The renderer of operational layer 2 holds both values that must track the maximum.
total_renderer = total_map['operationalLayers'][2]['layerDefinition']['drawingInfo']['renderer']
total_renderer['visualVariables'][0]['maxDataValue'] = total_max
total_renderer['authoringInfo']['visualVariables'][0]['maxSliderValue'] = total_max

# Save the edited definition back to the map item.
total_map_update = update_map(total_map_id, total_map)
total_map_update

61074
<Item title:"San Diego Zip Code COVID-19 Testing: Total Tests and Percent Positive" type:Web Map owner:jembury8568_SDSUGeo>


True

In [33]:
##########
# WEEKLY #
##########

# Largest weekly test count; it becomes the top of the graduated-symbol scale.
# int() yields a plain Python int whether .max() returns a Python int or a
# numpy scalar; the original's .item() exists only on numpy scalars.
weekly_max = int(change['Total'].max())
print(weekly_max)

# Fetch the web map definition.
weekly_map = get_map(weekly_map_id)

# The renderer of operational layer 2 holds both values that must track the maximum.
weekly_renderer = weekly_map['operationalLayers'][2]['layerDefinition']['drawingInfo']['renderer']
weekly_renderer['visualVariables'][0]['maxDataValue'] = weekly_max
weekly_renderer['authoringInfo']['visualVariables'][0]['maxSliderValue'] = weekly_max

# Save the edited definition back to the map item.
weekly_map_update = update_map(weekly_map_id, weekly_map)
weekly_map_update

4998
<Item title:"San Diego Zip Code COVID-19 Testing: New Tests and Percent Positive" type:Web Map owner:jembury8568_SDSUGeo>


True

#### CHECK DASHBOARDS

In [34]:
# Open both dashboards in the default browser for a visual check of the update.
# new=2 asks the browser for a new tab where possible.
webbrowser.open(total_dash, new=2)
webbrowser.open(weekly_dash, new=2)

True