# Weekly COVID-19 Cases Dashboard Update
Center for Human Dynamics in the Mobile Age (HDMA) at San Diego State University

Jessica Embury

### MODULES

In [None]:
#import modules
from arcgis.gis import GIS
from arcgis import geometry
from arcgis.features import GeoAccessor, GeoSeriesAccessor
from arcgis.features import FeatureLayerCollection
from arcgis.features import FeatureLayer

from copy import deepcopy
import numpy as np
import os
import pandas as pd
import requests
import sys
import webbrowser

#ignore pandas slice warning
#setting the option to None silences pandas' chained-assignment (SettingWithCopy) warning for the whole session
pd.options.mode.chained_assignment = None  # default='warn'

### CONFIRM USER VARIABLES BEFORE RUNNING CELLS

In [None]:
###########################
###SET DATA THROUGH DATE###
###########################
#this string becomes the name of the new date column and, with '/' replaced or removed, part of the output folder and file names
most_recent_date = '07/06/2021'  # Change date to the data through date using 'MM/DD/YYYY' format


##########################
###SET PATHS IN AND OUT###
##########################
#PATHS IN
#additional needed zip info - community name and population
zips_info_path = './data/zip_coords_pop.csv'

#PATHS OUT
#wide df with a column for each date (read, extended with the new date column, and written back)
wide_df_path = './data/covid_accumulated_formatted.csv'

#path out for dash updates, feature layer overwrite
path_out= './data/covid_percents_upload.csv'
#dated copy of the same table; '{}' is filled with the date without slashes
path_out2 = '../covid_data/percents/covid_percents_{}.csv'

#path to csv used to append new column to feature layer 2 for cumulative cases dashboard
append_csv_path = './data/sd_zip_cumulative_covid_append.csv'

#upload to dropbox
#machine-specific absolute path; '{}' is filled with the date without slashes
dropbox_cumulative = 'C:/Users/jesse/Dropbox/Mapping-Vulearable-Pop-Tasks/SD-County-Data/COVID-Data-Share-at-HDMA-Center/SD_Zipcode_COVID_{}.csv'

# 7 day dash layer overwrite
# relative path with no folder, so the file lands in the current working directory
seven_path = 'sandiego_covid_upload_7day.csv'

################################
###ARCGIS DETAILS FOR UPDATES###
################################

#feature layer IDs to overwrite
#feature_layer2: cumulative cases layer that receives a new date field; seven_layer: layer overwritten from seven_path
feature_layer2 = '2a2645b5f569461d916122c3e16d96f3'
seven_layer = 'f6558646808b4b88ba1d77e984b9f7e8'

#csv to append to feature_layer2
#item ID of the hosted csv that is refreshed from append_csv_path before the append
append_csv = '49be034d6b7a406ca291cb44e94e1be1'

#map IDs for symbology update
#confirmed cases map
cc = "763a114f5f114139af5517ac4c785bd8"

# seven day maps
seven_map = 'cd2e0028e1f049e7bd268d06c26cfe22'
seven_map_mobile = 'ee46036487c3491caf69209d2a67940c'

#urls for dashboards (to verify)
#cumulative_dash and seven_dash are opened by the final cell; cumulative_mobile is kept for manual checks
cumulative_dash = 'https://arcg.is/1zXq1m'
cumulative_mobile = 'https://arcg.is/1WLnjG'
seven_dash = 'https://experience.arcgis.com/experience/a630917e020440ba9a598bf1c32b7a74'

## SAVE SD COUNTY COVID-19 PDF FILES TO DROPBOX

In [None]:
#specify directory for new folder (one folder per update, named with the date as MM-DD-YYYY)
pdf_path = 'C:/Users/jesse/Dropbox/Mapping-Vulearable-Pop-Tasks/SD-County-Data/{}'.format(most_recent_date.replace('/', '-'))

#create the folder (and any missing parent folders) if it doesn't exist;
#exist_ok replaces the separate exists() check, so there is no gap between checking and creating
os.makedirs(pdf_path, exist_ok=True)

#base url for pdfs
url = 'https://www.sandiegocounty.gov/content/dam/sdc/hhsa/programs/phs/Epidemiology/'

#pdf names
#the names are already percent-encoded (%20 = space, %2b = '+') because they are appended to the base url as-is
pdfs = ['COVID-19%20Percentage%20Positive.pdf', 'COVID-19%20Cases%20by%20Date%20of%20Illness%20Onset.pdf', 
        'COVID-19%20Daily%20Update_City%20of%20Residence.pdf', 'COVID-19%20City%20of%20Residence_MAP.pdf', 
        'COVID-19%20Race%20and%20Ethnicity%20Summary.pdf', 'COVID-19%20Summary%20of%20Cases%20by%20Zip%20Code.pdf', 
        'COVID-19%20Hospitalizations%20by%20Date%20Admitted.pdf', 'COVID-19%20Hospitalizations%20Summary_ALL.pdf', 
        'COVID-19%20Deaths%20by%20Date%20of%20Death.pdf', 'COVID-19%20Deaths%20by%20Demographics.pdf', 
        'COVID-19_Daily_Status_Update.pdf', 'COVID-19%20Watch.pdf', 'Summary_County_of_San_Diego_Supported_Tests_by_Race_Ethnicity.pdf',
        'Summary_of_All_Tests_Reported_by_Race_Ethnicity.pdf', 'Summary_of_All_Tests_Reported_by_Zip_Code_of_Residence.pdf', 
        'Summary_Tests_Among_San_Diego_County_Residents_by_Race_Ethnicity.pdf', 'COVID19%20HHSA%20Region%20Dashboard.pdf', 
        'COVID19%20NORTH%20COASTAL%20Dashboard.pdf', 'COVID19%20NORTH%20INLAND%20Dashboard.pdf', 'COVID19%20NORTH%20CENTRAL%20Dashboard.pdf', 
        'COVID19%20CENTRAL%20Dashboard.pdf', 'COVID19%20EAST%20Dashboard.pdf', 'COVID19%20SOUTH%20Dashboard.pdf', 
        'COVID-19%20Vaccinations%20Demographics.pdf', 'COVID-19%20Vaccine%20Report%20by%20Zipcode.pdf',
        'Summary_Public_Health_Rooms_by_Race_Ethnicity.pdf', 'COSD_Case_Investigators_by_Race_Ethnicity.pdf', 'COSD_Contact_Tracers_by_Race_Ethnicity.pdf',
        'COVID-19%20Variant%20Case%20Summary.pdf', 'COVID-19%20Homeless%20Summary.pdf', 'COVID-19%20Vaccinations%20by%20Health%20Equity%20Zip%20Codes.pdf',
        'COVID-19%20Health%20Equity%20Zip%20Codes%20Summary%20and%20Vaccinations%20Report.pdf', 'COVID-19%20Vaccinations%20by%20Census%20Tract.pdf',
        'COVID-19%20Vaccinations%20Demographics%20Report%2065%2bResidents.pdf', 'COVID-19%20Deaths%20by%20Zip%20Code.pdf']

In [None]:
#for each pdf in list, get and save
#a failed download is reported and skipped, so one missing report neither stops the
#run nor leaves an HTML error page saved under a .pdf name
def _save_pdf(pdf_url, out_file):
    '''
    DOWNLOAD ONE PDF AND WRITE IT TO DISK

    pdf_url: full url of the pdf
    out_file: local path the pdf is written to

    Returns True when the file was saved. Returns False, writing nothing, when the
    request fails (connection error, timeout or a non-2xx status code).
    '''
    try:
        #.content reads the whole body anyway, so stream=True is not needed;
        #the timeout keeps an unresponsive server from hanging the notebook
        response = requests.get(pdf_url, timeout=60)
        response.raise_for_status()
    except requests.RequestException as err:
        print('  FAILED: {}'.format(err))
        return False

    with open(out_file, 'wb') as f:
        f.write(response.content)

    return True

failed_pdfs = []

for pdf_name in pdfs:
    print(pdf_name)
    #names in the list are url-encoded; turn %20 back into spaces for the local file name
    if not _save_pdf(url + pdf_name, pdf_path + '/' + pdf_name.replace('%20', ' ')):
        failed_pdfs.append(pdf_name)

#pdf with a different url
#rstrip avoids the double slash that 'url' + '/covid19/...' produced when url ends with '/'
briefing_url = url.rstrip('/') + '/covid19/MediaBriefingSlides/mediaBriefingSlides.pdf'
if not _save_pdf(briefing_url, pdf_path + '/' + 'mediaBriefingSlides.pdf'):
    failed_pdfs.append('mediaBriefingSlides.pdf')

#summary so skipped files are not overlooked in the long printout
if failed_pdfs:
    print('{} pdf(s) could not be downloaded: {}'.format(len(failed_pdfs), failed_pdfs))

## FORMAT CONFIRMED CASES FROM ZIP PDF, SAVE TO CSV FILE

In [None]:
# save info from cumulative case by zip PDF to csv file. Copy/paste data will result in all data in one column.
# the single 'cases' column is read in repeating groups of three rows: zip code, case count, rate
raw = pd.read_csv('./data/zip_cases_raw.csv')

# zip code
raw1 = raw.iloc[0::3, :].reset_index()
print(len(raw1))
# case count
raw2 = raw.iloc[1::3, :].reset_index()
print(len(raw2))
# rate
raw3 = raw.iloc[2::3, :].reset_index()
print(len(raw3))

# number of zip code rows to use: still capped at 110, but never more than the file
# provides, so a shorter table no longer fails with a KeyError
n_zips = min(110, len(raw1), len(raw2))
if n_zips == 0:
    raise ValueError('no zip code rows found in ./data/zip_cases_raw.csv')

# format numbers in counts column: remove thousands separators, then convert to int
counts = raw2['cases'].iloc[:n_zips].astype(str).str.replace(',', '', regex=False).astype(int)

# build the table in one step; row-by-row DataFrame.append was removed in pandas 2.0
t = pd.DataFrame({
    'Zipcode': raw1['cases'].iloc[:n_zips].to_numpy(),
    most_recent_date: counts.to_numpy(),
})

print(type(t[most_recent_date][0]))

print(len(t))
t.head()

In [None]:
#add new column to csv with all dates
wide = pd.read_csv(wide_df_path)
wide['Zipcode'] = wide['Zipcode'].astype(str)

#if the saved table already has a column for this date (cell re-run after saving),
#drop it first; otherwise the merge creates '<date>_x'/'<date>_y' columns and the
#fillna below fails with a KeyError
if most_recent_date in wide.columns:
    wide = wide.drop(columns=[most_recent_date])

#left merge keeps every zip code already in the wide table; zip codes missing from
#the new data get 0 cases
wide = wide.merge(t, on='Zipcode', how='left')
wide[most_recent_date] = wide[most_recent_date].fillna(0)
print(len(wide))
wide.head(2)

In [None]:
#write the updated wide table back to its csv and to the dated Dropbox copy
date_tag = most_recent_date.replace('/', '')
for wide_out in (wide_df_path, dropbox_cumulative.format(date_tag)):
    wide.to_csv(wide_out, index=False)

### MERGE WIDE DF WITH COORDINATES/COMMUNITY/POPULATION CSV DATA
### SUBSET DF WITH ONLY COLUMNS FOR HDMA RATES/PERCENTS FEATURE LAYER

In [None]:
#reload the saved wide table with zip codes converted to strings
wide = pd.read_csv(wide_df_path).astype({'Zipcode': str})
wide.head(2)

In [None]:
#import additional needed zip info
zips_info = pd.read_csv(zips_info_path)
zips_info = zips_info.rename(columns = {'Zip':'Zipcode'})
zips_info['Zipcode'] = zips_info['Zipcode'].astype(str)

#merge wide df with coords and extra zip code info
#(inner merge: zip codes missing from either table are dropped)
wide = wide.merge(zips_info, on='Zipcode')

#create new df for use feature layer overwrite
cols = ['Zipcode', 'Community', 'Latitude', 'Longitude', '2018_population']
#explicit copy: the new columns below are added to an independent frame rather than
#to a slice of wide
df = wide[cols].copy()

df['Date'] = most_recent_date

df['Confirmed Cases'] = wide[most_recent_date]

#cases per 100,000 residents, rounded to 2 decimals
df['Rate Per 100K'] = (df['Confirmed Cases']/df['2018_population']*100000).round(2)

df = df.fillna(0)
df.head(2)

### TEMPORARY DF TO CALCULATE CASE INCREASE AND RATES OF CHANGE, MERGE TO MAIN DF

In [None]:
#CREATE DF
#the newest date column and the one just before it are located by name; counting
#back from the end of the frame (columns[-6], columns[-5]) picks the wrong columns
#as soon as the number of zip-info columns merged into wide changes
cur_col = most_recent_date
prev_col = wide.columns[wide.columns.get_loc(cur_col) - 1]
date_df = wide[['Zipcode', prev_col, cur_col]].copy()

#CREATE COLUMN FOR CASE INCREASES (column in feature layer, but no longer used due to weekly update change)
date_df['Daily Increased'] = np.nan

#CREATE COLUMN FOR DAILY CHANGE RATE (column in feature layer, but no longer used due to weekly update change)
date_df['Daily Change Rate*1000'] = np.nan

#CREATE COLUMN FOR 7 DAY ROLLING RATE OF CHANGE AND 7 DAY INCREASE COLUMN
date_df['7 Days Rolling Change*1000'] = np.nan

#increase = newest cumulative count minus previous cumulative count
date_df['7 Day Case Increase'] = date_df[cur_col] - date_df[prev_col]
print(date_df['7 Day Case Increase'].sum())

#CALCULATE 7 DAY CHANGE RATE
#increase / previous count / 7 * 1000; with numeric columns a previous count of 0
#gives inf or NaN instead of raising
try:
    date_df['7 Days Rolling Change*1000'] = round(date_df['7 Day Case Increase']/date_df[prev_col]/7*1000, 2)
except (TypeError, ZeroDivisionError) as err:
    #best effort as before (the column stays NaN), but the failure is now reported
    print('7 day change rate not calculated: {}'.format(err))

print(len(date_df))
date_df.head()

# MERGE WITH MAIN DF

In [None]:
#attach the change columns from date_df to the main df, matching rows on zip code
change_cols = ['Daily Increased', 'Daily Change Rate*1000', '7 Days Rolling Change*1000', '7 Day Case Increase']
date_df_subset = date_df[['Zipcode'] + change_cols]

df = pd.merge(df, date_df_subset, on='Zipcode')
print(len(df))

df.head()

In [None]:
#cast the two count columns to int type
for count_col in ('Confirmed Cases', '7 Day Case Increase'):
    df[count_col] = df[count_col].astype(int)

#keep only zip codes with population >= 5000 and confirmed cases >= 10
keep_rows = (df['2018_population'] >= 5000) & (df['Confirmed Cases'] >= 10)
df = df[keep_rows]
print(len(df))

df.head()

# CALCULATE NEW/CUMULATIVE CASE PERCENTS

In [None]:
#county-wide cumulative total, taken over the rows that remain in df
sum_confirmed = df['Confirmed Cases'].sum()
print(sum_confirmed)

#each zip code's share of that total, as a percent rounded to 2 decimals
zip_share = (df['Confirmed Cases']/sum_confirmed)*100
df['percent_total'] = zip_share.round(2)

#column in feature layer, but no longer used due to weekly update change
df['percent_daily'] = np.nan

df.head()

### OUTPUT CSV FOR RECORDS

In [None]:
#write the same table to the upload csv, the dated records csv and the 7 day overwrite csv
records_tag = most_recent_date.replace('/','')
for csv_out in (path_out, path_out2.format(records_tag), seven_path):
    df.to_csv(csv_out, index = False)

# CONNECT TO ARCGIS ACCOUNT
Reference for authentication schemes: https://developers.arcgis.com/python/guide/working-with-different-authentication-schemes/

In [None]:
#alternative with explicit credentials: gis = GIS(portal, username, password)
#"pro" is passed instead, so no username or password is stored in the notebook
#NOTE(review): presumably this reuses the active ArcGIS Pro sign-in - confirm against the authentication guide
gis = GIS("pro")

### FIND MAX/MIN VALUES FOR MAP SYMBOLOGY CHANGES

In [None]:
#largest cumulative count and largest 7 day increase among the rows of df
max_confirmed = df.loc[:, 'Confirmed Cases'].max()
max_7day = df.loc[:, '7 Day Case Increase'].max()

print('{} {}'.format(max_confirmed, max_7day))

### FUNCTIONS

In [None]:
def get_map (map_id):
    '''
    FETCH A MAP ITEM AND RETURN ITS DATA FOR SYMBOLOGY CHANGES

    map_id: item ID passed to gis.content.get()

    Prints the retrieved item as a quick check, then returns the result of
    its get_data() call.
    '''
    web_map = gis.content.get(map_id)
    map_json = web_map.get_data()

    #For prettified JSON, enable the line below
    #print(json.dumps(map_json, indent=4, sort_keys=True))

    print(web_map)

    return map_json
    
def update_map (map_id, data):
    '''
    UPDATE MAP TO SAVE CHANGES

    map_id: item ID passed to gis.content.get()
    data: map definition (as returned by get_map, with edits applied)

    Serializes data to JSON, stores it as the item's "text" property and
    returns the result of the item's update() call.
    '''
    #json is not imported at the top of the notebook, so json.dumps below raised
    #NameError; importing here keeps the function self-contained
    import json

    m = gis.content.get(map_id)
    
    # Set the item_properties to include the desired update
    properties = {"text": json.dumps(data)}

    # 'Commit' the updates to the Item
    update = m.update(item_properties=properties)
    
    return update

# UPDATE CUMULATIVE CASE DASHBOARD

### APPEND DATA TO CUMULATIVE CASES FEATURE LAYER
Reference: https://developers.arcgis.com/python/guide/appending-features/

In [None]:
#get feature layer containing updated data for maps associated with the CUMULATIVE COVID-19 dashboard
#(item looked up by the ID stored in feature_layer2; the bare name below displays it as the cell output)
layer2 = gis.content.get(feature_layer2)
layer2

In [None]:
#Take the first layer of the item; the new date field is added to this layer's definition.
cum_covid_lyr = layer2.layers[0]
cum_covid_lyr

In [None]:
#reformat date for use in appending: append_source = column name in csv, append_field = column name in feature layer, append_alias = column alias
#drop one leading zero from each part of the date, e.g. '07/06/2021' -> ['7', '6', '2021'];
#startswith compares by value (the previous "is '0'" compared object identity, which
#only works by accident of string interning) and is safe on an empty part
date_parts = [part[1:] if part.startswith('0') else part for part in most_recent_date.split('/')]

append_source = '_'.join(date_parts)

append_field = 'F' + append_source
append_alias = append_source.replace('_','/')

print(append_source, append_field, append_alias)

#Create a dictionary from a deep copy of a field in the feature layer, and update the values of this dictionary to reflect a new field.
#the field at index 5 is used as the template; only name, alias and length are changed
new_field = dict(deepcopy(cum_covid_lyr.properties.fields[5]))
new_field['name'] = append_field
new_field['alias'] = append_alias
new_field['length'] = "10"
print(new_field)

#Update feature layer definition with the new field using the add_to_definition() method.
field_list = [new_field]
cum_covid_lyr.manager.add_to_definition({"fields":field_list})

In [None]:
#only need to add index to 'Zipcode' once - cell kept for reference

#Add a unique index to the new attribute field, needed to append
#flds = [f.fields.lower() for f in cum_covid_lyr.properties.indexes if f.isUnique]

#for fld in cum_covid_lyr.properties.fields:
#    if fld.name.lower() in flds:
#        print(f"{fld.name:30}{fld.type:25}isUnique")
#    else:
#        print(f"{fld.name:30}{fld.type:25}")

#Create a copy of one index, then edit it to reflect values for a new index. Then add that to the layer definition.
#name_idx = dict(deepcopy(cum_covid_lyr.properties['indexes'][0]))
#name_idx['name'] = 'Zipcode'
#name_idx['fields'] = 'Zipcode'
#name_idx['isUnique'] = True
#name_idx['description'] = 'index_name'
#name_idx

#index_list = [name_idx]
#cum_covid_lyr.manager.add_to_definition({"indexes":index_list})

#Verify the index was added
#layer2 = gis.content.get(feature_layer2)
#layer2

#flds = [f.fields.lower() for f in cum_covid_lyr.properties.indexes if f.isUnique]

#for fld in cum_covid_lyr.properties.fields:
#    if fld.name.lower() in flds:
#        print(f"{fld.name:30}{fld.type:25}isUnique")
#    else:
#        print(f"{fld.name:30}{fld.type:25}") 

In [None]:
#build the two-column csv (Zipcode + new date column) used to add the new date to the feature layer
append_df2 = wide[['Zipcode', most_recent_date]].rename(columns = {most_recent_date: append_source})

append_df2 = append_df2[append_df2.index.notnull()]
append_df2 = append_df2.fillna(0)
append_df2[append_source] = append_df2[append_source].astype(int)
append_df2.to_csv(append_csv_path)

#replace the file behind the hosted csv item with the new csv
append_csv_item = gis.content.get(append_csv)
append_csv_item.update({}, append_csv_path)

#analyze the csv item; its publishParameters are passed as source_info to the append
append_csv_info = gis.content.analyze(item=append_csv, file_type='csv', location_type='none')

#csv column -> feature layer field pairs
csv_to_layer = [{"name": append_field, "source": append_source},
                {"name": "Zipcode", "source": "Zipcode"}]
layer_fields = [append_field, "Zipcode"]

#append new date column to feature layer from csv item, matching rows on Zipcode
cum_covid_lyr.append(item_id = append_csv,
                     upload_format = 'csv',
                     field_mappings = csv_to_layer,
                     source_info = append_csv_info['publishParameters'],
                     update_geometry = False,
                     append_fields = layer_fields,
                     skip_inserts = True,
                     upsert_matching_field = "Zipcode")

### MODIFY CONFIRMED CASES WEB MAP SYMBOLOGY

In [None]:
#confirmed cases map

#get map data
cc_data = get_map(cc)

#shortcuts into the nested map definition (same objects, so edits land in cc_data)
cc_layer = cc_data['operationalLayers'][2]
cc_drawing = cc_layer['layerDefinition']['drawingInfo']
cc_renderer = cc_drawing['renderer']
cc_label = cc_drawing['labelingInfo'][0]
cc_popup = cc_layer['popupInfo']

#point the symbols at the new date field and raise the symbology maximum
cc_renderer['field'] = append_field
cc_size = cc_renderer['visualVariables'][0]
cc_size['field'] = append_field
cc_size['maxDataValue'] = max_confirmed.item()
cc_renderer['authoringInfo']['visualVariables'][0]['maxSliderValue'] = max_confirmed.item()

#label with the new date field
cc_label['labelExpressionInfo']['expression'] = '$feature["{}"]'.format(append_field)
cc_label['labelExpressionInfo']['value'] = ('{' + append_field + '}')
cc_label['fieldInfos'][0]['fieldName'] = append_field

#filter on the new date field
cc_layer['layerDefinition']['definitionExpression'] = ('{} > 0'.format(append_field))

#popup: copy the last field entry for the new date, hide the old last entry, then add the copy
cc_field_infos = cc_popup['fieldInfos']
new_date = cc_field_infos[-1].copy()
new_date['fieldName'] = append_field
new_date['label'] = append_alias
cc_field_infos[-1]['visible'] = False
cc_field_infos.append(new_date)

#popup chart: add the new date field to the charted fields
popup_chart = cc_popup['mediaInfos'][0]['value']['fields']
popup_chart.append(append_field)
cc_popup['mediaInfos'][0]['value']['fields'] = popup_chart

#update map to save changes
cc_update = update_map(cc, cc_data)
cc_update

# UPDATE 7 DAY DASHBOARD

### OVERWRITE

In [None]:
#fetch the seven day feature layer item and wrap it in a FeatureLayerCollection
layer = gis.content.get(seven_layer)
layer_collection = FeatureLayerCollection.fromitem(layer)

#replace the layer's data with the csv saved at seven_path, via the manager's overwrite() method
layer_collection.manager.overwrite(seven_path)

### MAP SYMBOLOGY

In [None]:
#seven day map

#get map data
sm_data = get_map(seven_map)

#graduated point symbology: operational layers 4 and 5 get the same new maximum,
#the largest 7 day case increase
sm_max = max_7day.item()
for sm_idx in (4, 5):
    sm_renderer = sm_data['operationalLayers'][sm_idx]['layerDefinition']['drawingInfo']['renderer']
    sm_renderer['authoringInfo']['visualVariables'][0]['maxSliderValue'] = sm_max
    sm_renderer['visualVariables'][0]['maxDataValue'] = sm_max

#update map to save changes
sm_update = update_map(seven_map, sm_data)
sm_update

In [None]:
#seven day map mobile

#get map data
smm_data = get_map(seven_map_mobile)

#graduated point symbology: operational layers 2 and 3 get the same new maximum,
#the largest 7 day case increase
smm_max = max_7day.item()
for smm_idx in (2, 3):
    smm_renderer = smm_data['operationalLayers'][smm_idx]['layerDefinition']['drawingInfo']['renderer']
    smm_renderer['authoringInfo']['visualVariables'][0]['maxSliderValue'] = smm_max
    smm_renderer['visualVariables'][0]['maxDataValue'] = smm_max

#update map to save changes
smm_update = update_map(seven_map_mobile, smm_data)
smm_update

# VIEW UPDATED DASHBOARDS

In [None]:
#open cumulative/growth chart map
#new=2 asks the browser for a new tab where possible
webbrowser.open(cumulative_dash, new=2)

#open seven day cases dash
webbrowser.open(seven_dash, new=2)