# Clean & Process COVID-19 Ethnicity Files for Pie Chart Symbology Maps
Center for Human Dynamics in the Mobile Age (HDMA) at San Diego State University

Jessica Embury

### MODULES

In [17]:
import arcpy
import pandas as pd
import os

### USER SET VARIABLE FOR FILE DATE
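Use the county's naming convention for the file date (for example, `26DEC2020`), since it must match the date suffix in the downloaded file names.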

In [18]:
# date stamp exactly as it appears in the county's file names
date = "26DEC2020"

### CREATE LIST OBJECT WITH ALL FILE NAMES

In [19]:
#folder holding this date's ethnicity files
path = '../covid_data/ethnicity/{}/'.format(date)

#every file name is <base><ethnicity>_<date>.csv
base = 'COVID19_DailyTotalRate_'

#one name per ethnicity, unpacked in the same order used for the list below
aian_file, api_file, black_file, hispanic_file, white_file, multi_file = (
    '{}{}_{}.csv'.format(base, group, date)
    for group in ('AIAN', 'API', 'Black', 'Hispanic', 'White', 'MultipleRace')
)

#processing order for the later cells
files = [aian_file, api_file, black_file, hispanic_file, white_file, multi_file]
print(files)

['COVID19_DailyTotalRate_AIAN_26DEC2020.csv', 'COVID19_DailyTotalRate_API_26DEC2020.csv', 'COVID19_DailyTotalRate_Black_26DEC2020.csv', 'COVID19_DailyTotalRate_Hispanic_26DEC2020.csv', 'COVID19_DailyTotalRate_White_26DEC2020.csv', 'COVID19_DailyTotalRate_MultipleRace_26DEC2020.csv']


### DATA FRAME WITH SUPPLEMENTAL SRA DATA, COORDINATES

In [20]:
#location of the base SRA table (number, name, latitude, longitude per SRA)
sra_in = './data/sra_info.csv'

#load the base sra data
sra = pd.read_csv(sra_in)

#report the row count, then preview the first rows
print(sra.shape[0])
sra.head()

41


Unnamed: 0,sra_num,sra_name,latitude,longitude
0,1,Central San Diego,32.722644,-117.141073
1,2,Peninsula,32.742274,-117.216035
2,3,Coronado,32.657317,-117.143062
3,4,National City,32.665847,-117.099737
4,5,Southeastern San Diego,32.685705,-117.038621


### ADD A COLUMN FOR EACH ETHNICITY TO THE SRA DATAFRAME

In [21]:
#add ethnicity data as columns to the sra df
#
#For each county file that exists, the raw allocated case count column is
#renamed to the ethnicity's label and left-merged into sra on sra_name.
#The Date column is taken from the first file that is actually present.

#header spellings seen in county files; '{}' receives the ethnicity label
#these keep the label's line break (e.g. 'Multiple\nRace')
raw_templates = ('Allocated {}\nCase Count (Raw)',
                 'Allocated\n{} Case Count (Raw)',
                 'Allocated\n{} Case\nCount\n(Raw)',
                 'Allocated\n{}\nCase\nCount\n(Raw)',
                 'Allocated\n{}\nRace\nCase\nCount\n(Raw)')
#these use the label flattened to one line (e.g. 'Multiple Race')
flat_templates = ('Allocated {} Case Count (Raw)',
                  'Allocated {} Race Case Count (Raw)')

for file_name in files:

    #not every week has had a file for each ethnicity, so skip the missing ones
    if not os.path.exists(path + file_name):
        continue

    #ethnicity label is the third '_'-separated token of the file name
    ethnicity = file_name.split('_')[2]
    if ethnicity == 'MultipleRace':
        ethnicity = 'Multiple\nRace'

    #single-line form of the label, used as the output column name
    column = ethnicity.replace('\n', ' ')

    #temp df for specific ethnicity
    df = pd.read_csv(path + file_name)

    #add date to sra df from the first file that exists
    #(previously only the first file in the list was used, so a missing
    #AIAN file left sra without a Date column and broke the export step)
    df = df.rename(columns={'date': 'Date'})
    if 'Date' not in sra.columns:
        sra['Date'] = df['Date'][0]

    #map every known header variation onto the output column name
    renames = {template.format(ethnicity): column for template in raw_templates}
    renames.update({template.format(column): column for template in flat_templates})
    renames['Geography'] = 'sra_name'
    df = df.rename(columns=renames)

    #keep only the join key and the case counts; missing counts become 0
    subset_df = df[['sra_name', column]].fillna(0)

    sra = sra.merge(subset_df, how='left', on='sra_name')

#final column names for the exported table (AIAN keeps its short name)
sra = sra.rename(columns={'sra_num': 'SRA',
                          'sra_name': 'Name',
                          'latitude': 'Latitude',
                          'longitude': 'Longitude',
                          'API': 'Asian, Pacific Islander'})
print(len(sra))
sra.head()
    

41


Unnamed: 0,SRA,Name,Latitude,Longitude,Date,AIAN,"Asian, Pacific Islander",Black,Hispanic,White,Multiple Race
0,1,Central San Diego,32.722644,-117.141073,12/26/2020,13.1,300.5,431.8,4010.6,2075.3,80.1
1,2,Peninsula,32.742274,-117.216035,12/26/2020,0.0,35.3,38.3,311.6,766.7,27.9
2,3,Coronado,32.657317,-117.143062,12/26/2020,0.0,0.0,25.0,82.0,241.0,0.0
3,4,National City,32.665847,-117.099737,12/26/2020,5.8,369.5,77.7,2818.9,221.0,15.6
4,5,Southeastern San Diego,32.685705,-117.038621,12/26/2020,18.0,868.5,545.2,5917.3,671.5,81.2


In [22]:
#order the rows alphabetically by SRA name
#(sort_values defaults: ascending, quicksort, missing names last, returns a new df)
sra = sra.sort_values('Name')
sra.head()

Unnamed: 0,SRA,Name,Latitude,Longitude,Date,AIAN,"Asian, Pacific Islander",Black,Hispanic,White,Multiple Race
25,38,Alpine,32.837524,-116.758443,12/26/2020,7.1,7.4,5.6,111.6,292.6,6.5
40,63,Anza-Borrego Springs,33.114514,-116.289908,12/26/2020,0.0,0.0,0.0,33.0,17.7,0.0
28,41,Carlsbad,33.133083,-117.284496,12/26/2020,9.1,133.0,36.5,819.5,1297.6,30.3
0,1,Central San Diego,32.722644,-117.141073,12/26/2020,13.1,300.5,431.8,4010.6,2075.3,80.1
15,21,Chula Vista,32.622386,-117.077827,12/26/2020,7.6,280.6,118.0,6626.6,539.8,47.6


### EXPORT CSV FILE

In [23]:
# date string stored in the table; its slashes are dropped for use in file names
date2 = sra['Date'][0]
date_tag = date2.replace('/', '')

# write the same csv (without the index) to the project folder and the shared Dropbox folder
for out_template in ('./data/covid_ethnicity_data_{}.csv',
                     'C:/Users/jesse/Dropbox/Mapping-Vulearable-Pop-Tasks/Ethnicity-Maps/covid_ethnicity_data_{}.csv'):
    sra.to_csv(out_template.format(date_tag), index=False)

### APPEND NEW DATE TO ETHNICITY POINTS LAYER

In [24]:
# names derived from the export date: source csv, gdb table and point layer
date_tag = date2.replace('/', '')
csv_in = './data/covid_ethnicity_data_{}.csv'.format(date_tag)
tempStr = 'covid_ethnicity_data_{}'.format(date_tag)
newTableName = 'tbl_' + tempStr
newMapName = 'lyr_' + tempStr

# allow existing outputs to be overwritten when the cell is re-run
arcpy.env.overwriteOutput = True

# current project; its default geodatabase is set to the active workspace
aprx = arcpy.mp.ArcGISProject('CURRENT')
defaultGeoDb = arcpy.env.workspace
aprx.defaultGeodatabase = defaultGeoDb
currentMap = aprx.activeMap

# step 1: load the csv into the geodatabase as a table (check the gdb to confirm)
arcpy.TableToTable_conversion(csv_in, defaultGeoDb, newTableName)

# step 2: turn the table into a point layer using its Longitude/Latitude fields
arcpy.management.XYTableToPoint(newTableName, newMapName, 'Longitude', 'Latitude')

In [25]:
# main feature class that accumulates the points from every processed date
main_layer = 'lyr_main_ethnicity_pts'

# add this date's points to it
arcpy.management.Append(newMapName, main_layer)