# Clean & Process COVID-19 Ethnicity Files for Pie Chart Symbology Maps
Center for Human Dynamics in the Mobile Age (HDMA) at San Diego State University

Jessica Embury

### MODULES

In [None]:
import arcpy
import pandas as pd
import os

### USER SET VARIABLE FOR FILE DATE
Set the file date using the county's file-naming convention (for example, `28NOV2020`).

In [None]:
# file date exactly as it appears in the county's file and folder names
date = "28NOV2020"

### CREATE LIST OBJECT WITH ALL FILE NAMES

In [None]:
#directory that holds the per-ethnicity csv files for the chosen date
path = '../covid_data/ethnicity/{}/'.format(date)

#prefix shared by every county file name
base = 'COVID19_DailyTotalRate_'

#each file is named <base><ethnicity>_<date>.csv; build the six names in one pass
aian_file, api_file, black_file, hispanic_file, white_file, multi_file = (
    '{}{}_{}.csv'.format(base, label, date)
    for label in ('AIAN', 'API', 'Black', 'Hispanic', 'White', 'MultipleRace')
)

#processing order used by the later cells
files = [aian_file, api_file, black_file, hispanic_file, white_file, multi_file]

### DATA FRAME WITH SUPPLEMENTAL SRA DATA, COORDINATES

In [None]:
#location of the supplemental sra table
sra_in = './data/sra_info.csv'

#load the base sra data; the ethnicity columns are merged onto it by 'sra_name' later
sra = pd.read_csv(sra_in)

#row count, for comparison with the count printed after the merge
print(sra.shape[0])
sra.head()

### ADD A COLUMN FOR EACH ETHNICITY TO THE SRA DATAFRAME

In [None]:
#add ethnicity data as columns to the sra df
#the report date comes from the first ethnicity file that exists, not the first file in
#the list, because any ethnicity file can be missing in a given week
date_added = False

for file_name in files:

    #not every week has had a file for each ethnicity, so skip ethnicities with no file
    file_path = path + file_name
    if not os.path.exists(file_path):
        continue

    #ethnicity label as it appears in the file name (third '_'-separated token)
    ethnicity = file_name.split('_')[2]
    if ethnicity == 'MultipleRace':
        #the county headers spell this label with a line break
        ethnicity = 'Multiple\nRace'

    #name of the output column: the label with any line break replaced by a space
    column = ethnicity.replace('\n', ' ')

    #temp df for specific ethnicity
    df = pd.read_csv(file_path)

    #add date to sra df (once)
    if not date_added:
        sra['Date'] = df['Date'][0]
        date_added = True

    #add ethnicity columns to sra df
    #multiple column name options to catch naming variations in files from the county
    df = df.rename(columns={
        'Allocated {}\nCase Count (Raw)'.format(ethnicity): column,
        'Allocated {} Case Count (Raw)'.format(column): column,
        'Allocated\n{} Case Count (Raw)'.format(ethnicity): column,
        'Allocated\n{} Case\nCount\n(Raw)'.format(ethnicity): column,
        'Allocated\n{}\nCase\nCount\n(Raw)'.format(ethnicity): column,
        'Allocated\n{}\nRace\nCase\nCount\n(Raw)'.format(ethnicity): column,
        'Geography': 'sra_name',
    })

    #keep only the join key and the case count; missing counts in the file become 0
    subset_df = df[['sra_name', column]].fillna(0)

    #left join so every sra row is kept even if the county file omits it
    sra = sra.merge(subset_df, how='left', on='sra_name')

#final, presentation-ready column names
sra = sra.rename(columns={
    'sra_num': 'SRA',
    'sra_name': 'Name',
    'latitude': 'Latitude',
    'longitude': 'Longitude',
    'AIAN': 'American Indian, AK Native',
    'API': 'Asian, Pacific Islander',
})
print(len(sra))
sra.head()
    

In [None]:
#alphabetize rows by sra name (ascending, missing names last - the pandas defaults)
sra = sra.sort_values('Name')
sra.head()

### EXPORT CSV FILE

In [None]:
# report date taken from the row labelled 0; used to name the output files
date2 = sra['Date'][0]

# the date with its slashes removed, so it can be used in a file name
date_tag = date2.replace('/','')

# write the same csv to the project data folder and to the shared Dropbox folder
for template in ('./data/covid_ethnicity_data_{}.csv',
                 'C:/Users/jesse/Dropbox/Mapping-Vulearable-Pop-Tasks/Ethnicity-Maps/covid_ethnicity_data_{}.csv'):
    sra.to_csv(template.format(date_tag), index=False)

### APPEND NEW DATE TO ETHNICITY POINTS LAYER

In [None]:
# base name shared by the csv, the gdb table and the point layer for this date
tempStr = 'covid_ethnicity_data_{}'.format(date2.replace('/',''))

# csv path for gdb table
csv_in = './data/' + tempStr + '.csv'

# names for the gdb table and the point layer
newTableName = 'tbl_' + tempStr
newMapName = 'lyr_' + tempStr

# allow existing outputs with the same names to be overwritten
arcpy.env.overwriteOutput = True

# current project, its geodatabase (taken from the arcpy workspace) and active map
aprx = arcpy.mp.ArcGISProject('CURRENT')
defaultGeoDb = arcpy.env.workspace
aprx.defaultGeodatabase = defaultGeoDb
currentMap = aprx.activeMap

# step 1: convert the csv into a table in the default geodatabase
arcpy.TableToTable_conversion(csv_in, defaultGeoDb, newTableName)

# step 2: convert the table into an XY point layer using the Longitude/Latitude columns
arcpy.management.XYTableToPoint(newTableName, newMapName, 'Longitude', 'Latitude')

# step 3: append the new date's point data to the main feature class
arcpy.management.Append(newMapName, 'lyr_main_ethnicity_pts')