In [34]:
# import dependencies
import pandas as pd
import requests
import json


In [35]:
# load the ccvi (covid-19 vulnerability index) csv into a dataframe and, in the
# same step, discard every row that has a null in at least one column
# (the original author counted 655 such rows)
ccvi = pd.read_csv('../resources/ccvi.csv').dropna(axis=0, how='any')

# show the cleaned dataframe
ccvi


Unnamed: 0,FIPS,stateName,countyName,ccvi,theme1,theme2,theme3,theme4,theme5,theme6,theme7
0,1001020100,ALABAMA,Autauga,0.441972,0.458336,0.377478,0.170811,0.841617,0.966294,0.209805,0.287591
1,1001020200,ALABAMA,Autauga,0.742619,0.562935,0.584587,0.683826,0.768947,0.982399,0.209805,0.427810
2,1001020300,ALABAMA,Autauga,0.778072,0.245094,0.645024,0.472226,0.863762,0.953086,0.724817,0.439041
3,1001020400,ALABAMA,Autauga,0.362727,0.023280,0.370119,0.192050,0.977978,0.894654,0.209805,0.425573
4,1001020500,ALABAMA,Autauga,0.816600,0.319725,0.421440,0.764060,0.753240,0.978307,0.749454,0.497597
...,...,...,...,...,...,...,...,...,...,...,...
72832,56043000200,WYOMING,Washakie,0.193098,0.632400,0.106362,0.792014,0.669875,0.126091,0.259923,0.008196
72833,56043000301,WYOMING,Washakie,0.288511,0.536803,0.358751,0.539916,0.400654,0.199599,0.259923,0.589599
72834,56043000302,WYOMING,Washakie,0.574666,0.804595,0.349800,0.613380,0.701398,0.137185,0.815509,0.273972
72835,56045951100,WYOMING,Weston,0.112481,0.476381,0.238316,0.834326,0.559188,0.149020,0.037122,0.008224


In [36]:
# dictionary for converting state names to their corresponding FIPS numbers or
# postal abbreviations.
#   key    : state name in lowercase with all spaces removed (e.g. 'newyork')
#   'num'  : two-character state FIPS code, kept as a string so the leading zero survives
#   'abbr' : two-letter postal abbreviation
# covers the 50 states plus the District of Columbia and Puerto Rico,
# listed in FIPS order so gaps and duplicates are easy to spot
states = {
    'alabama': {'num': '01', 'abbr': 'AL'},
    'alaska': {'num': '02', 'abbr': 'AK'},
    'arizona': {'num': '04', 'abbr': 'AZ'},
    'arkansas': {'num': '05', 'abbr': 'AR'},
    'california': {'num': '06', 'abbr': 'CA'},
    'colorado': {'num': '08', 'abbr': 'CO'},
    'connecticut': {'num': '09', 'abbr': 'CT'},
    'delaware': {'num': '10', 'abbr': 'DE'},
    'districtofcolumbia': {'num': '11', 'abbr': 'DC'},
    'florida': {'num': '12', 'abbr': 'FL'},
    'georgia': {'num': '13', 'abbr': 'GA'},
    'hawaii': {'num': '15', 'abbr': 'HI'},
    'idaho': {'num': '16', 'abbr': 'ID'},
    'illinois': {'num': '17', 'abbr': 'IL'},
    'indiana': {'num': '18', 'abbr': 'IN'},
    'iowa': {'num': '19', 'abbr': 'IA'},
    'kansas': {'num': '20', 'abbr': 'KS'},
    'kentucky': {'num': '21', 'abbr': 'KY'},
    'louisiana': {'num': '22', 'abbr': 'LA'},
    'maine': {'num': '23', 'abbr': 'ME'},
    'maryland': {'num': '24', 'abbr': 'MD'},
    'massachusetts': {'num': '25', 'abbr': 'MA'},
    'michigan': {'num': '26', 'abbr': 'MI'},
    'minnesota': {'num': '27', 'abbr': 'MN'},
    'mississippi': {'num': '28', 'abbr': 'MS'},
    'missouri': {'num': '29', 'abbr': 'MO'},
    'montana': {'num': '30', 'abbr': 'MT'},
    'nebraska': {'num': '31', 'abbr': 'NE'},
    'nevada': {'num': '32', 'abbr': 'NV'},
    'newhampshire': {'num': '33', 'abbr': 'NH'},
    'newjersey': {'num': '34', 'abbr': 'NJ'},
    'newmexico': {'num': '35', 'abbr': 'NM'},
    'newyork': {'num': '36', 'abbr': 'NY'},
    'northcarolina': {'num': '37', 'abbr': 'NC'},
    'northdakota': {'num': '38', 'abbr': 'ND'},
    'ohio': {'num': '39', 'abbr': 'OH'},
    'oklahoma': {'num': '40', 'abbr': 'OK'},
    'oregon': {'num': '41', 'abbr': 'OR'},
    'pennsylvania': {'num': '42', 'abbr': 'PA'},
    'rhodeisland': {'num': '44', 'abbr': 'RI'},
    'southcarolina': {'num': '45', 'abbr': 'SC'},
    'southdakota': {'num': '46', 'abbr': 'SD'},
    'tennessee': {'num': '47', 'abbr': 'TN'},
    'texas': {'num': '48', 'abbr': 'TX'},
    'utah': {'num': '49', 'abbr': 'UT'},
    'vermont': {'num': '50', 'abbr': 'VT'},
    'virginia': {'num': '51', 'abbr': 'VA'},
    'washington': {'num': '53', 'abbr': 'WA'},
    'westvirginia': {'num': '54', 'abbr': 'WV'},
    'wisconsin': {'num': '55', 'abbr': 'WI'},
    'wyoming': {'num': '56', 'abbr': 'WY'},
    'puertorico': {'num': '72', 'abbr': 'PR'}
}


In [38]:
# select state (full state name; spacing and capitalisation are normalised below)
state = 'California'

# look up the state's FIPS number; keys in `states` are lowercase with no spaces,
# so the name is normalised the same way before the lookup
state_num = states[state.replace(" ", "").lower()]['num']

# all statistical categories to be queried; the tract-building cell reads them
# back, in this order, as total, white, black, native, asian, pacific, hispanic
pops = 'B01003_001E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B03001_003E'

# create url to request data for every census tract in the selected state
url = f'https://api.census.gov/data/2019/acs/acs5?get=NAME,{pops}&for=tract:*&in=state:{state_num}'

# request the data with a timeout so a stalled connection cannot hang the
# notebook, and stop on an HTTP error status instead of trying to parse an
# error page as json
census_reply = requests.get(url, timeout=60)
census_reply.raise_for_status()

# set returned data to a variable (a list of rows; the first row holds the column names)
response = census_reply.json()

# display returned data as a dataframe
pd.DataFrame(response)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,NAME,B01003_001E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B03001_003E,state,county,tract
1,"Census Tract 5079.04, Santa Clara County, Cali...",3195,892,0,63,2097,0,105,06,085,507904
2,"Census Tract 5085.04, Santa Clara County, Cali...",8604,2379,89,0,4940,0,1363,06,085,508504
3,"Census Tract 5085.05, Santa Clara County, Cali...",4871,2201,12,19,2366,0,416,06,085,508505
4,"Census Tract 5087.04, Santa Clara County, Cali...",7587,2999,327,20,3587,3,1491,06,085,508704
...,...,...,...,...,...,...,...,...,...,...,...
8053,"Census Tract 1041.08, Los Angeles County, Cali...",6001,3538,430,15,203,13,4982,06,037,104108
8054,"Census Tract 1042.03, Los Angeles County, Cali...",5441,3158,455,42,68,0,4817,06,037,104203
8055,"Census Tract 1042.04, Los Angeles County, Cali...",3679,2363,139,14,246,0,2870,06,037,104204
8056,"Census Tract 1044.03, Los Angeles County, Cali...",3341,2045,0,25,135,0,3191,06,037,104403


In [7]:
# names given to the population counts found at positions 1 through 7 of each api row
count_labels = ('total', 'white', 'black', 'native', 'asian', 'pacific', 'hispanic')

# one dictionary of population data per census tract
tracts = []

for record in response:

    # the header row (first cell is 'NAME') carries column names, not data
    if record[0] == 'NAME':
        continue

    # fips code = state + county + tract codes glued together, stored as an integer
    tract = {'FIPS': int(f'{record[8]}{record[9]}{record[10]}')}

    # population counts arrive as text, so convert each one to an integer
    for label, raw_count in zip(count_labels, record[1:8]):
        tract[label] = int(raw_count)

    tracts.append(tract)

# census population data as a dataframe
populations = pd.DataFrame(tracts)

# join population data to ccvi data on the census tract fips code
# (only tracts present in both dataframes are kept)
ccvi_and_pop = populations.merge(ccvi, on='FIPS')

# show the merged dataframe
ccvi_and_pop


Unnamed: 0,FIPS,total,white,black,native,asian,pacific,hispanic,stateName,countyName,ccvi,theme1,theme2,theme3,theme4,theme5,theme6,theme7
0,6085507904,3195,892,0,63,2097,0,105,CALIFORNIA,Santa Clara,0.269849,0.118212,0.804904,0.048265,0.172676,0.315380,0.573650,0.796845
1,6085508504,8604,2379,89,0,4940,0,1363,CALIFORNIA,Santa Clara,0.663623,0.328366,0.861634,0.526029,0.005615,0.730998,0.573650,0.938588
2,6085508505,4871,2201,12,19,2366,0,416,CALIFORNIA,Santa Clara,0.676369,0.322370,0.815188,0.401808,0.100816,0.531811,0.944179,0.890013
3,6085508704,7587,2999,327,20,3587,3,1491,CALIFORNIA,Santa Clara,0.594755,0.510200,0.865849,0.409257,0.003364,0.800772,0.573650,0.592344
4,6085509403,5779,2274,138,41,1815,0,1980,CALIFORNIA,Santa Clara,0.755545,0.546996,0.855003,0.496054,0.002581,0.827723,0.573650,0.962450
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7972,6037104108,6001,3538,430,15,203,13,4982,CALIFORNIA,Los Angeles,0.922362,0.715368,0.959622,0.914684,0.190071,0.859081,0.371137,0.914342
7973,6037104203,5441,3158,455,42,68,0,4817,CALIFORNIA,Los Angeles,0.939028,0.919664,0.995730,0.745576,0.147468,0.882091,0.371137,0.948844
7974,6037104204,3679,2363,139,14,246,0,2870,CALIFORNIA,Los Angeles,0.966695,0.796535,0.964921,0.871584,0.180227,0.867675,0.899641,0.614792
7975,6037104403,3341,2045,0,25,135,0,3191,CALIFORNIA,Los Angeles,0.848395,0.864380,0.995002,0.365824,0.243602,0.853452,0.371137,0.909358


In [8]:
# racial groups to iterate through; 'total' has to come first because each
# group's share of the population is measured against the total computed for it
races = ['total','white','black','native','asian', 'pacific', 'hispanic']

# one (initially empty) dictionary of statistics per racial group
demogs = {group: {} for group in races}

for race in races:

    stats = demogs[race]
    headcounts = ccvi_and_pop[race]

    # total population of the group across all merged census tracts
    stats['population'] = int(headcounts.sum())

    # population-weighted average ccvi: each tract's ccvi counts in proportion
    # to how many members of the group live in that tract
    stats['ccvi'] = (headcounts*ccvi_and_pop['ccvi']).sum()/stats['population']

    # group population as a percentage of the total population
    stats['population_percent'] = (stats['population']/demogs['total']['population'])*100

# show the resulting dictionary
demogs


{'total': {'population': 39184759,
  'ccvi': 0.6107227243815876,
  'population_percent': 100.0},
 'white': {'population': 23405251,
  'ccvi': 0.5748261136565029,
  'population_percent': 59.73049623707013},
 'black': {'population': 2253401,
  'ccvi': 0.7053331771802169,
  'population_percent': 5.750707819843934},
 'native': {'population': 302002,
  'ccvi': 0.6672118803011934,
  'population_percent': 0.7707129192755785},
 'asian': {'population': 5684633,
  'ccvi': 0.5768182276242306,
  'population_percent': 14.50725523155572},
 'pacific': {'population': 154637,
  'ccvi': 0.6362910543608903,
  'population_percent': 0.39463557757239237},
 'hispanic': {'population': 15288340,
  'ccvi': 0.7351319132562741,
  'population_percent': 39.0160368218674}}

In [9]:
# get covid data for each race by state
covid = pd.read_csv('../resources/CRDT_Data.csv')

# look up the postal abbreviation of the selected state; keys in `states` are
# lowercase with spaces removed, so the state name must be normalised first
# (indexing with the raw name, e.g. 'California', raises a KeyError)
state_abbr = states[state.replace(" ", "").lower()]['abbr']

# filter to only include data for selected state
covid = covid.loc[covid['State'] == state_abbr, :]

# create dataframe with only relevant columns for covid cases
# (column order matches the order of the `races` list: total first, then each group)
cases = covid[['Cases_Total','Cases_White','Cases_Black','Cases_AIAN','Cases_Asian','Cases_NHPI','Cases_Latinx']]

# create dataframe with only relevant columns for covid deaths
# (same column order as `cases`)
deaths = covid[['Deaths_Total','Deaths_White','Deaths_Black','Deaths_AIAN','Deaths_Asian','Deaths_NHPI','Deaths_Latinx']]


In [10]:
# show only the first row of the cases dataframe, which is treated as the most
# recent data (assumes the csv is ordered newest-first; not checked here)
cases.iloc[:1]


Unnamed: 0,Cases_Total,Cases_White,Cases_Black,Cases_AIAN,Cases_Asian,Cases_NHPI,Cases_Latinx
5,3501394.0,546630.0,111279.0,9025.0,186562.0,15281.0,1509103.0


In [11]:
# show only the first row of the deaths dataframe, which is treated as the most
# recent data (assumes the csv is ordered newest-first; not checked here)
deaths.iloc[:1]

Unnamed: 0,Deaths_Total,Deaths_White,Deaths_Black,Deaths_AIAN,Deaths_Asian,Deaths_NHPI,Deaths_Latinx
5,54124.0,16586.0,3275.0,184.0,6105.0,322.0,24402.0


In [12]:
# add covid statistics to each group's dictionary; the columns of `cases` and
# `deaths` are matched to the entries of `races` purely by position, and the
# 'total' entry is processed first so its counts exist when the others need them
for position, cases_column in enumerate(cases.columns):

    group = demogs[races[position]]

    # total cases for the group, taken from the first row of the cases dataframe
    group['cases'] = int(cases[cases_column].iloc[0])

    # the group's cases as a percentage of total cases
    group['percent_of_cases'] = (group['cases']/demogs['total']['cases'])*100

    # share of cases relative to share of population, as a percentage
    # (100 would mean the group accounts for the same percent of cases as of the population)
    group['discrepancy_percent'] = (group['percent_of_cases']/group['population_percent'])*100

    # total deaths for the group, taken from the first row of the deaths dataframe
    group['deaths'] = int(deaths[deaths.columns[position]].iloc[0])

    # percentage of the group's cases that resulted in death
    group['chance_of_death'] = (group['deaths']/group['cases'])*100

    # the group's deaths as a percentage of total deaths
    group['percent_of_deaths'] = (group['deaths']/demogs['total']['deaths'])*100

    # alternative metrics that were considered but are currently unused
#     group['discrepancy_difference'] = group['percent_of_cases'] - group['population_percent']
#     group['infected_percent'] = (group['cases']/group['population'])*100
#     group['deaths_discrepancy_percent'] = (group['percent_of_deaths']/group['population_percent'])*100
#     group['deaths_discrepancy_difference'] = group['percent_of_deaths'] - group['population_percent']
#     group['died_percent'] = (group['deaths']/group['population'])*100


# one column per racial group, one row per statistic; the 'total' column is left out
demographics = pd.DataFrame(demogs).drop(columns='total')

# show the dataframe
demographics


Unnamed: 0,white,black,native,asian,pacific,hispanic
population,23405250.0,2253401.0,302002.0,5684633.0,154637.0,15288340.0
ccvi,0.5748261,0.7053332,0.667212,0.5768182,0.636291,0.7351319
population_percent,59.7305,5.750708,0.770713,14.50726,0.394636,39.01604
cases,546630.0,111279.0,9025.0,186562.0,15281.0,1509103.0
percent_of_cases,15.61178,3.178134,0.257754,5.328221,0.436426,43.10006
discrepancy_percent,26.13704,55.2651,33.443644,36.72797,110.589669,110.4676
deaths,16586.0,3275.0,184.0,6105.0,322.0,24402.0
chance_of_death,3.034228,2.943053,2.038781,3.272371,2.107192,1.616987
percent_of_deaths,30.64445,6.05092,0.33996,11.27965,0.59493,45.08536


In [13]:
# calculated values to be used in the max patch, keyed by racial group
for_max = {}

# every group except 'total'; values in each row of `demographics` are read by
# position and paired with these names in order
groups = races[1:]

# walk through the statistics one row at a time; the 'population' row has to be
# reached first because that is where each group's dictionary is created
for stat in demographics.index:

    # all values of this statistic, one per group
    row_values = demographics.loc[stat].values

    if stat == 'population':
        # start each group's dictionary with its population as an integer
        for position, group in enumerate(groups):
            for_max[group] = {}
            for_max[group][stat] = int(row_values[position])

    elif stat == 'ccvi':
        # inverted ccvi on a 0-100 scale
        for position, group in enumerate(groups):
            for_max[group][stat] = 100-(row_values[position])*100

    elif stat == 'discrepancy_percent':
        # chance that the next infection occurs in each group: the group's
        # discrepancy as a percentage of the sum of all groups' discrepancies
        row_total = row_values.sum()
        for position, group in enumerate(groups):
            for_max[group]['chance_of_infection'] = (row_values[position]/row_total)*100

    elif stat == 'chance_of_death':
        # chance of an infection resulting in death, copied over unchanged
        for position, group in enumerate(groups):
            for_max[group][stat] = row_values[position]

# show the resulting dictionary
for_max


{'white': {'population': 23405251,
  'ccvi': 42.51738863434971,
  'chance_of_infection': 7.014188043969955,
  'chance_of_death': 3.034227905530249},
 'black': {'population': 2253401,
  'ccvi': 29.466682281978308,
  'chance_of_infection': 14.831052639534542,
  'chance_of_death': 2.9430530468462153},
 'native': {'population': 302002,
  'ccvi': 33.278811969880664,
  'chance_of_infection': 8.975003700297991,
  'chance_of_death': 2.038781163434903},
 'asian': {'population': 5684633,
  'ccvi': 42.31817723757694,
  'chance_of_infection': 9.856392777186318,
  'chance_of_death': 3.272370579217633},
 'pacific': {'population': 154637,
  'ccvi': 36.37089456391097,
  'chance_of_infection': 29.67806658379855,
  'chance_of_death': 2.1071919377004122},
 'hispanic': {'population': 15288340,
  'ccvi': 26.486808674372583,
  'chance_of_infection': 29.645296255212646,
  'chance_of_death': 1.6169870446218715}}