In [1]:
# import dependencies
import pandas as pd
import requests
import json


In [2]:
# read csv on covid-19 covid vulnerability index data and convert to dataframe
ccvi = pd.read_csv('../resources/ccvi.csv')

# drow rows that contain any null values (there are 655 of them)
ccvi = ccvi.dropna(how='any')

# display dataframe
ccvi


Unnamed: 0,FIPS,stateName,countyName,ccvi,theme1,theme2,theme3,theme4,theme5,theme6,theme7
0,1001020100,ALABAMA,Autauga,0.441972,0.458336,0.377478,0.170811,0.841617,0.966294,0.209805,0.287591
1,1001020200,ALABAMA,Autauga,0.742619,0.562935,0.584587,0.683826,0.768947,0.982399,0.209805,0.427810
2,1001020300,ALABAMA,Autauga,0.778072,0.245094,0.645024,0.472226,0.863762,0.953086,0.724817,0.439041
3,1001020400,ALABAMA,Autauga,0.362727,0.023280,0.370119,0.192050,0.977978,0.894654,0.209805,0.425573
4,1001020500,ALABAMA,Autauga,0.816600,0.319725,0.421440,0.764060,0.753240,0.978307,0.749454,0.497597
...,...,...,...,...,...,...,...,...,...,...,...
72832,56043000200,WYOMING,Washakie,0.193098,0.632400,0.106362,0.792014,0.669875,0.126091,0.259923,0.008196
72833,56043000301,WYOMING,Washakie,0.288511,0.536803,0.358751,0.539916,0.400654,0.199599,0.259923,0.589599
72834,56043000302,WYOMING,Washakie,0.574666,0.804595,0.349800,0.613380,0.701398,0.137185,0.815509,0.273972
72835,56045951100,WYOMING,Weston,0.112481,0.476381,0.238316,0.834326,0.559188,0.149020,0.037122,0.008224


In [3]:
# dictionary for convertying state names to corresponding numbers or abbreviations
states = {
    'southcarolina': {'num': '45', 'abbr': 'SC'},
    'southdakota': {'num': '46', 'abbr': 'SD'},
    'tennessee': {'num': '47', 'abbr': 'TN'},
    'texas': {'num': '48', 'abbr': 'TX'},
    'vermont': {'num': '50', 'abbr': 'VT'},
    'utah': {'num': '49', 'abbr': 'UT'},
    'virginia': {'num': '51', 'abbr': 'VA'},
    'washington': {'num': '53', 'abbr': 'WA'},
    'westvirginia': {'num': '54', 'abbr': 'WV'},
    'wisconsin': {'num': '55', 'abbr': 'WI'},
    'wyoming': {'num': '56', 'abbr': 'WY'},
    'puertorico': {'num': '72', 'abbr': 'PR'},
    'alabama': {'num': '01', 'abbr': 'AL'},
    'alaska': {'num': '02', 'abbr': 'AK'},
    'arizona': {'num': '04', 'abbr': 'AZ'},
    'arkansas': {'num': '05', 'abbr': 'AR'},
    'california': {'num': '06', 'abbr': 'CA'},
    'colorado': {'num': '08', 'abbr': 'CO'},
    'delaware': {'num': '10', 'abbr': 'CT'},
    'districtofcolumbia': {'num': '11', 'abbr': 'DE'},
    'connecticut': {'num': '09', 'abbr': 'DC'},
    'florida': {'num': '12', 'abbr': 'FL'},
    'georgia': {'num': '13', 'abbr': 'GA'},
    'idaho': {'num': '16', 'abbr': 'ID'},
    'hawaii': {'num': '15', 'abbr': 'HI'},
    'illinois': {'num': '17', 'abbr': 'IL'},
    'indiana': {'num': '18', 'abbr': 'IN'},
    'iowa': {'num': '19', 'abbr': 'IA'},
    'kansas': {'num': '20', 'abbr': 'KS'},
    'kentucky': {'num': '21', 'abbr': 'KS'},
    'louisiana': {'num': '22', 'abbr': 'LA'},
    'maine': {'num': '23', 'abbr': 'ME'},
    'maryland': {'num': '24', 'abbr': 'MD'},
    'massachusetts': {'num': '25', 'abbr': 'MA'},
    'michigan': {'num': '26', 'abbr': 'MI'},
    'minnesota': {'num': '27', 'abbr': 'MN'},
    'mississippi': {'num': '28', 'abbr': 'MS'},
    'missouri': {'num': '29', 'abbr': 'MO'},
    'montana': {'num': '30', 'abbr': 'MT'},
    'nebraska': {'num': '31', 'abbr': 'NE'},
    'nevada': {'num': '32', 'abbr': 'NV'},
    'newhampshire': {'num': '33', 'abbr': 'NH'},
    'newjersey': {'num': '34', 'abbr': 'NJ'},
    'newmexico': {'num': '35', 'abbr': 'NM'},
    'newyork': {'num': '36', 'abbr': 'NY'},
    'northcarolina': {'num': '37', 'abbr': 'NC'},
    'northdakota': {'num': '38', 'abbr': 'ND'},
    'k6': {'num': '41', 'abbr': 'OR'},
    'pennsylvania': {'num': '42', 'abbr': 'PA'},
    'rhodeisland': {'num': '44', 'abbr': 'RI'}
}


In [4]:
# select state
state = 'Missouri'

# convert state to proper format
state = state.replace(" ", "").lower()

state_num = states[state]['num']

# all statistical categories to to be queried 
pops = 'B01003_001E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B03001_003E'

# create url to request data from api
url = f'https://api.census.gov/data/2019/acs/acs5?get=NAME,{pops}&for=tract:*&in=state:{state_num}'

# set returned data to a variable
response = requests.get(url).json()

# display returned data as a dataframe
pd.DataFrame(response)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,NAME,B01003_001E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B03001_003E,state,county,tract
1,"Census Tract 73, Jackson County, Missouri",3091,2582,214,0,153,0,80,29,095,007300
2,"Census Tract 76, Jackson County, Missouri",2018,345,1666,0,0,0,144,29,095,007600
3,"Census Tract 79, Jackson County, Missouri",3612,294,3256,20,10,0,47,29,095,007900
4,"Census Tract 102.01, Jackson County, Missouri",2362,1094,1167,21,21,39,94,29,095,010201
...,...,...,...,...,...,...,...,...,...,...,...
1389,"Census Tract 2214.22, St. Louis County, Missouri",9122,8802,1,0,254,0,216,29,189,221422
1390,"Census Tract 2192, St. Louis County, Missouri",2417,2195,103,0,48,0,157,29,189,219200
1391,"Census Tract 2207.03, St. Louis County, Missouri",2836,2794,7,5,13,0,158,29,189,220703
1392,"Census Tract 2134, St. Louis County, Missouri",5882,3557,1949,3,57,0,453,29,189,213400


In [5]:
# create list to store dictionaries with data for each census tract
tracts = []

# create dictionaries with population data for each census tract 
# (with properly formatted fips code)
for r in response:
    if r[0] != 'NAME':
        tracts.append({
            'FIPS': int(f'{r[8]}{r[9]}{r[10]}'),
            'total': int(r[1]),
            'white': int(r[2]),
            'black': int(r[3]),
            'native': int(r[4]),
            'asian': int(r[5]),
            'pacific': int(r[6]),
            'hispanic': int(r[7])
        })

# create dataframe with census population data
populations = pd.DataFrame(tracts)

# merge population data and ccvi data on census tract fips code
ccvi_and_pop = pd.merge(populations, ccvi, on='FIPS')

# display dataframe
ccvi_and_pop


Unnamed: 0,FIPS,total,white,black,native,asian,pacific,hispanic,stateName,countyName,ccvi,theme1,theme2,theme3,theme4,theme5,theme6,theme7
0,29095007300,3091,2582,214,0,153,0,80,MISSOURI,Jackson,0.356867,0.622720,0.141263,0.354166,0.069402,0.536136,0.457533,0.894722
1,29095007600,2018,345,1666,0,0,0,144,MISSOURI,Jackson,0.868511,0.960531,0.603630,0.795323,0.753968,0.443531,0.457533,0.668351
2,29095007900,3612,294,3256,20,10,0,47,MISSOURI,Jackson,0.934997,0.818942,0.475424,0.933902,0.709663,0.483058,0.901906,0.667458
3,29095010201,2362,1094,1167,21,21,39,94,MISSOURI,Jackson,0.734806,0.761595,0.820872,0.945186,0.207397,0.595626,0.457533,0.403441
4,29095008300,2528,2317,67,0,16,0,117,MISSOURI,Jackson,0.142503,0.027725,0.235763,0.006217,0.626902,0.330839,0.457533,0.735612
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1383,29189221422,9122,8802,1,0,254,0,216,MISSOURI,St. Louis,0.120697,0.190087,0.164259,0.325977,0.538964,0.188341,0.361497,0.565874
1384,29189219200,2417,2195,103,0,48,0,157,MISSOURI,St. Louis,0.366010,0.122589,0.221113,0.536386,0.547847,0.188670,0.893578,0.592784
1385,29189220703,2836,2794,7,5,13,0,158,MISSOURI,St. Louis,0.148696,0.136880,0.254901,0.201742,0.803353,0.125405,0.361497,0.559105
1386,29189213400,5882,3557,1949,3,57,0,453,MISSOURI,St. Louis,0.563902,0.741819,0.312414,0.787763,0.491639,0.330111,0.361497,0.639217


In [6]:
# create dictionary to hold data for each racial demographic
demogs = {
    'total': {},
    'white': {},
    'black': {},
    'native': {},
    'asian': {},
    'pacific': {},
    'hispanic': {}
}

# create list of racial groups to iterate through
races = ['total','white','black','native','asian', 'pacific', 'hispanic']

# iterate through list of races
for race in races:
    
    # calculate total population for each race
    demogs[race]['population'] = int(ccvi_and_pop[race].sum())
    
    # calculate average ccvi for each race
    demogs[race]['ccvi'] = (ccvi_and_pop[race]*ccvi_and_pop['ccvi']).sum()/demogs[race]['population']
    
    # calculate population of each race as a percentage of total population
    demogs[race]['population_percent'] = (demogs[race]['population']/demogs['total']['population'])*100

# display dictionary
demogs


{'total': {'population': 6104896,
  'ccvi': 0.46431858216201444,
  'population_percent': 100.0},
 'white': {'population': 5015890,
  'ccvi': 0.43772828282677106,
  'population_percent': 82.16176000377402},
 'black': {'population': 701334,
  'ccvi': 0.6452804263520205,
  'population_percent': 11.488058109425616},
 'native': {'population': 27084,
  'ccvi': 0.523344269273704,
  'population_percent': 0.44364392120684776},
 'asian': {'population': 120654,
  'ccvi': 0.40249949310113214,
  'population_percent': 1.9763481638344045},
 'pacific': {'population': 8231,
  'ccvi': 0.5505822407440165,
  'population_percent': 0.13482621161769176},
 'hispanic': {'population': 254791,
  'ccvi': 0.5533935473806177,
  'population_percent': 4.17355185084234}}

In [7]:
# get covid data for each race by state
covid = pd.read_csv('../resources/CRDT_Data.csv')

# filter to only include data for selected state
covid = covid.loc[covid['State'] == states[state]['abbr'],:]

# filter to only include data from 2020
covid = covid.loc[covid['Date'] < 20210000,:]

covid

# # create dataframe with only relevant columns for covid cases
# cases = covid[['Cases_Total','Cases_White','Cases_Black','Cases_AIAN','Cases_Asian','Cases_NHPI','Cases_Latinx']]

# # create dataframe with only relevant columns for covid deaths
# deaths = covid[['Deaths_Total','Deaths_White','Deaths_Black','Deaths_AIAN','Deaths_Asian','Deaths_NHPI','Deaths_Latinx']]


Unnamed: 0,Date,State,Cases_Total,Cases_White,Cases_Black,Cases_Latinx,Cases_Asian,Cases_AIAN,Cases_NHPI,Cases_Multiracial,...,Tests_Latinx,Tests_Asian,Tests_AIAN,Tests_NHPI,Tests_Multiracial,Tests_Other,Tests_Unknown,Tests_Ethnicity_Hispanic,Tests_Ethnicity_NonHispanic,Tests_Ethnicity_Unknown
1090,20201230,MO,388856.0,187040.0,34647.0,,3266.0,700.0,30.0,78.0,...,,,,,,,,,,
1146,20201227,MO,382094.0,182335.0,34045.0,,3171.0,688.0,30.0,76.0,...,,,,,,,,,,
1202,20201223,MO,373580.0,177525.0,33473.0,,3101.0,672.0,30.0,75.0,...,,,,,,,,,,
1258,20201220,MO,365186.0,172769.0,32830.0,,2995.0,657.0,30.0,73.0,...,,,,,,,,,,
1314,20201216,MO,353038.0,164551.0,31844.0,,2895.0,635.0,30.0,71.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5066,20200426,MO,6994.0,2798.0,2099.0,,,,,209.0,...,,,,,,,,,,
5122,20200422,MO,6135.0,2393.0,1841.0,,,,,184.0,...,,,,,,,,,,
5178,20200419,MO,5665.0,2153.0,1643.0,,,,,170.0,...,,,,,,,,,,
5234,20200415,MO,4895.0,1762.0,1419.0,,,,,146.0,...,,,,,,,,,,


In [8]:
# display cases dataframe with only most recent data
cases.head(1)


Unnamed: 0,Cases_Total,Cases_White,Cases_Black,Cases_AIAN,Cases_Asian,Cases_NHPI,Cases_Latinx
26,480643.0,252254.0,42276.0,942.0,4317.0,22.0,


In [9]:
# display deaths dataframe with only most recent data
deaths.head(1)

Unnamed: 0,Deaths_Total,Deaths_White,Deaths_Black,Deaths_AIAN,Deaths_Asian,Deaths_NHPI,Deaths_Latinx
26,8161.0,5162.0,1069.0,12.0,67.0,1.0,


In [10]:
# iterate through covid data for selected races and place data in a dictionary
for i in range(0, len(cases.columns)):
    
    # total cases for each race
    demogs[races[i]]['cases'] = int(cases[cases.columns[i]].values[0])
    
    # number of cases for each race as a percentage of total cases
    demogs[races[i]]['percent_of_cases'] = (demogs[races[i]]['cases']/demogs['total']['cases'])*100
    
    # percent discrepancy between percent of total cases and percent of total population for by each race
    # (theoretically each race should account for the same percent of cases as their percent of the population)
    demogs[races[i]]['discrepancy_percent'] = (demogs[races[i]]['percent_of_cases']/demogs[races[i]]['population_percent'])*100
    
    # total deaths for each race
    demogs[races[i]]['deaths'] = int(deaths[deaths.columns[i]].values[0])
    
    # chance of an infection resulting in death for each race
    demogs[races[i]]['chance_of_death'] = (demogs[races[i]]['deaths']/demogs[races[i]]['cases'])*100
    
    # number of deaths for each race as a percentage of total deaths
    demogs[races[i]]['percent_of_deaths'] = (demogs[races[i]]['deaths']/demogs['total']['deaths'])*100
    
    # calculated values I considered using but probably wont
#     demogs[races[i]]['discrepancy_difference'] = demogs[races[i]]['percent_of_cases'] - demogs[races[i]]['population_percent']
#     demogs[races[i]]['infected_percent'] = (demogs[races[i]]['cases']/demogs[races[i]]['population'])*100
#     demogs[races[i]]['deaths_discrepancy_percent'] = (demogs[races[i]]['percent_of_deaths']/demogs[races[i]]['population_percent'])*100
#     demogs[races[i]]['deaths_discrepancy_difference'] = demogs[races[i]]['percent_of_deaths'] - demogs[races[i]]['population_percent']
#     demogs[races[i]]['died_percent'] = (demogs[races[i]]['deaths']/demogs[races[i]]['population'])*100


# create dataframe without total population values
demographics = pd.DataFrame(demogs).drop(columns=['total'])

# display dataframe
demographics


ValueError: cannot convert float NaN to integer

In [None]:
# create dictionary to hold calculated values to be used in max patch
for_max = {}

# iterate through statistical categories
for row in list(demographics.index):
    
    # create a list that holds all values within the row of a statistical category
    values = demographics.loc[row].values
        
    # iterate through races
    for i in range(1, len(races)):
        
        # get population numbers
        if row == 'population':
            for_max[races[i]] = {}
            for_max[races[i]][row] = int(values[i-1])
                
        # calculate inverted ccvi values
        elif row == 'ccvi':
            for_max[races[i]][row] = 100-(values[i-1])*100
        
        # calculate chances for where next infection will occure
        elif row == 'discrepancy_percent':
            for_max[races[i]]['chance_of_infection'] = (values[i-1]/values.sum())*100
        
        # get values for chance of infection resulting in death
        elif row == 'chance_of_death':
            for_max[races[i]][row] = values[i-1]
            
# display resulting dictionary            
for_max
