In [71]:
# import dependencies
import pandas as pd
import requests
import json


In [72]:
# read csv on covid-19 covid vulnerability index data and convert to dataframe
ccvi = pd.read_csv('../resources/ccvi.csv')

# drop rows that contain any null values (there are 655 of them)
ccvi = ccvi.dropna(how='any')

# display dataframe
ccvi


Unnamed: 0,FIPS,stateName,countyName,ccvi,theme1,theme2,theme3,theme4,theme5,theme6,theme7
0,1001020100,ALABAMA,Autauga,0.441972,0.458336,0.377478,0.170811,0.841617,0.966294,0.209805,0.287591
1,1001020200,ALABAMA,Autauga,0.742619,0.562935,0.584587,0.683826,0.768947,0.982399,0.209805,0.427810
2,1001020300,ALABAMA,Autauga,0.778072,0.245094,0.645024,0.472226,0.863762,0.953086,0.724817,0.439041
3,1001020400,ALABAMA,Autauga,0.362727,0.023280,0.370119,0.192050,0.977978,0.894654,0.209805,0.425573
4,1001020500,ALABAMA,Autauga,0.816600,0.319725,0.421440,0.764060,0.753240,0.978307,0.749454,0.497597
...,...,...,...,...,...,...,...,...,...,...,...
72832,56043000200,WYOMING,Washakie,0.193098,0.632400,0.106362,0.792014,0.669875,0.126091,0.259923,0.008196
72833,56043000301,WYOMING,Washakie,0.288511,0.536803,0.358751,0.539916,0.400654,0.199599,0.259923,0.589599
72834,56043000302,WYOMING,Washakie,0.574666,0.804595,0.349800,0.613380,0.701398,0.137185,0.815509,0.273972
72835,56045951100,WYOMING,Weston,0.112481,0.476381,0.238316,0.834326,0.559188,0.149020,0.037122,0.008224


In [73]:
# get covid data for each race by state
covid = pd.read_csv('../resources/CRDT_Data.csv')

# display dataframe
covid


Unnamed: 0,Date,State,Cases_Total,Cases_White,Cases_Black,Cases_Latinx,Cases_Asian,Cases_AIAN,Cases_NHPI,Cases_Multiracial,...,Tests_Latinx,Tests_Asian,Tests_AIAN,Tests_NHPI,Tests_Multiracial,Tests_Other,Tests_Unknown,Tests_Ethnicity_Hispanic,Tests_Ethnicity_NonHispanic,Tests_Ethnicity_Unknown
0,20210307,AK,59332.0,18300.0,1499.0,,2447.0,12238.0,1508.0,4453.0,...,,,,,,,,,,
1,20210307,AL,499819.0,160347.0,82790.0,,2273.0,,,,...,,,,,,,,,,
2,20210307,AR,324818.0,207596.0,50842.0,,2913.0,1070.0,3358.0,1804.0,...,,,,,,,,,,
3,20210307,AS,,,,,,,,,...,,,,,,,,,,
4,20210307,AZ,826454.0,308453.0,25775.0,244539.0,11921.0,40707.0,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5315,20200412,VT,,,,,,,,,...,,,,,,,,,,
5316,20200412,WA,10411.0,2903.0,289.0,1180.0,451.0,41.0,61.0,112.0,...,,,,,,,,,,
5317,20200412,WI,3341.0,1680.0,857.0,,81.0,28.0,,,...,,,,,,,,,,
5318,20200412,WV,,,,,,,,,...,,,,,,,,,,


In [74]:
# dictionary for convertying state names to corresponding numbers or abbreviations
states = {
    'southcarolina': {'num': '45', 'abbr': 'SC'},
    'southdakota': {'num': '46', 'abbr': 'SD'},
    'tennessee': {'num': '47', 'abbr': 'TN'},
    'texas': {'num': '48', 'abbr': 'TX'},
    'vermont': {'num': '50', 'abbr': 'VT'},
    'utah': {'num': '49', 'abbr': 'UT'},
    'virginia': {'num': '51', 'abbr': 'VA'},
    'washington': {'num': '53', 'abbr': 'WA'},
    'westvirginia': {'num': '54', 'abbr': 'WV'},
    'wisconsin': {'num': '55', 'abbr': 'WI'},
    'wyoming': {'num': '56', 'abbr': 'WY'},
    'puertorico': {'num': '72', 'abbr': 'PR'},
    'alabama': {'num': '01', 'abbr': 'AL'},
    'alaska': {'num': '02', 'abbr': 'AK'},
    'arizona': {'num': '04', 'abbr': 'AZ'},
    'arkansas': {'num': '05', 'abbr': 'AR'},
    'california': {'num': '06', 'abbr': 'CA'},
    'colorado': {'num': '08', 'abbr': 'CO'},
    'delaware': {'num': '10', 'abbr': 'CT'},
    'districtofcolumbia': {'num': '11', 'abbr': 'DE'},
    'connecticut': {'num': '09', 'abbr': 'DC'},
    'florida': {'num': '12', 'abbr': 'FL'},
    'georgia': {'num': '13', 'abbr': 'GA'},
    'idaho': {'num': '16', 'abbr': 'ID'},
    'hawaii': {'num': '15', 'abbr': 'HI'},
    'illinois': {'num': '17', 'abbr': 'IL'},
    'indiana': {'num': '18', 'abbr': 'IN'},
    'iowa': {'num': '19', 'abbr': 'IA'},
    'kansas': {'num': '20', 'abbr': 'KS'},
    'kentucky': {'num': '21', 'abbr': 'KS'},
    'louisiana': {'num': '22', 'abbr': 'LA'},
    'maine': {'num': '23', 'abbr': 'ME'},
    'maryland': {'num': '24', 'abbr': 'MD'},
    'massachusetts': {'num': '25', 'abbr': 'MA'},
    'michigan': {'num': '26', 'abbr': 'MI'},
    'minnesota': {'num': '27', 'abbr': 'MN'},
    'mississippi': {'num': '28', 'abbr': 'MS'},
    'missouri': {'num': '29', 'abbr': 'MO'},
    'montana': {'num': '30', 'abbr': 'MT'},
    'nebraska': {'num': '31', 'abbr': 'NE'},
    'nevada': {'num': '32', 'abbr': 'NV'},
    'newhampshire': {'num': '33', 'abbr': 'NH'},
    'newjersey': {'num': '34', 'abbr': 'NJ'},
    'newmexico': {'num': '35', 'abbr': 'NM'},
    'newyork': {'num': '36', 'abbr': 'NY'},
    'northcarolina': {'num': '37', 'abbr': 'NC'},
    'northdakota': {'num': '38', 'abbr': 'ND'},
    'oregon': {'num': '41', 'abbr': 'OR'},
    'pennsylvania': {'num': '42', 'abbr': 'PA'},
    'rhodeisland': {'num': '44', 'abbr': 'RI'}
}

# all statistical categories to to be queried 
pops = 'B01003_001E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B03001_003E'

# create list of racial groups to iterate through
races = ['total','white','black','native','asian','pacific','hispanic']

# dictionary with all data to be used from all states that made the data avaliable
stateData = {}

# all states without necessary data
error_states = []


In [75]:
# iterate through states
for state in states:
    
    try:

        # get census state number
        state_num = states[state]['num']

        # create url to request data from api
        url = f'https://api.census.gov/data/2019/acs/acs5?get=NAME,{pops}&for=tract:*&in=state:{state_num}'

        # set returned data to a variable
        response = requests.get(url).json()

        # create list to store dictionaries with data for each census tract
        tracts = []

        # create dictionaries with population data for each census tract 
        # (with properly formatted fips code)
        for r in response:
            if r[0] != 'NAME':
                tracts.append({
                    'FIPS': int(f'{r[8]}{r[9]}{r[10]}'),
                    'total': int(r[1]),
                    'white': int(r[2]),
                    'black': int(r[3]),
                    'native': int(r[4]),
                    'asian': int(r[5]),
                    'pacific': int(r[6]),
                    'hispanic': int(r[7])
                })

        # create dataframe with census population data
        populations = pd.DataFrame(tracts)

        # merge population data and ccvi data on census tract fips code
        ccvi_and_pop = pd.merge(populations, ccvi, on='FIPS')

        # create dictionary to hold data for each racial demographic
        demogs = {
            'total': {},
            'white': {},
            'black': {},
            'native': {},
            'asian': {},
            'pacific': {},
            'hispanic': {}
        }

        # iterate through list of races
        for race in races:

            # calculate total population for each race
            demogs[race]['population'] = int(ccvi_and_pop[race].sum())

            # calculate average ccvi for each race
            demogs[race]['ccvi'] = (ccvi_and_pop[race]*ccvi_and_pop['ccvi']).sum()/demogs[race]['population']

            # calculate population of each race as a percentage of total population
            demogs[race]['population_percent'] = (demogs[race]['population']/demogs['total']['population'])*100

        # get covid data for each race by state
        covid = pd.read_csv('../resources/CRDT_Data.csv')

        # filter to only include data for selected state
        covid = covid.loc[covid['State'] == states[state]['abbr'],:]

        # filter to only include data from 2020
        covid = covid.loc[covid['Date'] < 20210000,:]

        # create dataframe with only relevant columns for covid cases
        cases = covid[['Cases_Total','Cases_White','Cases_Black','Cases_AIAN','Cases_Asian','Cases_NHPI','Cases_Ethnicity_Hispanic']]

        # create dataframe with only relevant columns for covid deaths
        deaths = covid[['Deaths_Total','Deaths_White','Deaths_Black','Deaths_AIAN','Deaths_Asian','Deaths_NHPI','Deaths_Ethnicity_Hispanic']]

        # iterate through covid data for selected races and place data in a dictionary
        for i in range(0, len(cases.columns)):

            # total cases for each race
            demogs[races[i]]['cases'] = int(cases[cases.columns[i]].values[0])

            # number of cases for each race as a percentage of total cases
            demogs[races[i]]['percent_of_cases'] = (demogs[races[i]]['cases']/demogs['total']['cases'])*100

            # percent discrepancy between percent of total cases and percent of total population for by each race
            # (theoretically each race should account for the same percent of cases as their percent of the population)
            demogs[races[i]]['discrepancy_percent'] = (demogs[races[i]]['percent_of_cases']/demogs[races[i]]['population_percent'])*100

            # total deaths for each race
            demogs[races[i]]['deaths'] = int(deaths[deaths.columns[i]].values[0])

            # chance of an infection resulting in death for each race
            demogs[races[i]]['chance_of_death'] = (demogs[races[i]]['deaths']/demogs[races[i]]['cases'])*100

            # number of deaths for each race as a percentage of total deaths
            demogs[races[i]]['percent_of_deaths'] = (demogs[races[i]]['deaths']/demogs['total']['deaths'])*100

        # create dataframe without total population values
        demographics = pd.DataFrame(demogs).drop(columns=['total'])


        # create dictionary to hold calculated values to be used in max patch
        for_max = {}

        # iterate through statistical categories
        for row in list(demographics.index):

            # create a list that holds all values within the row of a statistical category
            values = demographics.loc[row].values

            # iterate through races
            for i in range(1, len(races)):

                # get population numbers
                if row == 'population':
                    for_max[races[i]] = {}
                    for_max[races[i]][row] = int(values[i-1])

                # calculate inverted ccvi values
                elif row == 'ccvi':
                    for_max[races[i]]['inverted_ccvi'] = round(100-(values[i-1])*100, 2)

                # calculate chances for where next infection will occure
                elif row == 'discrepancy_percent':
                    for_max[races[i]]['chance_of_infection'] = round((values[i-1]/values.sum())*100, 2)

                # get values for chance of infection resulting in death
                elif row == 'chance_of_death':
                    for_max[races[i]][row] = round(values[i-1], 2)

        # create keys to hold number of cases and deaths generated by Max alogrithm
        for key in for_max:
            for_max[key]['generated_cases'] = 0
            for_max[key]['generated_deaths'] = 0

        stateData[state] = for_max
    
    except:
        
        error_states.append(state)



  demogs[race]['ccvi'] = (ccvi_and_pop[race]*ccvi_and_pop['ccvi']).sum()/demogs[race]['population']


In [76]:
# display avaliable states
for state in stateData:
    print(state)

tennessee
utah
washington
wyoming
alaska
arkansas
california
colorado
georgia
illinois
iowa
maine
minnesota
missouri
nebraska
oregon


In [77]:
# display unavaliable states
for state in error_states:
    print(state)

southcarolina
southdakota
texas
vermont
virginia
westvirginia
wisconsin
puertorico
alabama
arizona
delaware
districtofcolumbia
connecticut
florida
idaho
hawaii
indiana
kansas
kentucky
louisiana
maryland
massachusetts
michigan
mississippi
montana
nevada
newhampshire
newjersey
newmexico
newyork
northcarolina
northdakota
pennsylvania
rhodeisland


In [78]:
stateData

{'tennessee': {'white': {'population': 5200531,
   'inverted_ccvi': 45.91,
   'chance_of_infection': 15.44,
   'chance_of_death': 1.47,
   'generated_cases': 0,
   'generated_deaths': 0},
  'black': {'population': 1120221,
   'inverted_ccvi': 28.16,
   'chance_of_infection': 15.65,
   'chance_of_death': 1.68,
   'generated_cases': 0,
   'generated_deaths': 0},
  'native': {'population': 18151,
   'inverted_ccvi': 40.66,
   'chance_of_infection': 9.31,
   'chance_of_death': 1.12,
   'generated_cases': 0,
   'generated_deaths': 0},
  'asian': {'population': 117348,
   'inverted_ccvi': 49.7,
   'chance_of_infection': 9.41,
   'chance_of_death': 0.79,
   'generated_cases': 0,
   'generated_deaths': 0},
  'pacific': {'population': 3735,
   'inverted_ccvi': 37.37,
   'chance_of_infection': 24.52,
   'chance_of_death': 1.03,
   'generated_cases': 0,
   'generated_deaths': 0},
  'hispanic': {'population': 363753,
   'inverted_ccvi': 32.45,
   'chance_of_infection': 25.67,
   'chance_of_death':

In [79]:
with open("../resources/stateData.json", "w") as outfile:
    json.dump(stateData, outfile)