In [None]:
#### insert required libraries ####
import os
import sys

import censusdata
import numpy as np
import pandas as pd

import fetch_shp

In [None]:
### input parameters ###
# year      : ACS vintage handed to every censusdata query in this notebook
# state     : full state name, or a two-letter abbreviation (expanded further down)
# county    : county name without the "County" suffix
# geo_level : "ct" (census tract) or "bg" (block group); matched case-insensitively
year = 2019
state = "kansas"
county = "douglas"
geo_level = "ct"

In [None]:
from fetch_shp import STATE_FIPS_DICT, get_fips_code_for_state

# FIPS code for the state, looked up from the value exactly as entered above.
state_number = get_fips_code_for_state(state)

# A two-character entry is treated as an abbreviation and replaced by the
# matching entry's full name from STATE_FIPS_DICT.
if len(state) == 2:
    for state_dict in STATE_FIPS_DICT.values():
        if state_dict['abbreviation'] == state.upper():
            state = state_dict['name']

In [None]:
def county_list(state_number, county, year, state_name=None):
    """Return the FIPS code of ``county`` within a state, or "" when not found.

    Parameters
    ----------
    state_number : str
        State FIPS code used in the Census geography query.
    county : str
        County name without the "County" suffix (e.g. "douglas").
        Matching is case-insensitive.
    year : int
        ACS 5-year vintage to query.
    state_name : str, optional
        Full state name used to build the "<county> County, <state>" label.
        Defaults to the notebook-level ``state`` variable.

    Returns
    -------
    str
        The county code, or an empty string when no geography label matches.
    """
    if state_name is None:
        state_name = state  # notebook-level input parameter

    counties = censusdata.geographies(
        censusdata.censusgeo([('state', state_number), ('county', '*')]), "acs5", year)

    # Compare case-insensitively: str.title() turns "mcpherson" into "Mcpherson"
    # and "district of columbia" into "District Of Columbia", neither of which
    # equals the label returned by the API.
    wanted = "{} County, {}".format(county.strip(), state_name.strip()).casefold()
    for label, geography in counties.items():
        if label.casefold() == wanted:
            # The last (name, code) pair of the geography is the county component.
            return geography.geo[-1][1]
    return ""


county_number = county_list(state_number, county, year)

# county_list returns an empty string when the county is unknown. Stop here in
# that case: every later download needs a valid county code, and the
# geographic-level check further down aborts the same way.
if not county_number:
    print("invalid county name")
    sys.exit()

In [None]:
from fetch_shp import GEO_TYPES_DICT

def get_geo_type():
    """Translate the notebook-level ``geo_level`` code into a GEO_TYPES_DICT key.

    Returns the first key whose entry has a 'level' equal to the lower-cased
    ``geo_level``, or None when no entry matches.
    """
    return next(
        (name for name, spec in GEO_TYPES_DICT.items()
         if spec['level'] == geo_level.lower()),
        None,
    )
geo_type = get_geo_type()

# get_geo_type() returns None when geo_level matches no known level; the rest
# of the notebook cannot run without a geographic level, so abort.
if geo_type is None:
    print("invalid geographic level"+"\n"+ 'only "Census Tract" and "Block Group" are acceptable levels'+
         '\n'+ 'Please enter "CT" or "BG" as the input parameter for geo_level' )
    sys.exit()

In [None]:
#### Retrieve required census data and calculate desired demographic factors and national average values ####

def _county_factor_share(method, year, geography, total_id, part_ids, complement=False):
    """Download one ACS table slice and return sum(part_ids) / total_id per geography.

    With ``complement=True`` the result is ``1 - share``. The Census API key is
    read from the CENSUS_API_KEY environment variable; when it is unset, None is
    passed as the key.
    """
    table = censusdata.download(method, year, geography,
                                [total_id] + list(part_ids),
                                key=os.environ.get("CENSUS_API_KEY"))
    # skipna=False keeps a missing count visible as NaN instead of counting it as 0.
    share = table[list(part_ids)].sum(axis=1, skipna=False) / table[total_id]
    return 1 - share if complement else share


def demographic_factors(state_number, county_number, year, method="acs5"):
    """Build the demographic factor table for every tract or block group of a county.

    The geographic level comes from the notebook-level ``geo_type`` variable
    ('tract' or 'block group'). Five shares are computed per geography:

    1. B03002: White alone, not Hispanic or Latino / total population
    2. B25003: owner occupied / total
    3. B17021: 1 - persons in poverty / total
    4. B15003: regular high school diploma through doctorate degree / total
    5. people aged 18-64 without a disability / total, from B18101 for tracts,
       or from C21007 counts over the B01003 total for block groups

    NOTE(review): factor 5 divides by each table's overall 'total' column, not
    by the 18-64 population - confirm this matches the intended definition.

    Parameters
    ----------
    state_number : str
        State FIPS code.
    county_number : str
        County FIPS code.
    year : int
        ACS vintage.
    method : str, default "acs5"
        Census data source passed to ``censusdata.download``.

    Returns
    -------
    pandas.DataFrame
        Indexed by the concatenated FIPS components of each geography. The first
        column, '<geo_type> number', holds the geography's short name; the
        remaining five columns are the factors listed above.

    Raises
    ------
    ValueError
        If ``geo_type`` is neither 'tract' nor 'block group'.
    """
    # Factor 5 is sourced from a different table at each geographic level.
    disability_tables = {
        'tract': ('B18101_001E',
                  ['B18101_011E', 'B18101_014E', 'B18101_030E', 'B18101_033E']),
        'block group': ('B01003_001E',
                        ['C21007_006E', 'C21007_009E', 'C21007_013E', 'C21007_016E']),
    }
    if geo_type not in disability_tables:
        raise ValueError(
            "unsupported geographic level %r; expected 'tract' or 'block group'" % (geo_type,))
    disability_total, disability_parts = disability_tables[geo_type]

    # (column label, denominator variable, numerator variables, report 1 - share?)
    factor_specs = [
        ('Factor 1: White, nonHispanic',
         'B03002_001E', ['B03002_003E'], False),
        ('Factor 2: Home Owners',
         'B25003_001E', ['B25003_002E'], False),
        ('Factor 3: earning higher than national poverty rate',
         'B17021_001E', ['B17021_002E'], True),
        ('Factor 4: over 25 with high school diploma or higher',
         'B15003_001E',
         ['B15003_017E', 'B15003_018E', 'B15003_019E', 'B15003_020E', 'B15003_021E',
          'B15003_022E', 'B15003_023E', 'B15003_024E', 'B15003_025E'],
         False),
        ('Factor 5: without disability age 18 to 65',
         disability_total, disability_parts, False),
    ]

    geography = censusdata.censusgeo(
        [('state', state_number), ('county', county_number), (geo_type, '*')])
    factors = [
        _county_factor_share(method, year, geography, total_id, part_ids, complement).rename(label)
        for label, total_id, part_ids, complement in factor_specs
    ]
    # The inner join keeps only geographies present in all five downloads.
    df_t = pd.concat(factors, axis=1, join='inner')

    # Replace the censusgeo index by the concatenated codes of its components
    # (state + county + tract [+ block group]) and keep the short geography
    # name, the text before the first comma, as the first column.
    geographies = df_t.index.tolist()
    df_t.index = ["".join(code for _, code in geo.geo) for geo in geographies]
    df_t.insert(0, '%s number' % geo_type,
                [geo.name.split(',')[0] for geo in geographies], True)

    return df_t


# Factor table for the selected county, one row per geography at the chosen level.
df_t = demographic_factors(state_number, county_number, year)

In [None]:
def _national_share(method, year, total_id, part_ids, complement=False):
    """Return sum(part_ids) / total_id aggregated over every state-level row.

    With ``complement=True`` the result is ``1 - share``. The Census API key is
    read from the CENSUS_API_KEY environment variable; when it is unset, None is
    passed as the key.
    """
    states = censusdata.download(method, year,
                                 censusdata.censusgeo([('state', '*')]),
                                 [total_id] + list(part_ids),
                                 key=os.environ.get("CENSUS_API_KEY"))
    # NOTE(review): this sums every row returned for state:* - presumably the
    # 50 states plus DC and Puerto Rico; confirm Puerto Rico should count
    # towards the national average.
    totals = states.sum(numeric_only=True, axis=0)
    share = totals[list(part_ids)].sum() / totals[total_id]
    return 1 - share if complement else share


def national_ave_values(year, method='acs5'):
    """Compute the national average value (NAV) of the five demographic factors.

    Each NAV is the same share used for the county-level factors, computed from
    counts summed over all state-level rows:

    1. B03002: White alone, not Hispanic or Latino / total population
    2. B25003: owner occupied / total
    3. B17021: 1 - persons in poverty / total
    4. B15003: regular high school diploma through doctorate degree / total
    5. B18101: people aged 18-64 without a disability / total

    Parameters
    ----------
    year : int
        ACS vintage.
    method : str, default 'acs5'
        Census data source passed to ``censusdata.download``.

    Returns
    -------
    numpy.ndarray
        The five NAVs, in the order listed above.
    """
    # (denominator variable, numerator variables, report 1 - share?)
    nav_specs = [
        ('B03002_001E', ['B03002_003E'], False),
        ('B25003_001E', ['B25003_002E'], False),
        ('B17021_001E', ['B17021_002E'], True),
        ('B15003_001E',
         ['B15003_017E', 'B15003_018E', 'B15003_019E', 'B15003_020E', 'B15003_021E',
          'B15003_022E', 'B15003_023E', 'B15003_024E', 'B15003_025E'],
         False),
        ('B18101_001E',
         ['B18101_011E', 'B18101_014E', 'B18101_030E', 'B18101_033E'],
         False),
    ]
    navs = np.array([
        _national_share(method, year, total_id, part_ids, complement)
        for total_id, part_ids, complement in nav_specs
    ])

    return navs


# National average of each of the five factors, in factor order.
navs = national_ave_values(year)

In [None]:
#### Calculate Social Vulnerability Score (SVS) and assign the associated zone ####

def _vulnerability_zone(svs, lb_2, lb_1, ub_1, ub_2):
    """Return the zone label for one SVS value given the four zone boundaries."""
    if svs < lb_2:
        return 'High Vulnerable (zone5)'
    if svs < lb_1:
        return 'Medium to High Vulnerable (zone4)'
    if svs < ub_1:
        return 'Medium Vulnerable (zone3)'
    if svs < ub_2:
        return 'Medium to Low Vulnerable (zone2)'
    # >= rather than >, so a score exactly on the upper boundary is not
    # mislabelled as 'No Data'.
    if svs >= ub_2:
        return 'Low Vulnerable (zone1)'
    # Only NaN fails every comparison above.
    return 'No Data'


def compute_svs(factors=None, national_averages=None):
    """Add the ratio columns, the SVS and the vulnerability zone to the factor table.

    Each factor is divided by its national average (columns R1..R5); the SVS is
    the mean of the five ratios. Zone boundaries sit at 1 -/+ 0.5 and 1 -/+ 1.5
    "standard deviations", where the deviation is a third of the distance
    between 1 and the mean of 1 / NAV.

    Parameters
    ----------
    factors : pandas.DataFrame, optional
        Table holding the five 'Factor N: ...' columns. Defaults to the
        notebook-level ``df_t``. The table is modified in place.
    national_averages : sequence of float, optional
        The five national average values, in factor order. Defaults to the
        notebook-level ``navs``.

    Returns
    -------
    pandas.DataFrame
        The same ``factors`` object, with columns R1..R5, 'SVS' and 'zone' added.
    """
    if factors is None:
        factors = df_t
    if national_averages is None:
        national_averages = navs

    factor_columns = [
        'Factor 1: White, nonHispanic',
        'Factor 2: Home Owners',
        'Factor 3: earning higher than national poverty rate',
        'Factor 4: over 25 with high school diploma or higher',
        'Factor 5: without disability age 18 to 65',
    ]
    ratio_columns = []
    for position, column in enumerate(factor_columns):
        ratio = 'R%d' % (position + 1)
        factors[ratio] = factors[column] / national_averages[position]
        ratio_columns.append(ratio)
    factors['SVS'] = sum(factors[ratio] for ratio in ratio_columns) / len(ratio_columns)

    # 1 / NAV is the ratio a factor value of 1 would produce.
    mean_max_ratio = sum(
        1 / national_averages[position] for position in range(len(factor_columns))
    ) / len(factor_columns)
    std = abs(1 - mean_max_ratio) / 3
    LB_2 = 1 - 1.5 * std
    LB_1 = 1 - 0.5 * std
    UB_1 = 1 + 0.5 * std
    UB_2 = 1 + 1.5 * std

    factors['zone'] = [
        _vulnerability_zone(svs, LB_2, LB_1, UB_1, UB_2)
        for svs in factors['SVS'].tolist()
    ]

    return factors

df_t = compute_svs()
# Move the FIPS index into a column only once: when this cell is re-run, df_t
# already has a 'FIPS' column, and naming the index 'FIPS' again would make
# sort_values(by='FIPS') ambiguous.
if 'FIPS' not in df_t.columns:
    df_t = df_t.rename_axis('FIPS').reset_index()
df_t = df_t.sort_values(by='FIPS').reset_index(drop=True)

In [None]:
# Show the complete SVS table (factors, ratios, SVS and zone) as cell output.
display(df_t)

In [None]:
# Write the SVS table to a CSV in the current working directory; the file name
# records the county, the state and the geographic level.
csv_flie_name = 'SVS values for %s County,%s at %s level.csv' %(county.title(), state.title(), geo_type)
df_t.to_csv(csv_flie_name)

In [None]:
from fetch_shp import get_one_geo_type     
# NOTE(review): presumably fetches the boundary shapefile archive for this
# geographic level, state and year and returns its file name - confirm against fetch_shp.
zip_file_name = get_one_geo_type(geo_type, state, str(year))

In [None]:
import os
import geopandas
# Collapse the value returned by get_one_geo_type into one string
# (str() of every item, concatenated without a separator).
zip_file_name = "".join([str(item) for item in zip_file_name])
# Absolute path under the current working directory, with forward slashes
# on every platform.
path_to_shp_file = os.path.join(os.getcwd(), zip_file_name)
path_to_shp_file = path_to_shp_file.replace(os.sep, '/')
shp_file = geopandas.read_file(path_to_shp_file)
# Re-export the boundaries as GeoJSON; the file name is the state followed
# directly by the geographic level.
geojson_file_name = state + geo_type +'.geojson'
shp_file.to_file(geojson_file_name, driver='GeoJSON')

In [None]:
import json
# NOTE(review): path_to_geojson_file is not used in this cell; the file is
# opened through geojson_file_name, relative to the working directory.
path_to_geojson_file = os.path.join(os.getcwd(),zip_file_name)
# Parse the GeoJSON written above into a plain dict for plotly.
with open(geojson_file_name) as fp:
    data = json.load(fp)

In [None]:
import plotly.express as px

# Order the rows by score before handing them to plotly.
df_t = df_t.sort_values(by='SVS')

# Fixed colour per vulnerability zone.
zone_colors = {
    "High Vulnerable (zone5)": "#FF0000",
    "Medium to High Vulnerable (zone4)": "#FFB6C1",
    "Medium Vulnerable (zone3)": "#FFFF00",
    "Medium to Low Vulnerable (zone2)": "#87CEEB",
    "Low Vulnerable (zone1)": "#9ACD32",
    "No Data": "#A9A9A9",
}
map_title = "".join([
    'Social Vulnerability Score at ', geo_type.upper(), ' level for', "\n",
    county.upper(), " County, ", state.upper(),
])

# Each row is matched to a GeoJSON feature through the feature's GEOID property.
fig = px.choropleth(
    geojson=data,
    locations=df_t['FIPS'],
    color=df_t['zone'],
    featureidkey="properties.GEOID",
    color_discrete_sequence=["#FF0000", "#FFB6C1", "#FFFF00", "#87CEEB", "#9ACD32", "#A9A9A9"],
    color_discrete_map=zone_colors,
    labels={"color": "Vulnerability"},
    title=map_title,
    scope="usa",
)

# Zoom to the plotted geographies, then set the margins and the legend box.
fig.update_geos(fitbounds="locations", visible=True)
fig.update_layout(
    margin={"r": 0, "t": 50, "l": 0, "b": 0},
    legend={
        "y": 0.95,
        "x": 0.85,
        "title_font_family": "Times New Roman",
        "font": {"family": "Courier", "size": 12, "color": "black"},
        "bordercolor": "black",
        "borderwidth": 2,
    },
)
fig.show()

In [None]:
# Save the map as a PNG in the current working directory; the file name
# records the county, the state and the geographic level.
image_flie_name = 'SVS map %s County,%s at %s level.png' %(county.title(), state.title(), geo_type)
fig.write_image(image_flie_name) 