# Project: Improving Disaster Resilience

## Combine US Census Data with Standard FIPS Codes

### 
### Disaster Resilience Project

### Data Used: Standard US Counties FIPS Codes, U.S. Census input files at county level

Andrew Sommers

### Purpose

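Reads the county-level US Census extracts (total population, population density, urban/rural households, and median household income) and joins them, by FIPS code, to the standardized FIPS table produced by the FIPS table preparation notebook. The combined table is written to `US_Census_All_Counties.csv`.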

### Notes:
US Census FIPS Codes: https://www.census.gov/library/reference/code-lists/ansi.html#cou


#### History 🗓️

Date | Person | Details
---- | ------ | -------
06/17/2023| Andrew Sommers|   Create initial notebook
08/25/2023| Andrew Sommers|   Update documentation



### Importing Libraries

Import required libraries

In [1]:
# --- standard library ---
import os  # used to switch the working directory to the project data folders

# --- third-party ---
import numpy as np
import pandas as pd  # tabular data

# currently unused imports, kept commented out for reference
#import requests # request http, api
#from functools import reduce

# Display settings: never truncate DataFrame output in this notebook.
pd.options.display.max_rows = None     # show all rows in display
pd.options.display.max_columns = None  # show all columns in display

In [2]:
# Load the prepared standard FIPS table (every column read as text) and build the
# base county table, indexed by FIPS, that the census extracts are joined onto.
# Per the original author's note, this input file contains official US state
# counties/parishes and Washington DC only; no US territories are included.
# NOTE: the path is specific to the author's machine - change it for your local environment.
os.chdir('C:\\Users\\andre\\OneDrive\\Documents\\IndianaUniversity\\D592\\Project_Disaster_Resilience\\Data\\Tables')
FIPS_Prepared_Table = pd.read_csv('FIPS_Prepared_Table.csv', dtype=str)

# set_index returns a new frame, so FIPS_Prepared_Table itself keeps FIPS as a column
US_Census_All_Counties = FIPS_Prepared_Table.set_index('FIPS')

print('Number of Records Read = ', len(FIPS_Prepared_Table))

Number of Records Read =  3143


In [3]:
# Preview the first three rows of the FIPS-indexed county table.
US_Census_All_Counties.head(3)

Unnamed: 0_level_0,STATE_CODE,STATE_FIPS,COUNTY_FIPS,COUNTY_NS,COUNTY_FULL_NAME,FIPS_CLASS_CODE,FUNCTION_STATUS,COUNTY,STATE
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0500000US01001,AL,1,1,161526,Autauga County,H1,A,AUTAUGA,ALABAMA
0500000US01003,AL,1,3,161527,Baldwin County,H1,A,BALDWIN,ALABAMA
0500000US01005,AL,1,5,161528,Barbour County,H1,A,BARBOUR,ALABAMA


In [4]:
# Read the US Census total population estimates (2021) at county level.
# All columns are read as text; GEO_ID is renamed to FIPS and becomes the index so
# the frame can be joined to the base county table.
# NOTE(review): the recorded run read 3221 rows versus 3143 in the FIPS table, so
# this extract appears to hold rows beyond the 50 states + DC (the original comment
# said it did not) - confirm.
# NOTE: the path is specific to the author's machine - change it for your local environment.
os.chdir('C:\\Users\\andre\\OneDrive\\Documents\\IndianaUniversity\\D592\\Project_Disaster_Resilience\\Data\\usCensus')
US_Census_Population_2021 = (
    pd.read_csv('US_Counties_Total_Population_2021.csv', dtype=str)
    .rename(columns={'GEO_ID': 'FIPS'})
    .set_index('FIPS')
)

# values that cannot be parsed as numbers become NaN (errors='coerce')
population_as_number = pd.to_numeric(US_Census_Population_2021['POPULATION'], errors='coerce')
US_Census_Population_2021['POPULATION'] = population_as_number

print('Number of Records Read = ', len(US_Census_Population_2021))

Number of Records Read =  3221


In [5]:
# Attach the 2021 population estimate to the base county table, matching on the
# FIPS index. The left join keeps every row of the base table; a county that is
# missing from the census extract gets NaN in POPULATION.
# Any POPULATION column left over from a previous run is dropped first: joining a
# column that already exists raises "ValueError: columns overlap", which made this
# cell fail whenever it was re-run. On a first run the drop is a no-op.
US_Census_All_Counties = (
    US_Census_All_Counties
    .drop(columns=['POPULATION'], errors='ignore')
    .join(US_Census_Population_2021['POPULATION'], how='left')
)
US_Census_All_Counties.head(3)

Unnamed: 0_level_0,STATE_CODE,STATE_FIPS,COUNTY_FIPS,COUNTY_NS,COUNTY_FULL_NAME,FIPS_CLASS_CODE,FUNCTION_STATUS,COUNTY,STATE,POPULATION
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0500000US01001,AL,1,1,161526,Autauga County,H1,A,AUTAUGA,ALABAMA,58239
0500000US01003,AL,1,3,161527,Baldwin County,H1,A,BALDWIN,ALABAMA,227131
0500000US01005,AL,1,5,161528,Barbour County,H1,A,BARBOUR,ALABAMA,25259


In [6]:
# Read the US Census population density estimates (2018) at county level.
# All columns are read as text; GEO_ID is renamed to FIPS and becomes the index so
# the frame can be joined to the base county table.
# NOTE(review): the recorded run read 3221 rows versus 3143 in the FIPS table, so
# this extract appears to hold rows beyond the 50 states + DC (the original comment
# said it did not) - confirm.
# NOTE: the path is specific to the author's machine - change it for your local environment.
os.chdir('C:\\Users\\andre\\OneDrive\\Documents\\IndianaUniversity\\D592\\Project_Disaster_Resilience\\Data\\usCensus')
US_Census_Pop_Density_2018 = (
    pd.read_csv('PopulationDensityByCounty_2018.csv', dtype=str)
    .rename(columns={'GEO_ID': 'FIPS'})
    .set_index('FIPS')
)

# values that cannot be parsed as numbers become NaN (errors='coerce')
density_as_number = pd.to_numeric(US_Census_Pop_Density_2018['POP_DENSITY'], errors='coerce')
US_Census_Pop_Density_2018['POP_DENSITY'] = density_as_number

print('Number of Records Read = ', len(US_Census_Pop_Density_2018))

Number of Records Read =  3221


In [7]:
# Attach the 2018 population density to the base county table, matching on the
# FIPS index. The left join keeps every row of the base table; a county that is
# missing from the census extract gets NaN in POP_DENSITY.
# Any POP_DENSITY column left over from a previous run is dropped first: joining a
# column that already exists raises "ValueError: columns overlap", which made this
# cell fail whenever it was re-run. On a first run the drop is a no-op.
US_Census_All_Counties = (
    US_Census_All_Counties
    .drop(columns=['POP_DENSITY'], errors='ignore')
    .join(US_Census_Pop_Density_2018['POP_DENSITY'], how='left')
)
US_Census_All_Counties.head(3)

Unnamed: 0_level_0,STATE_CODE,STATE_FIPS,COUNTY_FIPS,COUNTY_NS,COUNTY_FULL_NAME,FIPS_CLASS_CODE,FUNCTION_STATUS,COUNTY,STATE,POPULATION,POP_DENSITY
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0500000US01001,AL,1,1,161526,Autauga County,H1,A,AUTAUGA,ALABAMA,58239,91.8
0500000US01003,AL,1,3,161527,Baldwin County,H1,A,BALDWIN,ALABAMA,227131,114.6
0500000US01005,AL,1,5,161528,Barbour County,H1,A,BARBOUR,ALABAMA,25259,31.0


In [8]:
# Read the US Census urban/rural household counts (2020) at county level and derive
# the share of urban and rural households per county.
# All columns are read as text; GEO_ID is renamed to FIPS and becomes the index so
# the frame can be joined to the base county table.
# NOTE: the path is specific to the author's machine - change it for your local environment.
os.chdir('C:\\Users\\andre\\OneDrive\\Documents\\IndianaUniversity\\D592\\Project_Disaster_Resilience\\Data\\usCensus')
US_Household_UrbanRural_2020 = pd.read_csv('US_Counties_Household_UrbanRural_2020.csv', dtype=str)
US_Household_UrbanRural_2020 = (
    US_Household_UrbanRural_2020
    .rename(columns={'GEO_ID': 'FIPS'})
    .set_index('FIPS')
)

# convert the household counts to numbers; unparseable values become NaN
for count_column in ('TOTAL_HOUSEHOLDS', 'URBAN_HOUSEHOLDS', 'RURAL_HOUSEHOLDS'):
    US_Household_UrbanRural_2020[count_column] = pd.to_numeric(
        US_Household_UrbanRural_2020[count_column], errors='coerce'
    )

# Guard only the denominator against division by zero. The previous frame-wide
# replace(0, 1) also turned genuine zero urban/rural counts into 1, which distorted
# both the stored counts and the derived shares. A county with zero total households
# now gets NaN shares instead of a made-up value.
total_households = US_Household_UrbanRural_2020['TOTAL_HOUSEHOLDS'].replace(0, float('nan'))

US_Household_UrbanRural_2020['PCT_URBAN'] = (
    US_Household_UrbanRural_2020['URBAN_HOUSEHOLDS'] / total_households
).round(3)
# rural share is rural / TOTAL households (it was rural / rural, i.e. always 1.0)
US_Household_UrbanRural_2020['PCT_RURAL'] = (
    US_Household_UrbanRural_2020['RURAL_HOUSEHOLDS'] / total_households
).round(3)

# report the size of the frame read in this cell (previously the population frame)
print('Number of Records Read = ', len(US_Household_UrbanRural_2020))

Number of Records Read =  3221


In [9]:
# Attach the urban household share to the base county table, matching on the
# FIPS index. The left join keeps every row of the base table; a county that is
# missing from the census extract gets NaN in PCT_URBAN.
# Any PCT_URBAN column left over from a previous run is dropped first: joining a
# column that already exists raises "ValueError: columns overlap", which made this
# cell fail whenever it was re-run. On a first run the drop is a no-op.
US_Census_All_Counties = (
    US_Census_All_Counties
    .drop(columns=['PCT_URBAN'], errors='ignore')
    .join(US_Household_UrbanRural_2020['PCT_URBAN'], how='left')
)
US_Census_All_Counties.head(3)

Unnamed: 0_level_0,STATE_CODE,STATE_FIPS,COUNTY_FIPS,COUNTY_NS,COUNTY_FULL_NAME,FIPS_CLASS_CODE,FUNCTION_STATUS,COUNTY,STATE,POPULATION,POP_DENSITY,PCT_URBAN
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0500000US01001,AL,1,1,161526,Autauga County,H1,A,AUTAUGA,ALABAMA,58239,91.8,0.59
0500000US01003,AL,1,3,161527,Baldwin County,H1,A,BALDWIN,ALABAMA,227131,114.6,0.672
0500000US01005,AL,1,5,161528,Barbour County,H1,A,BARBOUR,ALABAMA,25259,31.0,0.351


In [37]:
# NOTE: every statement in this cell is commented out, so the industry breakdown
# extract is not loaded and US_Census_Industry_2020 is never created.
# read the US Census industry breakdown estimates for all US counties
#os.chdir('C:\\Users\\andre\\OneDrive\\Documents\\IndianaUniversity\\D592\\Project_Disaster_Resilience\\Data\\usCensus')
#US_Census_Industry_2020 = pd.read_csv(f'US_Counties_Industry_Percent_Breakdown_2020.csv', dtype = str) #this input file contains US official state counties/parishes and Washington DC, only. No US territories are included.
#US_Census_Industry_2020.rename({'GEO_ID':'FIPS'}, axis='columns', inplace=True)
#US_Census_Industry_2020.set_index('FIPS', inplace = True)
# NOTE(review): if this cell is re-enabled, the count below should use
# US_Census_Industry_2020 rather than the population frame.
#print('Number of Records Read = ', len(US_Census_Population_2021))

In [38]:
# NOTE: this cell is fully commented out. It would join the AGRICULTURE,
# PROFESSIONAL and INDUSTRY columns of US_Census_Industry_2020, a frame that is
# only created by commented-out code, so it cannot be re-enabled on its own.
#US_Census_All_Counties = US_Census_All_Counties.join(US_Census_Industry_2020['AGRICULTURE'], how = 'left')
#US_Census_All_Counties = US_Census_All_Counties.join(US_Census_Industry_2020[['PROFESSIONAL']], how = 'left')
#US_Census_All_Counties = US_Census_All_Counties.join(US_Census_Industry_2020['INDUSTRY'], how = 'left')
#US_Census_All_Counties.head(3)

In [10]:
# Read the US Census median household income estimates (2021) at county level.
# (The previous header comment described the industry breakdown extract.)
# All columns are read as text; GEO_ID is renamed to FIPS and becomes the index so
# the frame can be joined to the base county table.
# NOTE: the path is specific to the author's machine - change it for your local environment.
os.chdir('C:\\Users\\andre\\OneDrive\\Documents\\IndianaUniversity\\D592\\Project_Disaster_Resilience\\Data\\usCensus')
US_Household_Income_2021 = pd.read_csv('US_Counties_Household_Income_2021.csv', dtype=str)
US_Household_Income_2021 = (
    US_Household_Income_2021
    .rename(columns={'GEO_ID': 'FIPS'})
    .set_index('FIPS')
)

# Unparseable values become NaN and are then stored as 0.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on the selected column: that chained in-place call acts
# on a temporary object and does not update the frame under pandas Copy-on-Write.
US_Household_Income_2021['MEDIAN_INCOME'] = (
    pd.to_numeric(US_Household_Income_2021['MEDIAN_INCOME'], errors='coerce').fillna(0)
)

print('Number of Records Read = ', len(US_Household_Income_2021))

Number of Records Read =  3221


In [11]:
# Attach the median household income to the base county table, matching on the
# FIPS index. The left join keeps every row of the base table; a county that is
# missing from the census extract gets NaN in MEDIAN_INCOME.
# Any MEDIAN_INCOME column left over from a previous run is dropped first: joining a
# column that already exists raises "ValueError: columns overlap", which made this
# cell fail whenever it was re-run. On a first run the drop is a no-op.
US_Census_All_Counties = (
    US_Census_All_Counties
    .drop(columns=['MEDIAN_INCOME'], errors='ignore')
    .join(US_Household_Income_2021[['MEDIAN_INCOME']], how='left')
)
US_Census_All_Counties.head(3)

Unnamed: 0_level_0,STATE_CODE,STATE_FIPS,COUNTY_FIPS,COUNTY_NS,COUNTY_FULL_NAME,FIPS_CLASS_CODE,FUNCTION_STATUS,COUNTY,STATE,POPULATION,POP_DENSITY,PCT_URBAN,MEDIAN_INCOME
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0500000US01001,AL,1,1,161526,Autauga County,H1,A,AUTAUGA,ALABAMA,58239,91.8,0.59,31338.0
0500000US01003,AL,1,3,161527,Baldwin County,H1,A,BALDWIN,ALABAMA,227131,114.6,0.672,35140.0
0500000US01005,AL,1,5,161528,Barbour County,H1,A,BARBOUR,ALABAMA,25259,31.0,0.351,20704.0


In [12]:
# Write the combined county table to a CSV file.
# The FIPS index is moved back into a regular column, a column named 'index' (if
# one exists) is renamed to FIPS, and every remaining missing value is stored as 0
# before the file is written without the row index.
US_Census_All_Counties = (
    US_Census_All_Counties
    .reset_index()
    .rename(columns={'index': 'FIPS'})
    .fillna(0)
)

# NOTE: the path is specific to the author's machine - change it for your local environment.
os.chdir('C:\\Users\\andre\\OneDrive\\Documents\\IndianaUniversity\\D592\\Project_Disaster_Resilience\\Data\\usCensus')
US_Census_All_Counties.to_csv('US_Census_All_Counties.csv', header=True, index=False)

print('end of file combine and save function', len(US_Census_All_Counties))

end of file combine and save function 3143


### end of processing